diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.slnx b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.slnx new file mode 100644 index 00000000..7552e300 --- /dev/null +++ b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.slnx @@ -0,0 +1,3 @@ + + + diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/App.config b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/App.config new file mode 100644 index 00000000..56efbc7b --- /dev/null +++ b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/App.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Data/multipage_tiff_example.tif b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Data/multipage_tiff_example.tif new file mode 100644 index 00000000..86df9d5f Binary files /dev/null and b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Data/multipage_tiff_example.tif differ diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Output/.gitkeep b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Output/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.csproj b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.csproj new file mode 100644 index 00000000..be0ab6b9 --- /dev/null +++ b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images.csproj @@ -0,0 +1,79 @@ + + + + + Debug + AnyCPU + {23E85557-733B-494C-8D01-816104E10267} + Exe + Perform_OCR_on_Tiff_images + Perform-OCR-on-Tiff-images + v4.7.2 + 512 + true + true + + + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + 
using Syncfusion.OCRProcessor;
using System;
using System.IO;
using System.Text;
using System.Drawing;
using System.Drawing.Imaging;

namespace Perform_OCR_on_Tiff_images
{
    /// <summary>
    /// Console sample that runs OCR over every frame of a multi-page TIFF and
    /// collects the recognized text into a single text file.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Opens <c>Data/multipage_tiff_example.tif</c>, performs OCR on each frame and
        /// writes the combined text to <c>Output/Output.txt</c>. Every relative path is
        /// expanded with <see cref="Path.GetFullPath(string)"/>, i.e. against the
        /// current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string filePath = Path.GetFullPath(@"Data/multipage_tiff_example.tif");
            string outputPath = Path.GetFullPath(@"Output/Output.txt");

            StringBuilder output = new StringBuilder();

            // The stream shares a using scope with the Image built on top of it so that
            // it stays open for as long as frames are being read from the image.
            using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (Image img = Image.FromStream(fs, useEmbeddedColorManagement: false, validateImageData: false))
            using (OCRProcessor processor = new OCRProcessor())
            {
                processor.TessDataPath = Path.GetFullPath(@"TessdataBest/");
                processor.Settings.Language = Languages.English;
                processor.Settings.TesseractVersion = TesseractVersion.Version5_0;

                // Count and select frames along the SAME dimension. Counting along one
                // dimension and selecting along another would leave the active frame
                // unchanged and OCR the first page repeatedly under different labels.
                FrameDimension dimension = GetFrameDimension(img);
                int frameCount = Math.Max(1, img.GetFrameCount(dimension));

                for (int i = 0; i < frameCount; i++)
                {
                    // A single-frame image needs no selection; skipping the call keeps
                    // such files working without swallowing real selection errors.
                    if (frameCount > 1)
                    {
                        img.SelectActiveFrame(dimension, i);
                    }

                    using (Bitmap frameBmp = RenderActiveFrame(img))
                    {
                        string pageText = processor.PerformOCR(frameBmp, processor.TessDataPath);
                        output.AppendLine($"--- Page {i + 1} ---");
                        output.AppendLine(pageText ?? string.Empty);
                        output.AppendLine();
                    }
                }
            }

            // File.WriteAllText does not create missing folders, so make sure the
            // output directory exists first (no-op when it is already there).
            Directory.CreateDirectory(Path.GetDirectoryName(outputPath));
            File.WriteAllText(outputPath, output.ToString());
        }

        /// <summary>
        /// Chooses the frame dimension used to enumerate the frames of <paramref name="image"/>.
        /// </summary>
        /// <param name="image">The loaded image whose frames will be enumerated.</param>
        /// <returns>
        /// <see cref="FrameDimension.Page"/> when the image lists it or lists no dimension
        /// at all; otherwise the first dimension the image reports.
        /// </returns>
        private static FrameDimension GetFrameDimension(Image image)
        {
            Guid[] dimensionIds = image.FrameDimensionsList;
            if (dimensionIds == null || dimensionIds.Length == 0)
            {
                return FrameDimension.Page;
            }

            if (Array.IndexOf(dimensionIds, FrameDimension.Page.Guid) >= 0)
            {
                return FrameDimension.Page;
            }

            return new FrameDimension(dimensionIds[0]);
        }

        /// <summary>
        /// Copies the currently active frame of <paramref name="image"/> into a standalone bitmap.
        /// </summary>
        /// <param name="image">The source image with the desired frame already selected.</param>
        /// <returns>A new bitmap of the same pixel size; the caller owns and must dispose it.</returns>
        private static Bitmap RenderActiveFrame(Image image)
        {
            Bitmap frame = new Bitmap(image.Width, image.Height);
            try
            {
                // A new Bitmap carries a default resolution; copy the source values so
                // the frame keeps the DPI of the original image.
                // NOTE(review): whether the OCR engine consumes the DPI is not visible here.
                if (image.HorizontalResolution > 0 && image.VerticalResolution > 0)
                {
                    frame.SetResolution(image.HorizontalResolution, image.VerticalResolution);
                }

                // Dispose the Graphics before returning so drawing is finished by the
                // time the bitmap is handed to the OCR engine.
                using (Graphics g = Graphics.FromImage(frame))
                {
                    g.DrawImage(image, 0, 0, image.Width, image.Height);
                }

                return frame;
            }
            catch
            {
                // Do not leak the bitmap when rendering fails; the error still propagates.
                frame.Dispose();
                throw;
            }
        }
    }
}
+[assembly: AssemblyTitle("Perform-OCR-on-Tiff-images")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("Perform-OCR-on-Tiff-images")] +[assembly: AssemblyCopyright("Copyright © 2026")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("23e85557-733b-494c-8d01-816104e10267")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/TessdataBest/eng.traineddata b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/TessdataBest/eng.traineddata new file mode 100644 index 00000000..176dc322 Binary files /dev/null and b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/TessdataBest/eng.traineddata differ diff --git a/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/packages.config b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/packages.config new file mode 100644 index 00000000..e2bf09c5 --- /dev/null +++ b/OCR/.NET/Perform-OCR-on-Tiff-images/Perform-OCR-on-Tiff-images/packages.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file