Sei sulla pagina 1 di 8

8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

Dismiss
Join GitHub today
GitHub is home to over 40 million developers
working together to host and review code,
manage projects, and build software together.

Sign up

Branch: master Find file Copy path

emgucv / Emgu.CV.Example / OCR / OCRForm.cs

emgucv Fix Xamarin Forms OCR demo error.

05d6307 on Mar 5

1 contributor

Raw Blame History

340 lines (290 sloc) 12.5 KB

1 //----------------------------------------------------------------------------
2 // Copyright (C) 2004-2019 by EMGU Corporation. All rights reserved.
3 //----------------------------------------------------------------------------
4
5 using System;
6 using System.Collections.Generic;
7 using System.Data;
8 using System.Drawing;
9 using System.Text;
10 using System.Windows.Forms;
11 using System.IO;
12 using Emgu.CV;
13 using Emgu.CV.CvEnum;
14 using Emgu.CV.OCR;
15 using Emgu.CV.Structure;
16 using Emgu.CV.Text;
17 using Emgu.CV.Util;
18
19 namespace OCR
20 {
21 public partial class OCRForm : Form
22 {
23 private Tesseract _ocr;
https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 1/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

24
/// <summary>
/// Creates the form, initializes the Tesseract engine for the <c>eng</c> language from the
/// default Tesseract directory and, when that succeeds, selects the first OCR option and
/// runs OCR on a generated test image.
/// </summary>
25 public OCRForm()
26 {
27 InitializeComponent();
28
29 //System.Net.ServicePointManager.Expect100Continue = true;
30 //System.Net.ServicePointManager.SecurityProtocol = System.Net.SecurityProtocol
31
// NOTE(review): the third InitOcr argument on the next line is cut off in this listing;
// it presumably names an OcrEngineMode value. Confirm against the repository.
32 if (InitOcr(Emgu.CV.OCR.Tesseract.DefaultTesseractDirectory, "eng", OcrEngineMo
33 {
// Index 0 is the value the Mode property maps to OCRMode.FullPage.
34 ocrOptionsComboBox.SelectedIndex = 0;
35
36 Mat img = new Mat(200, 400, DepthType.Cv8U, 3); //Create a 3 channel image
37 img.SetTo(new Bgr(255, 0, 0).MCvScalar); // set it to Blue color
38
39 //Draw "Hello, world" in green on the image using the HersheyComplex font
40 CvInvoke.PutText(
41 img,
42 "Hello, world",
43 new System.Drawing.Point(10, 80),
44 FontFace.HersheyComplex,
45 1.0,
46 new Bgr(0, 255, 0).MCvScalar);
// NOTE(review): img is never disposed in this constructor; confirm nothing retains it
// after OcrImage returns before wrapping it in a using block.
47 OcrImage(img);
48 }
49 }
50
/// <summary>
/// Ensures that the trained-data file for a language exists in the given folder. The folder
/// is created when missing, and the file is downloaded with a WebClient from the URL
/// returned by Tesseract.GetLangFileUrl only when the destination file does not exist yet.
/// </summary>
/// <param name="folder">Directory that should contain the trained-data file.</param>
/// <param name="lang">Language code used to build the file name and the download URL.</param>
// NOTE(review): the lines that build the destination file name and the first console
// message are cut off at the right margin in this listing. The download is synchronous
// and no exception is handled in this method, so failures propagate to the caller.
51 private static void TesseractDownloadLangFile(String folder, String lang)
52 {
53 //String subfolderName = "tessdata";
54 //String folderName = System.IO.Path.Combine(folder, subfolderName);
55 String folderName = folder;
56 if (!System.IO.Directory.Exists(folderName))
57 {
58 System.IO.Directory.CreateDirectory(folderName);
59 }
60 String dest = System.IO.Path.Combine(folderName, String.Format("{0}.traineddata
61 if (!System.IO.File.Exists(dest))
62 using (System.Net.WebClient webclient = new System.Net.WebClient())
63 {
64 String source = Emgu.CV.OCR.Tesseract.GetLangFileUrl(lang);
65
66 Console.WriteLine(String.Format("Downloading file from '{0}' to '{1}'",
67 webclient.DownloadFile(source, dest);
68 Console.WriteLine(String.Format("Download completed"));
69 }
70 }
71

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 2/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

/// <summary>
/// Disposes any existing engine, makes sure the trained-data files for the requested
/// language and for <c>osd</c> are present, and creates a new Tesseract engine.
/// </summary>
/// <param name="path">Trained-data directory; the default Tesseract directory is used when null or empty.</param>
/// <param name="lang">Language code passed to the download helper and to the Tesseract constructor.</param>
/// <param name="mode">Engine mode passed to the Tesseract constructor.</param>
/// <returns>True when the engine was created; false when any exception was caught.</returns>
72 private bool InitOcr(String path, String lang, OcrEngineMode mode)


73 {
74 try
75 {
// Release the previous engine before replacing it.
76 if (_ocr != null)
77 {
78 _ocr.Dispose();
79 _ocr = null;
80 }
81
82 if (String.IsNullOrEmpty(path))
83 path = Emgu.CV.OCR.Tesseract.DefaultTesseractDirectory;
84
85 TesseractDownloadLangFile(path, lang);
86 TesseractDownloadLangFile(path, "osd"); //script orientation detection
87 /*
88 String pathFinal = path.Length == 0 || path.Substring(path.Length - 1, 1).E
89 ? path
90 : String.Format("{0}{1}", path, System.IO.Path.DirectorySeparatorChar);
91 */
92 _ocr = new Tesseract(path, lang, mode);
93
// NOTE(review): the format arguments of the next statement are cut off in this listing.
94 languageNameLabel.Text = String.Format("{0} : {1} (tesseract version {2})",
95 return true;
96 }
97 catch (Exception e)
98 {
// NOTE(review): if the exception is thrown after the engine was assigned above, the
// reference is dropped here without calling Dispose.
99 _ocr = null;
// NOTE(review): the remaining MessageBox.Show arguments are cut off in this listing.
100 MessageBox.Show(e.Message, "Failed to initialize tesseract OCR engine", Mes
101 languageNameLabel.Text = "Failed to initialize tesseract OCR engine";
102 return false;
103 }
104 }
105
106 /// <summary>
107 /// Selects how OcrImage processes an image.
108 /// </summary>
109 private enum OCRMode
110 {
111 /// <summary>
112 /// Run OCR on the whole image in one pass.
113 /// </summary>
114 FullPage,
115
116 /// <summary>
117 /// Detect text regions first, then run OCR on each detected region.
118 /// </summary>
119 TextDetection

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 3/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

120 }
121
/// <summary>
/// The OCR mode currently selected in ocrOptionsComboBox. Index 0 maps to
/// OCRMode.FullPage.
/// </summary>
// NOTE(review): the else branch of the conditional is cut off in this listing; presumably
// it yields OCRMode.TextDetection, the only other enum member. Confirm against the repository.
122 private OCRMode Mode
123 {
124 get { return ocrOptionsComboBox.SelectedIndex == 0 ? OCRMode.FullPage : OCRMode
125 }
126
/// <summary>
/// Returns a rectangle whose width and height are those of <paramref name="r"/> multiplied
/// by <paramref name="scale"/> (rounded), positioned from the center of the original.
/// </summary>
/// <param name="r">Rectangle to scale.</param>
/// <param name="scale">Factor applied to both width and height.</param>
/// <returns>The scaled rectangle with integer coordinates.</returns>
127 private static Rectangle ScaleRectangle(Rectangle r, double scale)
128 {
// Center of the input rectangle in floating point.
129 double centerX = r.Location.X + r.Width / 2.0;
130 double centerY = r.Location.Y + r.Height / 2.0;
131 double newWidth = Math.Round(r.Width * scale);
132 double newHeight = Math.Round(r.Height * scale);
// NOTE(review): the Y argument on the next line is cut off in this listing; presumably it
// mirrors the X argument using centerY and newHeight. Confirm against the repository.
133 return new Rectangle((int)Math.Round(centerX - newWidth / 2.0), (int)Math.Round
134 (int)newWidth, (int)newHeight);
135 }
136
/// <summary>
/// Runs OCR on <paramref name="image"/> with the given engine and returns the recognized
/// text. A BGR copy of the input is written into the imageColor argument and rectangles
/// are drawn on it in red around recognized characters (and, in text-detection mode,
/// around the detected regions).
/// </summary>
/// <param name="ocr">Tesseract engine used for recognition.</param>
/// <param name="image">Input image; single-channel input is converted to BGR for the annotated copy.</param>
/// <param name="mode">FullPage recognizes the whole image; otherwise text regions are detected first.</param>
/// <returns>The UTF8 text from the engine; in text-detection mode, the text of every region, each followed by a new line.</returns>
/// <exception cref="Exception">Thrown when ocr.Recognize() returns a non-zero value.</exception>
// NOTE(review): the end of the signature on the next line (the imageColor parameter and
// anything after it) is cut off in this listing. The call site in OcrImage(Mat) passes four
// arguments without ref or out.
137 private static String OcrImage(Tesseract ocr, Mat image, OCRMode mode, Mat imageCol
138 {
139 Bgr drawCharColor = new Bgr(Color.Red);
140
// Fill imageColor from the input: single-channel input is converted from gray to BGR,
// anything else is copied unchanged.
141 if (image.NumberOfChannels == 1)
142 CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
143 else
144 image.CopyTo(imageColor);
145
146 if (mode == OCRMode.FullPage)
147 {
148 ocr.SetImage(imageColor);
149
// NOTE(review): the message below reads recognizer while the text-detection branch uses
// recognize; likely a typo, left unchanged because it is a runtime string.
150 if (ocr.Recognize() != 0)
151 throw new Exception("Failed to recognizer image");
152
153 Tesseract.Character[] characters = ocr.GetCharacters();
// Fallback when no character was found: retry on thresholded versions of the input,
// first with threshold 65 on a gray conversion, then with threshold 190 on the input itself.
// NOTE(review): Recognize() is not called again after the SetImage calls below; confirm that
// GetCharacters performs recognition on its own. The Bgr2Gray conversion presumably assumes
// a 3-channel input. imgGrey and imgThresholded are not disposed in this method. The
// threshold type arguments are cut off in this listing.
154 if (characters.Length == 0)
155 {
156 Mat imgGrey = new Mat();
157 CvInvoke.CvtColor(image, imgGrey, ColorConversion.Bgr2Gray);
158 Mat imgThresholded = new Mat();
159 CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Bina
160 ocr.SetImage(imgThresholded);
161 characters = ocr.GetCharacters();
// NOTE(review): this rebinds the local parameter only (the call site passes it without
// ref), so rectangles drawn afterwards land on imgThresholded, not on the Mat the caller passed in.
162 imageColor = imgThresholded;
163 if (characters.Length == 0)
164 {
165 CvInvoke.Threshold(image, imgThresholded, 190, 255, ThresholdType.B
166 ocr.SetImage(imgThresholded);
167 characters = ocr.GetCharacters();

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 4/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

168 imageColor = imgThresholded;


169 }
170 }
// Outline every recognized character.
171 foreach (Tesseract.Character c in characters)
172 {
173 CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
174 }
175
176 return ocr.GetUTF8Text();
177
178 }
179 else
180 {
// Always true here, so the inverted channels below are always generated.
181 bool checkInvert = true;
182
183 Rectangle[] regions;
184
// NOTE(review): the remaining constructor arguments of both filters are cut off in this
// listing. The classifier files are referenced by relative file name.
185 using (
186 ERFilterNM1 er1 = new ERFilterNM1("trained_classifierNM1.xml", 8, 0.0002
187 using (ERFilterNM2 er2 = new ERFilterNM2("trained_classifierNM2.xml", 0.3f)
188 {
189 int channelCount = image.NumberOfChannels;
190 UMat[] channels = new UMat[checkInvert ? channelCount * 2 : channelCoun
191
// First half of the array: one UMat per input channel.
192 for (int i = 0; i < channelCount; i++)
193 {
194 UMat c = new UMat();
195 CvInvoke.ExtractChannel(image, c, i);
196 channels[i] = c;
197 }
198
// Second half of the array: the bitwise inverse of each extracted channel.
199 if (checkInvert)
200 {
201 for (int i = 0; i < channelCount; i++)
202 {
203 UMat c = new UMat();
204 CvInvoke.BitwiseNot(channels[i], c);
205 channels[i + channelCount] = c;
206 }
207 }
208
209 VectorOfERStat[] regionVecs = new VectorOfERStat[channels.Length];
210 for (int i = 0; i < regionVecs.Length; i++)
211 regionVecs[i] = new VectorOfERStat();
212
213 try
214 {
// Run both filters on every channel, then group the results into text regions.
215 for (int i = 0; i < channels.Length; i++)

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 5/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

216 {
217 er1.Run(channels[i], regionVecs[i]);
218 er2.Run(channels[i], regionVecs[i]);
219 }
220 using (VectorOfUMat vm = new VectorOfUMat(channels))
221 {
222 regions = ERFilter.ERGrouping(image, vm, regionVecs, ERFilter.G
223 "trained_classifier_erGrouping.xml", 0.5f);
224 }
225 }
226 finally
227 {
// Release the per-channel buffers and region vectors whether or not grouping succeeded.
228 foreach (UMat tmp in channels)
229 if (tmp != null)
230 tmp.Dispose();
231 foreach (VectorOfERStat tmp in regionVecs)
232 if (tmp != null)
233 tmp.Dispose();
234 }
235
// Enlarge each detected region by a factor of 1.1 and clip it to the image bounds.
236 Rectangle imageRegion = new Rectangle(Point.Empty, imageColor.Size);
237 for (int i = 0; i < regions.Length; i++)
238 {
239 Rectangle r = ScaleRectangle(regions[i], 1.1);
240
241 r.Intersect(imageRegion);
242 regions[i] = r;
243 }
244
245 }
246
247
// Recognize each region separately and collect characters and text.
248 List<Tesseract.Character> allChars = new List<Tesseract.Character>();
249 String allText = String.Empty;
250 foreach (Rectangle rect in regions)
251 {
252 using (Mat region = new Mat(image, rect))
253 {
254 ocr.SetImage(region);
255 if (ocr.Recognize() != 0)
256 throw new Exception("Failed to recognize image");
257 Tesseract.Character[] characters = ocr.GetCharacters();
258
259 //convert the coordinates from the local region to global
260 for (int i = 0; i < characters.Length; i++)
261 {
262 Rectangle charRegion = characters[i].Region;
263 charRegion.Offset(rect.Location);

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 6/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

264 characters[i].Region = charRegion;


265
266 }
267 allChars.AddRange(characters);
268
269 allText += ocr.GetUTF8Text() + Environment.NewLine;
270
271 }
272 }
273
// Outline the detected regions and the recognized characters, both in red.
274 Bgr drawRegionColor = new Bgr(Color.Red);
275 foreach (Rectangle rect in regions)
276 {
277 CvInvoke.Rectangle(imageColor, rect, drawRegionColor.MCvScalar);
278 }
279 foreach (Tesseract.Character c in allChars)
280 {
281 CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
282 }
283
284 return allText;
285
286 }
287
288 }
289
/// <summary>
/// Clears the image box and both text boxes, runs OCR on <paramref name="source"/> with
/// the current engine and mode, and shows the annotated image and recognized text. In
/// full-page mode the HOCR text is shown as well.
/// </summary>
/// <param name="source">Image to recognize.</param>
// In non-DEBUG builds exceptions are caught and shown in a message box; in DEBUG builds
// the try and catch are compiled out, so exceptions propagate to the caller.
// NOTE(review): _ocr is used without a null check, and the result Mat assigned to
// imageBox1.Image is not disposed when it is later replaced; confirm ownership.
290 private void OcrImage(Mat source)
291 {
292 imageBox1.Image = null;
293 ocrTextBox.Text = String.Empty;
294 hocrTextBox.Text = String.Empty;
295 #if !DEBUG
296 try
297 #endif
298 {
299
300 Mat result = new Mat();
301 String ocredText = OcrImage(_ocr, source, Mode, result);
302 imageBox1.Image = result;
303 ocrTextBox.Text = ocredText;
304 if (Mode == OCRMode.FullPage)
305 {
306 hocrTextBox.Text = _ocr.GetHOCRText();
307 }
308 }
309 #if !DEBUG
310 catch (Exception exception)
311 {

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 7/8
8/27/2019 emgucv/OCRForm.cs at master · emgucv/emgucv · GitHub

312 MessageBox.Show(exception.Message);
313 }
314 #endif
315 }
316
/// <summary>
/// Click handler for the load-image button: shows the open-file dialog and, when the user
/// confirms, displays the chosen file name, loads the file into a Mat and runs OCR on it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
317 private void loadImageButton_Click(object sender, EventArgs e)
318 {
319 if (openImageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
320 {
321 fileNameTextBox.Text = openImageFileDialog.FileName;
// NOTE(review): source is never disposed here; confirm nothing retains it after OcrImage
// returns before wrapping it in a using block.
322 Mat source = new Mat(fileNameTextBox.Text);
323 OcrImage(source);
324 }
325 }
326
/// <summary>
/// Click handler for the load-language menu item: shows the language-file dialog and, when
/// the user confirms, re-initializes the OCR engine from the directory and base name of the
/// selected file with the default engine mode.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
// NOTE(review): the ends of the dialog-result check and of the lang assignment are cut off
// in this listing. The boolean result of InitOcr is ignored here; InitOcr reports failures
// itself through a message box and the language label.
327 private void loadLanguageToolStripMenuItem_Click(object sender, EventArgs e)
328 {
329 if (openLanguageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK
330 {
331 string path = Path.GetDirectoryName(openLanguageFileDialog.FileName);
332 string lang = Path.GetFileNameWithoutExtension(openLanguageFileDialog.FileN
333
334 InitOcr(path, lang, OcrEngineMode.Default);
335
336 }
337 }
338 }
339 }

https://github.com/emgucv/emgucv/blob/master/Emgu.CV.Example/OCR/OCRForm.cs 8/8