ReadmeExamples.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. <?php namespace thiagoalessio\TesseractOCR\Tests\EndToEnd;
  2. use thiagoalessio\TesseractOCR\TesseractOcrException;
  3. use thiagoalessio\TesseractOCR\Tests\Common\TestCase;
  4. use thiagoalessio\TesseractOCR\TesseractOCR;
  5. use ReflectionObject;
  /**
   * End-to-end tests that replay the usage examples from the project README
   * against a real tesseract binary and the fixture images in $imagesDir.
   *
   * Several tests are skipped for particular tesseract versions; the version is
   * read from `<executable> --version` on first use and cached on the instance.
   */
  class ReadmeExamples extends TestCase
  {
      /** Name or path of the tesseract binary passed to executable() by the tests. */
      private $executable = 'tesseract';

      /** Directory holding the fixture images (a relative path). */
      private $imagesDir = './tests/EndToEnd/images';

      /** Cached result of installedVersion(); null until first queried. */
      private $cachedVersion = null;

      /**
       * Reads plain text from an image file.
       */
      public function testBasicUsage()
      {
          $expected = "The quick brown fox\njumps over\nthe lazy dog.";
          $actual = (new TesseractOCR("{$this->imagesDir}/text.png"))
              ->executable($this->executable)
              ->run();

          // Normalize platform line endings before comparing.
          $this->assertEquals($expected, str_replace(PHP_EOL, "\n", $actual));
      }

      /**
       * Reads text using a non-default language ('deu').
       */
      public function testOtherLanguages()
      {
          $expected = 'Bülowstraße';
          $actual = (new TesseractOCR("{$this->imagesDir}/german.png"))
              ->executable($this->executable)
              ->lang('deu')
              ->run();

          $this->assertEquals($expected, $actual);
      }

      /**
       * Reads an image using several languages at once.
       */
      public function testMultipleLanguages()
      {
          // training data for these versions return different output
          if ($this->isVersion302() || $this->isVersion305()) {
              $this->skip();
          }

          $expected = 'I eat すし y Pollo';
          $actual = (new TesseractOCR("{$this->imagesDir}/mixed-languages.png"))
              ->executable($this->executable)
              ->lang('eng', 'jpn', 'spa')
              ->run();

          $this->assertEquals($expected, $actual);
      }

      /**
       * Restricts recognition to the characters A-Z via allowlist().
       */
      public function testInducingRecognition()
      {
          // https://github.com/tesseract-ocr/tesseract/issues/751
          if ($this->isVersion302() || $this->isVersion4()) {
              $this->skip();
          }

          $expected = 'BOSS';
          $actual = (new TesseractOCR("{$this->imagesDir}/8055.png"))
              ->executable($this->executable)
              ->allowlist(range('A', 'Z'))
              ->run();

          $this->assertEquals($expected, $actual);
      }

      /**
       * Checks that the languages used by the other tests are reported as available.
       */
      public function testListAvailableLanguages()
      {
          // feature not available in this version of tesseract
          if ($this->isVersion302()) {
              $this->skip();
          }

          $actual = (new TesseractOCR())->availableLanguages();

          // Strict comparison so only an exact string match counts as "available".
          $this->assertEquals(true, in_array('deu', $actual, true));
          $this->assertEquals(true, in_array('eng', $actual, true));
          $this->assertEquals(true, in_array('jpn', $actual, true));
          $this->assertEquals(true, in_array('spa', $actual, true));
      }

      /**
       * Checks that no output file is left on disk after a successful run.
       */
      public function testTemporaryFilesAreDeleted()
      {
          // https://github.com/thiagoalessio/tesseract-ocr-for-php/issues/169
          $ocr = new TesseractOCR("{$this->imagesDir}/text.png");
          $ocr->run();

          // Both values of the getOutputFile() flag are checked.
          $this->assertEquals(false, file_exists($ocr->command->getOutputFile(false)));
          $this->assertEquals(false, file_exists($ocr->command->getOutputFile(true)));
      }

      /**
       * Checks that withoutTempFiles() leaves the command's outputFile property unset.
       */
      public function testTemporaryFilesAreNotCreated()
      {
          // Cannot read from stdin in version 3.02
          if ($this->isVersion302()) {
              $this->skip();
          }

          $ocr = new TesseractOCR("{$this->imagesDir}/text.png");
          $ocr->withoutTempFiles();
          $ocr->run();

          // Read the property through reflection; setAccessible(true) allows the
          // read even if the property is not public.
          $property = (new ReflectionObject($ocr->command))->getProperty('outputFile');
          $property->setAccessible(true);
          $outputFileValue = $property->getValue($ocr->command);

          $this->assertEquals(null, $outputFileValue);
      }

      /**
       * Checks that no output file is left on disk when the run fails.
       */
      public function testTemporaryFilesAreDeletedInCaseOfException()
      {
          // Constructed outside the try block so $ocr is always defined for the
          // assertions below.
          $ocr = new TesseractOCR("{$this->imagesDir}/not-an-image.txt");

          try {
              $ocr->run();
          } catch (TesseractOcrException $e) {
              // Intentionally ignored: only the cleanup after the failure is
              // under test here.
          }

          $this->assertEquals(false, file_exists($ocr->command->getOutputFile(false)));
          $this->assertEquals(false, file_exists($ocr->command->getOutputFile(true)));
      }

      /**
       * Feeds the image through imageData() instead of a file path.
       */
      public function testWithoutInputFile()
      {
          // Cannot read from stdin in version 3.02
          if ($this->isVersion302()) {
              $this->skip();
          }

          $imagePath = "{$this->imagesDir}/text.png";
          $expected = "The quick brown fox\njumps over\nthe lazy dog.";
          $actual = (new TesseractOCR)
              ->imageData(file_get_contents($imagePath), filesize($imagePath))
              ->executable($this->executable)
              ->run();

          // Only the normalized output is compared, as in the other multi-line
          // tests; comparing the raw output fails where PHP_EOL is not "\n".
          $this->assertEquals($expected, str_replace(PHP_EOL, "\n", $actual));
      }

      /**
       * Runs with withoutTempFiles() while still reading the image from a file.
       */
      public function testWithoutOutputFile()
      {
          // Cannot write to stdout in version 3.02
          if ($this->isVersion302()) {
              $this->skip();
          }

          $expected = "The quick brown fox\njumps over\nthe lazy dog.";
          $actual = (new TesseractOCR("{$this->imagesDir}/text.png"))
              ->executable($this->executable)
              ->withoutTempFiles()
              ->run();

          $this->assertEquals($expected, str_replace(PHP_EOL, "\n", $actual));
      }

      /**
       * Combines imageData() input with withoutTempFiles().
       */
      public function testWithoutFiles()
      {
          // Cannot read from stdin and write to stdout in version 3.02
          if ($this->isVersion302()) {
              $this->skip();
          }

          $imagePath = "{$this->imagesDir}/text.png";
          $expected = "The quick brown fox\njumps over\nthe lazy dog.";
          $actual = (new TesseractOCR)
              ->imageData(file_get_contents($imagePath), filesize($imagePath))
              ->executable($this->executable)
              ->withoutTempFiles()
              ->run();

          $this->assertEquals($expected, str_replace(PHP_EOL, "\n", $actual));
      }

      /**
       * Reads an image whose file name contains backticks.
       */
      public function testBacktickOnFilenames()
      {
          // skipping for now until I take the time to properly fix it
          if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
              $this->skip();
          }

          $expected = "The quick brown fox\njumps over\nthe lazy dog.";
          $actual = (new TesseractOCR("{$this->imagesDir}/file`with`backtick.png"))
              ->executable($this->executable)
              ->run();

          $this->assertEquals($expected, str_replace(PHP_EOL, "\n", $actual));
      }

      /**
       * Tells whether the installed tesseract is a 3.02.x release.
       *
       * @return bool
       */
      protected function isVersion302()
      {
          $version = $this->installedVersion();

          return version_compare($version, '3.02', '>=')
              && version_compare($version, '3.03', '<');
      }

      /**
       * Tells whether the installed tesseract is a 3.05.x release.
       *
       * @return bool
       */
      protected function isVersion305()
      {
          $version = $this->installedVersion();

          return version_compare($version, '3.05', '>=')
              && version_compare($version, '3.06', '<');
      }

      /**
       * Tells whether the installed tesseract is version 4.00 or newer.
       *
       * @return bool
       */
      protected function isVersion4()
      {
          return version_compare($this->installedVersion(), '4.00', '>=');
      }

      /**
       * Returns the version token printed by `<executable> --version`.
       *
       * The token is the second space-separated word of the first output line.
       * A leading "v" is stripped because version_compare() orders a non-numeric
       * prefix below any number. The result is cached so the binary is spawned
       * at most once per instance.
       *
       * @return string The version token, or '0' when none could be read.
       */
      private function installedVersion()
      {
          if ($this->cachedVersion !== null) {
              return $this->cachedVersion;
          }

          $output = [];
          // Query the same binary the tests run, with stderr merged into stdout.
          exec(escapeshellarg($this->executable) . ' --version 2>&1', $output);

          $firstLine = isset($output[0]) ? $output[0] : '';
          $words = explode(' ', $firstLine);
          $token = isset($words[1]) ? ltrim($words[1], 'vV') : '';

          // '0' compares below every version range checked by the predicates.
          $this->cachedVersion = ($token === '') ? '0' : $token;

          return $this->cachedVersion;
      }
  }