Command.php 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. <?php namespace thiagoalessio\TesseractOCR;
  2. class Command
  3. {
  4. public $executable = 'tesseract';
  5. public $useFileAsInput = true;
  6. public $useFileAsOutput = true;
  7. public $options = array();
  8. public $configFile;
  9. public $tempDir;
  10. public $threadLimit;
  11. public $image;
  12. public $imageSize;
  13. private $outputFile;
  14. public function __construct($image=null, $outputFile=null)
  15. {
  16. $this->image = $image;
  17. $this->outputFile = $outputFile;
  18. }
  19. public function build() { return "$this"; }
  20. public function __toString()
  21. {
  22. $cmd = array();
  23. if ($this->threadLimit) $cmd[] = "OMP_THREAD_LIMIT={$this->threadLimit}";
  24. $cmd[] = self::escape($this->executable);
  25. $cmd[] = $this->useFileAsInput ? self::escape($this->image) : "-";
  26. $cmd[] = $this->useFileAsOutput ? self::escape($this->getOutputFile(false)) : "-";
  27. $version = $this->getTesseractVersion();
  28. foreach ($this->options as $option) {
  29. $cmd[] = is_callable($option) ? $option($version) : "$option";
  30. }
  31. if ($this->configFile) $cmd[] = $this->configFile;
  32. return join(' ', $cmd);
  33. }
  34. public function getOutputFile($withExt=true)
  35. {
  36. if (!$this->outputFile)
  37. $this->outputFile = $this->getTempDir()
  38. .DIRECTORY_SEPARATOR
  39. .basename(tempnam($this->getTempDir(), 'ocr'));
  40. if (!$withExt) return $this->outputFile;
  41. $hasCustomExt = array('hocr', 'tsv', 'pdf');
  42. $ext = in_array($this->configFile, $hasCustomExt) ? $this->configFile : 'txt';
  43. return "{$this->outputFile}.{$ext}";
  44. }
  45. public function getTempDir()
  46. {
  47. return $this->tempDir ?: sys_get_temp_dir();
  48. }
  49. public function getTesseractVersion()
  50. {
  51. exec(self::escape($this->executable).' --version 2>&1', $output);
  52. $outputParts = explode(' ', $output[0]);
  53. return $outputParts[1];
  54. }
  55. public function getAvailableLanguages()
  56. {
  57. exec(self::escape($this->executable) . ' --list-langs 2>&1', $output);
  58. array_shift($output);
  59. sort($output);
  60. return $output;
  61. }
  62. public static function escape($str)
  63. {
  64. $charlist = strtoupper(substr(PHP_OS, 0, 3)) == 'WIN' ? '$"`' : '$"\\`';
  65. return '"'.addcslashes($str, $charlist).'"';
  66. }
  67. }