diff --git a/.gitignore b/.gitignore index 3c3629e..eb79dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +.idea diff --git a/README.md b/README.md index af8fcde..54dd1d6 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,3 @@ # Tesseract for node.js -[![NPM](https://nodei.co/npm/node-tesseract.png)](https://nodei.co/npm/node-tesseract/) - -A simple wrapper for the Tesseract OCR package for node.js - -## Requirements - -* Tesseract 3.01 or higher is needed for this to work - -## Installation -There is a hard dependency on the [Tesseract project](https://github.com/tesseract-ocr/tesseract). You can find installation instructions for various platforms on the project site. For Homebrew users, the installation is quick and easy. - - brew install tesseract --with-all-languages - -The above will install all of the language packages available, if you don't need them all you can remove the `--all-languages` flag and install them manually, by downloading them to your local machine and then exposing the `TESSDATA_PREFIX` variable into your path: - - export TESSDATA_PREFIX=~/Downloads/ - -You can then go about installing the node-module to expose the JavaScript API: - - npm install node-tesseract - -## Usage - -```JavaScript -var tesseract = require('node-tesseract'); - -// Recognize text of any language in any format -tesseract.process(__dirname + '/path/to/image.jpg',function(err, text) { - if(err) { - console.error(err); - } else { - console.log(text); - } -}); - -// Recognize German text in a single uniform block of text and set the binary path - -var options = { - l: 'deu', - psm: 6, - binary: '/usr/local/bin/tesseract' -}; - -tesseract.process(__dirname + '/path/to/image.jpg', options, function(err, text) { - if(err) { - console.error(err); - } else { - console.log(text); - } -}); -``` - -## Changelog -* **0.2.7**: Adds output file extension detection -* **0.2.6**: Catches exception when deleting tmp files that do not exist -* **0.2.5**: Preserves 
whitespace and replaces tmp module -* **0.2.4**: Removes console logging for messaging -* **0.2.3**: The ability to set the binary path via the config object. Better installation documentation. -* **0.2.2**: Adds test converage to utils module -* **0.2.1**: Strips leading & trailing whitespace from output by default -* **0.2.0**: Adds ability to pass options via a configuration object. -* **0.1.1**: Updates tmp module. -* **0.1.0**: Removes preprocessing functionatlity. See #3. -* **0.0.3**: Adds basic test coverage for process method -* **0.0.2**: Pulls in changes by [joscha](https://github.com/joscha) including: refactored to support tesseract 3.01, added language parameter, config parameter, documentation, Added support for custom preprocessors, OTB Preprocessor using ImageMagick 'convert' -* **0.0.1**: Initial version +This project is a fork of [node-tesseract](https://github.com/desmondmorris/node-tesseract) diff --git a/lib/tesseract.js b/lib/tesseract.js index 2f7e13c..9a70b01 100644 --- a/lib/tesseract.js +++ b/lib/tesseract.js @@ -63,7 +63,7 @@ var Tesseract = { } if (options.psm !== null) { - command.push('-psm ' + options.psm); + command.push('--psm ' + options.psm); } if (options.config !== null) { @@ -83,7 +83,7 @@ var Tesseract = { } // Find one of the three possible extension - glob(output + '.+(html|hocr|txt)', function(err, files){ + glob(output + '.+(html|hocr|txt)', function(err, files) { if (err) { callback(err, null); return; @@ -97,7 +97,12 @@ var Tesseract = { var index = Tesseract.tmpFiles.indexOf(output); if (~index) Tesseract.tmpFiles.splice(index, 1); - fs.unlinkSync(files[0]); + fs.unlink(files[0], (err) => { + if (err) { + callback(err, null); + return; + } + }); callback(null, data) }); diff --git a/test/tesseract.js b/test/tesseract.js deleted file mode 100644 index bd64c53..0000000 --- a/test/tesseract.js +++ /dev/null @@ -1,19 +0,0 @@ -'use strict'; - -var tesseract = require('../lib/tesseract'); -var should = require('should'); - - 
-describe('process', function(){ - it('should return the string "node-tesseract"', function(done){ - - var testImage = __dirname + '/test.png'; - - tesseract.process(testImage, function(err, text) { - text.trim().should.equal('node-tesseract'); - done(); - }); - - }) -}) - diff --git a/test/test.png b/test/test.png deleted file mode 100644 index 9ebe362..0000000 Binary files a/test/test.png and /dev/null differ diff --git a/test/utils.js b/test/utils.js deleted file mode 100644 index c6f250a..0000000 --- a/test/utils.js +++ /dev/null @@ -1,17 +0,0 @@ -'use strict'; - -var utils = require('../lib/utils'); -var should = require('should'); - - -describe('Tests merge helper', function () { - it('object should have all properties', function () { - var objA = {'a': 'A', 'b': 'B'}, - objB = {'c': 'C', 'd': 'D'}, - objC = utils.merge(objA, objB); - objC.should.have.property('a', 'A'); - objC.should.have.property('b', 'B'); - objC.should.have.property('c', 'C'); - objC.should.have.property('d', 'D'); - }); -}); diff --git a/wercker.yml b/wercker.yml deleted file mode 100644 index eaa7f3e..0000000 --- a/wercker.yml +++ /dev/null @@ -1,12 +0,0 @@ -box: wercker/nodejs -# Build definition -build: - - steps: - - script: - name: install tesseract - code: | - sudo apt-get update -qq - sudo apt-get install -qq tesseract-ocr - - npm-install - - npm-test