From ac28d9721b7d61c9087306268501bbb6f30faed2 Mon Sep 17 00:00:00 2001 From: heyhey Date: Thu, 12 Nov 2020 22:47:40 +0100 Subject: [PATCH] Merge https://github.com/desmondmorris/node-tesseract/pull/70 pull request --- .gitignore | 1 + README.md | 67 +--------------------------------------------- lib/tesseract.js | 11 +++++--- test/tesseract.js | 19 ------------- test/test.png | Bin 2977 -> 0 bytes test/utils.js | 17 ------------ wercker.yml | 12 --------- 7 files changed, 10 insertions(+), 117 deletions(-) delete mode 100644 test/tesseract.js delete mode 100644 test/test.png delete mode 100644 test/utils.js delete mode 100644 wercker.yml diff --git a/.gitignore b/.gitignore index 3c3629e..eb79dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +.idea diff --git a/README.md b/README.md index af8fcde..54dd1d6 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,3 @@ # Tesseract for node.js -[![NPM](https://nodei.co/npm/node-tesseract.png)](https://nodei.co/npm/node-tesseract/) - -A simple wrapper for the Tesseract OCR package for node.js - -## Requirements - -* Tesseract 3.01 or higher is needed for this to work - -## Installation -There is a hard dependency on the [Tesseract project](https://github.com/tesseract-ocr/tesseract). You can find installation instructions for various platforms on the project site. For Homebrew users, the installation is quick and easy. - - brew install tesseract --with-all-languages - -The above will install all of the language packages available, if you don't need them all you can remove the `--all-languages` flag and install them manually, by downloading them to your local machine and then exposing the `TESSDATA_PREFIX` variable into your path: - - export TESSDATA_PREFIX=~/Downloads/ - -You can then go about installing the node-module to expose the JavaScript API: - - npm install node-tesseract - -## Usage - -```JavaScript -var tesseract = require('node-tesseract'); - -// Recognize text of any language in any format -tesseract.process(__dirname + '/path/to/image.jpg',function(err, text) { - if(err) { - console.error(err); - } else { - console.log(text); - } -}); - -// Recognize German text in a single uniform block of text and set the binary path - -var options = { - l: 'deu', - psm: 6, - binary: '/usr/local/bin/tesseract' -}; - -tesseract.process(__dirname + '/path/to/image.jpg', options, function(err, text) { - if(err) { - console.error(err); - } else { - console.log(text); - } -}); -``` - -## Changelog -* **0.2.7**: Adds output file extension detection -* **0.2.6**: Catches exception when deleting tmp files that do not exist -* **0.2.5**: Preserves whitespace and replaces tmp module -* **0.2.4**: Removes console logging for messaging -* **0.2.3**: The ability to set the binary path via the config object. Better installation documentation. -* **0.2.2**: Adds test converage to utils module -* **0.2.1**: Strips leading & trailing whitespace from output by default -* **0.2.0**: Adds ability to pass options via a configuration object. -* **0.1.1**: Updates tmp module. -* **0.1.0**: Removes preprocessing functionatlity. See #3. -* **0.0.3**: Adds basic test coverage for process method -* **0.0.2**: Pulls in changes by [joscha](https://github.com/joscha) including: refactored to support tesseract 3.01, added language parameter, config parameter, documentation, Added support for custom preprocessors, OTB Preprocessor using ImageMagick 'convert' -* **0.0.1**: Initial version +This project is fork from [node-tesseract](https://github.com/desmondmorris/node-tesseract) diff --git a/lib/tesseract.js b/lib/tesseract.js index 2f7e13c..9a70b01 100644 --- a/lib/tesseract.js +++ b/lib/tesseract.js @@ -63,7 +63,7 @@ var Tesseract = { } if (options.psm !== null) { - command.push('-psm ' + options.psm); + command.push('--psm ' + options.psm); } if (options.config !== null) { @@ -83,7 +83,7 @@ var Tesseract = { } // Find one of the three possible extension - glob(output + '.+(html|hocr|txt)', function(err, files){ + glob(output + '.+(html|hocr|txt)', function(err, files) { if (err) { callback(err, null); return; @@ -97,7 +97,12 @@ var Tesseract = { var index = Tesseract.tmpFiles.indexOf(output); if (~index) Tesseract.tmpFiles.splice(index, 1); - fs.unlinkSync(files[0]); + fs.unlink(files[0], (err) => { + if (err) { + callback(err, null); + return; + } + }); callback(null, data) }); diff --git a/test/tesseract.js b/test/tesseract.js deleted file mode 100644 index bd64c53..0000000 --- a/test/tesseract.js +++ /dev/null @@ -1,19 +0,0 @@ -'use strict'; - -var tesseract = require('../lib/tesseract'); -var should = require('should'); - - -describe('process', function(){ - it('should return the string "node-tesseract"', function(done){ - - var testImage = __dirname + '/test.png'; - - tesseract.process(testImage, function(err, text) { - text.trim().should.equal('node-tesseract'); - done(); - }); - - }) -}) - diff --git a/test/test.png b/test/test.png deleted file mode 100644 index 9ebe3629076bd4f6fcde5061bdf89e205e23bf0d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2977 zcmaJ@2{@E%8y?9dOV$ddrXkfa%#s;qA|`{8H6)T{%#50t`KDR2H0LBFB}7NjhAeZm z2nX3CS)zXs$?k}bGAVJ8{r|L`^Z)-j|9gGc_dV}(J=(`;7f;DGbu;uPIL+__{3AX zF$5y1&2Vw!x!Ktgr~p%+vJ#^o#AJin5Qwp95Sv2vr}JRG^rH-x3H(FZBRGsfGl3t% z+M(>&=JaC>atMd+9AfW64e_VqX>ilMFykNs2*9NCD6k-A0E1ibA7dIGjG(0E@$+5ikr2jX|Qo2d9TNAYgF>j3Mmn zfP<@XXnq7IlI7R6z?ljB7>~y$Ad!KAf%<{F^#RUNBpQ#$uV7#>dSHYeH<-nv1nIH3 zTHg>zbS{;{VDlIN3$}tt@dfxi6FA89O9@Q&cUTtpYn#A^A%iGvBw8P}Qqs3TJG=iM z%4B|z=JK5AfAalXV6ICrn~rp%a{)ew3T~XA*2+?Bf;opy;Q<^M00{WjMaN?R58xgH z*f4Wv9Bii@h00*9ICg!`u(Kmrv$#A8i%Pd9nZQ8|eFlR@urRVTM5C~lI3o)p8f~$g zXn`XVEm0^#OCzkIfjQ}0E(xIWnRFKKTQ2Q;E)MmNxhrB|vcZ)}bPnSroo2}an6S@9 z6BvKP#lR4cH`-?b;^DA=&*e)l?H{>dE&h=U5+s9M+1kHs^*0x&pB3}FaKXuU>C;)D z-Z`MKHHnNa2t-=NnnZL7>V9{`hJ4UdwJ62!mD#H8X+-x8X;5r7QIV{mF{Ng(3&ZgV zcigEVwSG7QikC~+Vl;-9EmM@fjg@JON^sg{Ad^At%UiDZ%(_za zVxpGEsh@>mF{-S=q?DA6-eY2My1^1={etT?Ei?Xt^r!{erna z-Y(A1JROm-Ee-t8f8XPw-(^Y5`Hw?Z=FD*WoO}1~JrjxcmlYNkW@ct8LdP4Lnwnsw zvXYXLUw_?gn^<%j+7HKtT4p-AA3hxBCAxv9`jZS7&tzp~Y3@0xt+dB4c;>xl6&dpj z4C~6J`uL>gMJ6XFA9~13~?fR)xGw-pbkUO)p!2{*R{ONb@*JU=I5Wj5` zPN>L_WGHOi`-j~q6)ShM^}O_^F10Eyi2Te-YIfvZO0r=a6n@!q%VyJ?h{0Fw?LRvd zNoXnYk2(@@swMXq=BC8rmk-kOew?2+KesLojlsYlW}uCYH*c%vv=0m{zp9m;J)e}v z9kC}#m8`n+@#BetvGH&p?!@p@q<3Y5LxCZ)TRQ?0wjTf<#5H$Xw^p8Bt|T>LF2zU~ z=UX}_j*m2_(BGa82Gm+}!xw-x2!Orzhn#tv@JEitb z=2*Xp*6psoEQaL#|}NF3IHK>RO+&*J+-@a9BFwQ zkrtOgmGL;THs*<-&TT!-vU=Unk=F;KlzPs#yEKZ8D*=M)NUQEduNMx zT|yfr9}O9DbjR;F8)Yj7QH5+cu`QYGpTiocZ3uZ;?{qId>4JVUOwB5+=n0`=6|MDB>ldehte>NX)b z=&?)ilI`-- z^3vF;s)*OelhuzgnJ;`!EPOiA)1gIPDa$w9&&EbpAh#;u`0@RPAJy*qUqm!9`T}%& zV&hkPKl&-LZfS8|vacwh3myZCUV5-K-AXc|Ebv1S(l@iCc++{BQuDc6ib_fwv76y@ zqm3Jg6LX?xa#aYQ=*Lyr&O$|@!wn;eGh$p0(=vN*MAbf^oXJln>ued-Q3&q!npfg_=_ z!7uf_2H21XLg9{A1&sNP@Ee_}+4XyXfy%@eT94w}yQ*I=uMX0kO03MhLvjTTO8@%v$wJr^71J(zeMzWXwO1*kTgGo1J*wU~$5yytjT%LDR1{b0*~udN z6#O{&`0Nzjj1yO_F`Th^LzaKkWUelA*|@xRmC9R>%$sn1ef{SY<%)4LTcJbBG`~$_ zja_V_VpE9nJ{2aD+3j_;85QlDdKVfju;vU;DtxM{jovsGfB6sOL8BxWg+ePUE0x%k z7IKHO*j14IaV;9zVwWC!$9d+o!LC4f{#m_a0B{wm9Lf(K#i&)yn_OJmv=8Q_{nq;q z+a<1|qT*@A0lvIew(H$9ixrOsD7lJFx)PWO@U+9vBM=C3cW7>VU*G+?D=(lCiTi7< z-XN+S55H-)>T%p7W47cNsWE|q?2;nC)4No#_|np-^8U5N#P!1BOXcTtz*AU2ot~Kq z-#(dc)Wah_>XuVdw zeYRkcE3=~m%A*NwU3{md^AL=&M27a%0gcNYlxL;TD9I~&e5;n#Pck(49i(4ulFnM| zq+L1OSx9Tl#b>*PyJtq96g^inTVp8Ok3sod89pNb^+^va@{9(LS^L2!-Jw@TI}4fd zGJ!ocKO&M9Uftb3nVssT2~}WR*@nMcS(7F@G;zPXP*W#=^?EOv*pD@mepJ|zGFJU> z1&fp88~i14rjpp5JN#z7pV<4l=Wo?^m6{qw{jgs+Rr5yDI^HuWz^V2pg`w-JROsHv zPZyXM7fo!TPyng!iAWGh#$yOOLyJ=_m=%~r~cQ~`=8DO4xbK%N