| Hash | Commit message | Author | Date | Files | + | - |
1 | commit 03ee7bbb016621559434e19f72daba45383ff954 |
2 | Author: Connor Etherington <[email protected]> |
3 | Date: Wed Oct 25 10:35:45 2023 +0200 |
4 | |
5 | Auto-Commit Update - 20231025 |
6 | --- |
7 | usr/share/lfp/ocr/ocr.js | 6 +++--- |
8 | 1 file changed, 3 insertions(+), 3 deletions(-) |
9 | |
10 | diff --git a/usr/share/lfp/ocr/ocr.js b/usr/share/lfp/ocr/ocr.js |
11 | index c87a373..e4af7b0 100755 |
12 | --- a/usr/share/lfp/ocr/ocr.js |
13 | +++ b/usr/share/lfp/ocr/ocr.js |
14 | @@ -24,7 +24,9 @@ const sh = (cmd, opts) => new Promise((resolve, reject) => { |
15 | |
16 | const ocr = async (imagePath) => { |
17 | const Type = await sh(`file -b --mime-type ${imagePath}`); |
18 | - if (Type === 'PDF document') { |
19 | + if (await sh(`file -bL --mime-encoding ${imagePath}`) === 'utf-8') { |
20 | + return await fsp.readFile(imagePath, 'utf8'); |
21 | + } else if (Type === 'PDF document') { |
22 | await sh(`convert ${imagePath} -background white ${imagePath}.png`); |
23 | const res = await tesseract.recognize(`${imagePath}.png`, config); |
24 | await fs.unlinkSync(`${imagePath}.png`); |
25 | @@ -32,8 +34,6 @@ const ocr = async (imagePath) => { |
26 | } else if(Type === 'PNG image data' || Type === 'JPEG image data' || Type === 'TIFF image data' || Type === 'image/x-portable-bitmap' || Type === 'image/png' || Type === 'image/jpeg' || Type === 'image/tiff' || Type === 'PC bitmap') { |
27 | const image = await fsp.readFile(imagePath); |
28 | return await tesseract.recognize(image, config); |
29 | - } else if (Type === 'Unicode text') { |
30 | - return await fsp.readFile(imagePath, 'utf8'); |
31 | } else { |
32 | throw new Error('Unknown image type'); |
33 | } |