lfp


Logs | Files | README | README | LICENSE | GitLab


1
commit 03ee7bbb016621559434e19f72daba45383ff954
2
Author: Connor Etherington <[email protected]>
3
Date:   Wed Oct 25 10:35:45 2023 +0200
4
5
    Auto-Commit Update - 20231025
6
---
7
 usr/share/lfp/ocr/ocr.js | 6 +++---
8
 1 file changed, 3 insertions(+), 3 deletions(-)
9
10
diff --git a/usr/share/lfp/ocr/ocr.js b/usr/share/lfp/ocr/ocr.js
11
index c87a373..e4af7b0 100755
12
--- a/usr/share/lfp/ocr/ocr.js
13
+++ b/usr/share/lfp/ocr/ocr.js
14
@@ -24,7 +24,9 @@ const sh = (cmd, opts) => new Promise((resolve, reject) => {
15
 
16
 const ocr = async (imagePath) => {
17
   const Type = await sh(`file -b --mime-type ${imagePath}`);
18
-  if (Type === 'PDF document') {
19
+  if (await sh(`file -bL --mime-encoding ${imagePath}`) === 'utf-8') {
20
+    return await fsp.readFile(imagePath, 'utf8');
21
+  } else if (Type === 'PDF document') {
22
     await sh(`convert ${imagePath} -background white ${imagePath}.png`);
23
     const res = await tesseract.recognize(`${imagePath}.png`, config);
24
     await fs.unlinkSync(`${imagePath}.png`);
25
@@ -32,8 +34,6 @@ const ocr = async (imagePath) => {
26
   } else if(Type === 'PNG image data' || Type === 'JPEG image data' || Type === 'TIFF image data' || Type === 'image/x-portable-bitmap' || Type === 'image/png' || Type === 'image/jpeg' || Type === 'image/tiff' || Type === 'PC bitmap') {
27
     const image = await fsp.readFile(imagePath);
28
     return await tesseract.recognize(image, config);
29
-  } else if (Type === 'Unicode text') {
30
-    return await fsp.readFile(imagePath, 'utf8');
31
   } else {
32
     throw new Error('Unknown image type');
33
   }