summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alexander Foremny <aforemny@posteo.de> 2023-12-22 04:03:47 +0100
committer Alexander Foremny <aforemny@posteo.de> 2023-12-22 04:03:47 +0100
commit d5d96dba1ff5c9cd66c295665fb422c8e930ff9d (patch)
tree 4c6671fc23845dc738cfffed7e18569bd14f0cd2
parent eb584575ca1fb1420c217452d659460aa5736663 (diff)
fix: fix `originalText`
-rw-r--r-- app/Main.hs 16
1 files changed, 8 insertions, 8 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 4ee4913..920d3a6 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -23,6 +23,7 @@ import Data.Maybe (fromMaybe)
import Data.String (IsString (fromString))
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
+import Data.Text.IO qualified as T
import Debug.Trace
import GHC.Conc (getNumProcessors)
import GHC.Generics (Generic)
@@ -91,7 +92,11 @@ main = do
mapM_ putStrLn
=<< parMapM (consume1 keep) (map (cwd </>) filePaths)
Args {cmd = List} -> do
- mapM_ (putStrLn . takeBaseName . fst)
+ mapM_
+ ( \(iFileName, index) -> do
+ putStrLn (takeBaseName iFileName)
+ T.putStrLn index.originalText
+ )
=<< parMapM
( \iFileName ->
(,) iFileName <$> decodeFile @Index ("index" </> iFileName)
@@ -188,14 +193,9 @@ ocr input =
T.unlines <$> mapM (ocr1 tmp . (tmp </>)) imageFiles
ocr1 :: FilePath -> FilePath -> IO T.Text
-ocr1 tmp input = do
+ocr1 tmp input =
T.decodeUtf8 . LB.toStrict
- <$> sh
- ( printf
- "tesseract '%s' '%s' pdf"
- (tmp </> input)
- (tmp </> takeBaseName input)
- )
+ <$> sh (printf "tesseract '%s' -" (tmp </> input))
data Index = Index
{ originalText :: T.Text