diff options
author | Alexander Foremny <aforemny@posteo.de> | 2023-12-22 04:03:47 +0100 |
---|---|---|
committer | Alexander Foremny <aforemny@posteo.de> | 2023-12-22 04:03:47 +0100 |
commit | d5d96dba1ff5c9cd66c295665fb422c8e930ff9d (patch) | |
tree | 4c6671fc23845dc738cfffed7e18569bd14f0cd2 | |
parent | eb584575ca1fb1420c217452d659460aa5736663 (diff) |
fix: fix `originalText`
-rw-r--r-- | app/Main.hs | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
diff --git a/app/Main.hs b/app/Main.hs
index 4ee4913..920d3a6 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -23,6 +23,7 @@ import Data.Maybe (fromMaybe)
 import Data.String (IsString (fromString))
 import Data.Text qualified as T
 import Data.Text.Encoding qualified as T
+import Data.Text.IO qualified as T
 import Debug.Trace
 import GHC.Conc (getNumProcessors)
 import GHC.Generics (Generic)
@@ -91,7 +92,11 @@ main = do
       mapM_ putStrLn
         =<< parMapM (consume1 keep) (map (cwd </>) filePaths)
     Args {cmd = List} -> do
-      mapM_ (putStrLn . takeBaseName . fst)
+      mapM_
+        ( \(iFileName, index) -> do
+            putStrLn (takeBaseName iFileName)
+            T.putStrLn index.originalText
+        )
         =<< parMapM
           ( \iFileName ->
               (,) iFileName <$> decodeFile @Index ("index" </> iFileName)
@@ -188,14 +193,9 @@ ocr input =
     T.unlines <$> mapM (ocr1 tmp . (tmp </>)) imageFiles
 
 ocr1 :: FilePath -> FilePath -> IO T.Text
-ocr1 tmp input = do
+ocr1 tmp input =
   T.decodeUtf8 . LB.toStrict
-    <$> sh
-      ( printf
-          "tesseract '%s' '%s' pdf"
-          (tmp </> input)
-          (tmp </> takeBaseName input)
-      )
+    <$> sh (printf "tesseract '%s' -" (tmp </> input))
 
 data Index = Index
   { originalText :: T.Text
```