diff options
-rw-r--r-- | app/Main.hs | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/app/Main.hs b/app/Main.hs index 4ee4913..920d3a6 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -23,6 +23,7 @@ import Data.Maybe (fromMaybe) import Data.String (IsString (fromString)) import Data.Text qualified as T import Data.Text.Encoding qualified as T +import Data.Text.IO qualified as T import Debug.Trace import GHC.Conc (getNumProcessors) import GHC.Generics (Generic) @@ -91,7 +92,11 @@ main = do mapM_ putStrLn =<< parMapM (consume1 keep) (map (cwd </>) filePaths) Args {cmd = List} -> do - mapM_ (putStrLn . takeBaseName . fst) + mapM_ + ( \(iFileName, index) -> do + putStrLn (takeBaseName iFileName) + T.putStrLn index.originalText + ) =<< parMapM ( \iFileName -> (,) iFileName <$> decodeFile @Index ("index" </> iFileName) @@ -188,14 +193,9 @@ ocr input = T.unlines <$> mapM (ocr1 tmp . (tmp </>)) imageFiles ocr1 :: FilePath -> FilePath -> IO T.Text -ocr1 tmp input = do +ocr1 tmp input = T.decodeUtf8 . LB.toStrict - <$> sh - ( printf - "tesseract '%s' '%s' pdf" - (tmp </> input) - (tmp </> takeBaseName input) - ) + <$> sh (printf "tesseract '%s' -" (tmp </> input)) data Index = Index { originalText :: T.Text |