diff options
-rw-r--r-- | apaperless.cabal | 35 | ||||
-rw-r--r-- | app/Main.hs | 3 |
2 files changed, 20 insertions, 18 deletions
diff --git a/apaperless.cabal b/apaperless.cabal index 8e26457..08001b4 100644 --- a/apaperless.cabal +++ b/apaperless.cabal @@ -28,32 +28,33 @@ executable apaperless Store -- other-extensions: build-depends: + aeson, + attoparsec, base, - typed-process, + binary, bytestring, - temporary, + containers, + data-default, + deepseq, directory, filepath, - text, - containers, - attoparsec, - parallel-io, - aeson, + lock-file, optparse-applicative, + parallel, + parallel-io, + regex, SHA, - lock-file, - data-default, + split, tags, + temporary, + text, time, - regex, - yaml, - xdg-basedir, - binary, - vector, - parallel, - deepseq, + typed-process, + unicode-transforms, unordered-containers, + vector, vector-binary-instances, - split + xdg-basedir, + yaml hs-source-dirs: app default-language: GHC2021 diff --git a/app/Main.hs b/app/Main.hs index 479f3b0..0d4edcc 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -36,6 +36,7 @@ import Data.String (IsString (fromString)) import Data.Text qualified as T import Data.Text.Encoding qualified as T import Data.Text.IO qualified as T +import Data.Text.Normalize qualified as T import Data.Time.Clock (getCurrentTime) import Data.Time.Format.ISO8601 (iso8601ParseM) import Debug.Trace @@ -759,7 +760,7 @@ consume1 language force keep filePath = do let iFilePath = "index" </> fKey <.> "json" content <- do content' <- - T.decodeUtf8 . LB.toStrict + T.normalize T.NFC . T.decodeUtf8 . LB.toStrict <$> sh (printf "pdftotext -layout '%s' -" filePath) let hasText = (not . T.null) . T.strip $ content' if not hasText |