summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- apaperless.cabal 35
-rw-r--r-- app/Main.hs 3
2 files changed, 20 insertions, 18 deletions
diff --git a/apaperless.cabal b/apaperless.cabal
index 8e26457..08001b4 100644
--- a/apaperless.cabal
+++ b/apaperless.cabal
@@ -28,32 +28,33 @@ executable apaperless
Store
-- other-extensions:
build-depends:
+ aeson,
+ attoparsec,
base,
- typed-process,
+ binary,
bytestring,
- temporary,
+ containers,
+ data-default,
+ deepseq,
directory,
filepath,
- text,
- containers,
- attoparsec,
- parallel-io,
- aeson,
+ lock-file,
optparse-applicative,
+ parallel,
+ parallel-io,
+ regex,
SHA,
- lock-file,
- data-default,
+ split,
tags,
+ temporary,
+ text,
time,
- regex,
- yaml,
- xdg-basedir,
- binary,
- vector,
- parallel,
- deepseq,
+ typed-process,
+ unicode-transforms,
unordered-containers,
+ vector,
vector-binary-instances,
- split
+ xdg-basedir,
+ yaml
hs-source-dirs: app
default-language: GHC2021
diff --git a/app/Main.hs b/app/Main.hs
index 479f3b0..0d4edcc 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -36,6 +36,7 @@ import Data.String (IsString (fromString))
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Data.Text.IO qualified as T
+import Data.Text.Normalize qualified as T
import Data.Time.Clock (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601ParseM)
import Debug.Trace
@@ -759,7 +760,7 @@ consume1 language force keep filePath = do
let iFilePath = "index" </> fKey <.> "json"
content <- do
content' <-
- T.decodeUtf8 . LB.toStrict
+ T.normalize T.NFC . T.decodeUtf8 . LB.toStrict
<$> sh (printf "pdftotext -layout '%s' -" filePath)
let hasText = (not . T.null) . T.strip $ content'
if not hasText