summary | refs | log | tree | commit | diff | stats
path: root/app/Main.hs
diff options
context:
space:
mode:
author: Alexander Foremny <aforemny@posteo.de>, 2024-02-07 04:43:19 +0100
committer: Alexander Foremny <aforemny@posteo.de>, 2024-02-16 04:39:20 +0100
commit: f86cbf2598c6421a4ceab55c5fd27da12989cda3 (patch)
tree: 79dadd8cb93d8ddbc6ca3ba76b2d45a43daf2aed /app/Main.hs
parent: 9ba726831b973ae0f57e640f3a0966da0ca1941d (diff)
chore: normalize UTF-8 [branch: main]
Diffstat (limited to 'app/Main.hs')
-rw-r--r-- app/Main.hs 3
1 file changed, 2 insertions, 1 deletion
diff --git a/app/Main.hs b/app/Main.hs
index 479f3b0..0d4edcc 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -36,6 +36,7 @@ import Data.String (IsString (fromString))
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Data.Text.IO qualified as T
+import Data.Text.Normalize qualified as T
import Data.Time.Clock (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601ParseM)
import Debug.Trace
@@ -759,7 +760,7 @@ consume1 language force keep filePath = do
let iFilePath = "index" </> fKey <.> "json"
content <- do
content' <-
- T.decodeUtf8 . LB.toStrict
+ T.normalize T.NFC . T.decodeUtf8 . LB.toStrict
<$> sh (printf "pdftotext -layout '%s' -" filePath)
let hasText = (not . T.null) . T.strip $ content'
if not hasText