diff options
author | Alexander Foremny <aforemny@posteo.de> | 2024-02-07 04:43:19 +0100 |
---|---|---|
committer | Alexander Foremny <aforemny@posteo.de> | 2024-02-16 04:39:20 +0100 |
commit | f86cbf2598c6421a4ceab55c5fd27da12989cda3 (patch) | |
tree | 79dadd8cb93d8ddbc6ca3ba76b2d45a43daf2aed /app/Main.hs | |
parent | 9ba726831b973ae0f57e640f3a0966da0ca1941d (diff) |
chore: normalize UTF-8 (branch: main)
Diffstat (limited to 'app/Main.hs')
-rw-r--r-- | app/Main.hs | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/app/Main.hs b/app/Main.hs
index 479f3b0..0d4edcc 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -36,6 +36,7 @@ import Data.String (IsString (fromString))
 import Data.Text qualified as T
 import Data.Text.Encoding qualified as T
 import Data.Text.IO qualified as T
+import Data.Text.Normalize qualified as T
 import Data.Time.Clock (getCurrentTime)
 import Data.Time.Format.ISO8601 (iso8601ParseM)
 import Debug.Trace
@@ -759,7 +760,7 @@ consume1 language force keep filePath = do
   let iFilePath = "index" </> fKey <.> "json"
   content <- do
     content' <-
-      T.decodeUtf8 . LB.toStrict
+      T.normalize T.NFC . T.decodeUtf8 . LB.toStrict
         <$> sh (printf "pdftotext -layout '%s' -" filePath)
     let hasText = (not . T.null) . T.strip $ content'
     if not hasText
```