diff options
Diffstat (limited to 'app/Main.hs')
-rw-r--r-- | app/Main.hs | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 479f3b0..0d4edcc 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -36,6 +36,7 @@ import Data.String (IsString (fromString))
 import Data.Text qualified as T
 import Data.Text.Encoding qualified as T
 import Data.Text.IO qualified as T
+import Data.Text.Normalize qualified as T
 import Data.Time.Clock (getCurrentTime)
 import Data.Time.Format.ISO8601 (iso8601ParseM)
 import Debug.Trace
@@ -759,7 +760,7 @@ consume1 language force keep filePath = do
   let iFilePath = "index" </> fKey <.> "json"
   content <- do
     content' <-
-      T.decodeUtf8 . LB.toStrict
+      T.normalize T.NFC . T.decodeUtf8 . LB.toStrict
         <$> sh (printf "pdftotext -layout '%s' -" filePath)
     let hasText = (not . T.null) . T.strip $ content'
     if not hasText