summary | refs | log | tree | commit | diff | stats
path: root/app
diff options
context:
space:
mode:
Diffstat (limited to 'app')
-rw-r--r-- app/Main.hs 3
1 files changed, 2 insertions, 1 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 479f3b0..0d4edcc 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -36,6 +36,7 @@ import Data.String (IsString (fromString))
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Data.Text.IO qualified as T
+import Data.Text.Normalize qualified as T
import Data.Time.Clock (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601ParseM)
import Debug.Trace
@@ -759,7 +760,7 @@ consume1 language force keep filePath = do
let iFilePath = "index" </> fKey <.> "json"
content <- do
content' <-
- T.decodeUtf8 . LB.toStrict
+ T.normalize T.NFC . T.decodeUtf8 . LB.toStrict
<$> sh (printf "pdftotext -layout '%s' -" filePath)
let hasText = (not . T.null) . T.strip $ content'
if not hasText