From 820d20e8f0689c49ce4ac59378182cc80cb48129 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Thu, 28 Dec 2023 04:06:50 +0100 Subject: chore: `originalText` -> `content` --- app/Main.hs | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'app/Main.hs') diff --git a/app/Main.hs b/app/Main.hs index 321cf4c..23c70d1 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -389,9 +389,9 @@ doRedoIf filters redo = =<< getDocuments where doRedo doc = do - originalText <- ocr doc.index.language doc.oFilePath + content <- ocr doc.index.language doc.oFilePath withGit do - J.encodeFile doc.iFilePath doc.index {originalText = originalText} + J.encodeFile doc.iFilePath doc.index {content = content} commitAll [doc.iFilePath] (printf "redo %s" (takeBaseName doc.iFilePath)) data Document = Document @@ -462,7 +462,7 @@ processDocumentsInteractively settings allDocs docs = case choice of "f" -> do printf "%s\n" (takeBaseName doc.iFileName) - printf "%s\n" doc.index.originalText + printf "%s\n" doc.index.content processDocumentInteractively n doc "p" -> tagDocumentInteractively settings allDocs doc "s" -> pure doc @@ -486,7 +486,7 @@ editDocuments docs = let fp doc = tmp takeBaseName doc.iFileName <.> "txt" mapM_ ( \doc -> - T.writeFile (fp doc) doc.index.originalText + T.writeFile (fp doc) doc.index.content ) docs sh_ @@ -498,11 +498,11 @@ editDocuments docs = ) mapM_ ( \doc -> do - originalText <- T.readFile (fp doc) + content <- T.readFile (fp doc) withGit do - J.encodeFile doc.iFilePath doc.index {originalText = originalText} + J.encodeFile doc.iFilePath doc.index {content = content} commitAll [doc.iFilePath] (printf "edit %s" (takeBaseName doc.iFilePath)) - pure originalText + pure content ) docs @@ -541,7 +541,7 @@ suggestTags settings allDocs doc = do S.SuggestTagByRE tagName pattern -> do let tagValues = nub . catMaybes . map R.matchedText . R.allMatches $ - doc.index.originalText + doc.index.content R.*=~ pattern pure (G.tag tagName (Just ""), map (G.tag tagName . Just) tagValues) S.SuggestTagByTags tagName -> do @@ -627,6 +627,13 @@ addTag tag doc = doc {index = doc.index {addedAt = addedAt}} ) (iso8601ParseM . T.unpack =<< (G.tagValue tag)) + | G.tagKey tag == "content" -> + maybe + (throw (AddTagException tag)) + ( \content -> + doc {index = doc.index {content = content}} + ) + (G.tagValue tag) | G.tagKey tag == "language" -> throw (AddTagException tag) | otherwise -> @@ -655,6 +662,8 @@ removeTag tag doc = throw (RemoveTagException tag) | G.tagKey tag == "language" -> throw (RemoveTagException tag) + | G.tagKey tag == "content" -> + throw (RemoveTagException tag) | otherwise -> doc { index = @@ -729,14 +738,14 @@ consume1 language force keep filePath = do when (originalExists && not force) do error (printf "error: error adding %s: duplicate of %s\n" filePath oFilePath) let iFilePath = "index" fKey <.> "json" - originalText <- do - originalText' <- + content <- do + content' <- T.decodeUtf8 . LB.toStrict <$> sh (printf "pdftotext -layout '%s' -" filePath) - let hasText = (not . T.null) . T.strip $ originalText' + let hasText = (not . T.null) . T.strip $ content' if not hasText then ocr language filePath - else pure originalText' + else pure content' addedAt <- getCurrentTime withGit do J.encodeFile iFilePath Index {tags = S.empty, todo = True, ..} @@ -801,7 +810,7 @@ ocr1 language tmp input = <$> sh (printf "tesseract '%s' - -l '%s' --oem 3 --psm 1" (tmp input) language) data Index = Index - { originalText :: T.Text, + { content :: T.Text, tags :: S.Set G.Tag, addedAt :: UTCTime, todo :: Bool, @@ -816,7 +825,7 @@ instance J.FromJSON Index instance HasField "shortText" Index T.Text where getField = (T.unlines . take 10 . T.lines) - . (.originalText) + . (.content) instance HasField "internalTags" Index (S.Set G.Tag) where getField index = @@ -828,7 +837,7 @@ instance HasField "internalTags" Index (S.Set G.Tag) where -- @related cache-probabilitymap instance HasField "originalWords" Index (S.Set T.Text) where getField index = - S.fromList (T.words index.originalText) + S.fromList (T.words index.content) internalTags :: Index -> S.Set G.Tag internalTags index = -- cgit v1.2.3