summaryrefslogtreecommitdiffstats
path: root/app/Main.hs
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-28 04:06:50 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-28 04:06:50 +0100
commit820d20e8f0689c49ce4ac59378182cc80cb48129 (patch)
tree602780faedacecca141b5914ab1447f8bc5b86c1 /app/Main.hs
parentd6e3c146d7cf0296c8c202aadf759265af8fd945 (diff)
chore: `originalText` -> `content`
Diffstat (limited to 'app/Main.hs')
-rw-r--r--app/Main.hs39
1 files changed, 24 insertions, 15 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 321cf4c..23c70d1 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -389,9 +389,9 @@ doRedoIf filters redo =
=<< getDocuments
where
doRedo doc = do
- originalText <- ocr doc.index.language doc.oFilePath
+ content <- ocr doc.index.language doc.oFilePath
withGit do
- J.encodeFile doc.iFilePath doc.index {originalText = originalText}
+ J.encodeFile doc.iFilePath doc.index {content = content}
commitAll [doc.iFilePath] (printf "redo %s" (takeBaseName doc.iFilePath))
data Document = Document
@@ -462,7 +462,7 @@ processDocumentsInteractively settings allDocs docs =
case choice of
"f" -> do
printf "%s\n" (takeBaseName doc.iFileName)
- printf "%s\n" doc.index.originalText
+ printf "%s\n" doc.index.content
processDocumentInteractively n doc
"p" -> tagDocumentInteractively settings allDocs doc
"s" -> pure doc
@@ -486,7 +486,7 @@ editDocuments docs =
let fp doc = tmp </> takeBaseName doc.iFileName <.> "txt"
mapM_
( \doc ->
- T.writeFile (fp doc) doc.index.originalText
+ T.writeFile (fp doc) doc.index.content
)
docs
sh_
@@ -498,11 +498,11 @@ editDocuments docs =
)
mapM_
( \doc -> do
- originalText <- T.readFile (fp doc)
+ content <- T.readFile (fp doc)
withGit do
- J.encodeFile doc.iFilePath doc.index {originalText = originalText}
+ J.encodeFile doc.iFilePath doc.index {content = content}
commitAll [doc.iFilePath] (printf "edit %s" (takeBaseName doc.iFilePath))
- pure originalText
+ pure content
)
docs
@@ -541,7 +541,7 @@ suggestTags settings allDocs doc = do
S.SuggestTagByRE tagName pattern -> do
let tagValues =
nub . catMaybes . map R.matchedText . R.allMatches $
- doc.index.originalText
+ doc.index.content
R.*=~ pattern
pure (G.tag tagName (Just ""), map (G.tag tagName . Just) tagValues)
S.SuggestTagByTags tagName -> do
@@ -627,6 +627,13 @@ addTag tag doc =
doc {index = doc.index {addedAt = addedAt}}
)
(iso8601ParseM . T.unpack =<< (G.tagValue tag))
+ | G.tagKey tag == "content" ->
+ maybe
+ (throw (AddTagException tag))
+ ( \content ->
+ doc {index = doc.index {content = content}}
+ )
+ (G.tagValue tag)
| G.tagKey tag == "language" ->
throw (AddTagException tag)
| otherwise ->
@@ -655,6 +662,8 @@ removeTag tag doc =
throw (RemoveTagException tag)
| G.tagKey tag == "language" ->
throw (RemoveTagException tag)
+ | G.tagKey tag == "content" ->
+ throw (RemoveTagException tag)
| otherwise ->
doc
{ index =
@@ -729,14 +738,14 @@ consume1 language force keep filePath = do
when (originalExists && not force) do
error (printf "error: error adding %s: duplicate of %s\n" filePath oFilePath)
let iFilePath = "index" </> fKey <.> "json"
- originalText <- do
- originalText' <-
+ content <- do
+ content' <-
T.decodeUtf8 . LB.toStrict
<$> sh (printf "pdftotext -layout '%s' -" filePath)
- let hasText = (not . T.null) . T.strip $ originalText'
+ let hasText = (not . T.null) . T.strip $ content'
if not hasText
then ocr language filePath
- else pure originalText'
+ else pure content'
addedAt <- getCurrentTime
withGit do
J.encodeFile iFilePath Index {tags = S.empty, todo = True, ..}
@@ -801,7 +810,7 @@ ocr1 language tmp input =
<$> sh (printf "tesseract '%s' - -l '%s' --oem 3 --psm 1" (tmp </> input) language)
data Index = Index
- { originalText :: T.Text,
+ { content :: T.Text,
tags :: S.Set G.Tag,
addedAt :: UTCTime,
todo :: Bool,
@@ -816,7 +825,7 @@ instance J.FromJSON Index
instance HasField "shortText" Index T.Text where
getField =
(T.unlines . take 10 . T.lines)
- . (.originalText)
+ . (.content)
instance HasField "internalTags" Index (S.Set G.Tag) where
getField index =
@@ -828,7 +837,7 @@ instance HasField "internalTags" Index (S.Set G.Tag) where
-- @related cache-probabilitymap
instance HasField "originalWords" Index (S.Set T.Text) where
getField index =
- S.fromList (T.words index.originalText)
+ S.fromList (T.words index.content)
internalTags :: Index -> S.Set G.Tag
internalTags index =