summaryrefslogtreecommitdiffstats
path: root/app/Main.hs
diff options
context:
space:
mode:
Diffstat (limited to 'app/Main.hs')
-rw-r--r--app/Main.hs70
1 files changed, 61 insertions, 9 deletions
diff --git a/app/Main.hs b/app/Main.hs
index adb16b4..096f253 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
@@ -25,6 +26,7 @@ import Data.List
import Data.List.NonEmpty qualified as N
import Data.Map qualified as M
import Data.Maybe (catMaybes, fromMaybe, mapMaybe)
+import Data.Ord (comparing)
import Data.Set qualified as S
import Data.String (IsString (fromString))
import Data.Text qualified as T
@@ -291,7 +293,7 @@ printTags doc =
Nothing -> printf "@%s\n" (tagKey tag)
Just tagValue -> printf "@%s %s\n" (tagKey tag) tagValue
)
- doc.index.tags
+ (doc.index.tags `S.union` doc.index.internalTags)
doRedoIf :: [Filter] -> Bool -> IO ()
doRedoIf filters redo =
@@ -434,6 +436,45 @@ suggestTags settings allDocs doc = do
doc.index.originalText
R.*=~ pattern
pure (Tag tagName (Just ""), map (Tag tagName . Just) tagValues)
+ S.SuggestTagByTags tagName -> do
+ let allTags = foldl S.union S.empty (map (.index.tags) allDocs)
+ allWords = foldl S.union S.empty (map (.index.originalWords) allDocs)
+
+ hasWord word doc = S.member word doc.index.originalWords
+ hasTag tag doc = S.member tag doc.index.tags
+ hasWordAndTag word tag doc = hasTag tag doc && hasWord word doc
+ fi = fromIntegral
+
+ -- TODO Consider words that contribute to NOT adding a tag
+ --
+ -- If there is a document that should, say, not have a `@correspondant`, we should score words that contribute to that fact as well.
+ --
+ -- @topic probability-map
+
+ -- TODO Cache `probabilityMap`
+ probabilityMap =
+ [ let docs = filter (hasWordAndTag word tag) allDocs
+ p = fi (length docs) / fi (length allDocs)
+ in (word, tag, p)
+ | word <- S.toList allWords,
+ tag <- S.toList allTags
+ ]
+ let tagValues =
+ probabilityMap
+ & filter (\(word, tag, _) -> hasWordAndTag word tag doc)
+ & foldl'
+ ( \scorePerTagValue (_, tag, p) ->
+ M.insertWith
+ (+)
+ (tagValue tag)
+ p
+ scorePerTagValue
+ )
+ M.empty
+ & M.toList
+ & sortBy (comparing (negate . snd))
+ & map fst
+ pure (Tag tagName (Just ""), map (Tag tagName) tagValues)
autoApplySuggestedTags :: [(Tag, [Tag])] -> [Tag]
autoApplySuggestedTags =
@@ -456,8 +497,8 @@ processDocuments settings allDocs docs =
doc
{ index =
doc.index
- { tags =
- S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+ { tags = doc.index.tags `S.union` tags,
+ todo = False
}
}
withGit do
@@ -475,8 +516,8 @@ tagDocumentInteractively settings allDocs doc = do
doc
{ index =
doc.index
- { tags =
- S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+ { tags = doc.index.tags `S.union` tags,
+ todo = False
}
}
withGit do
@@ -536,7 +577,7 @@ consume1 force keep filePath = do
else pure originalText'
addedAt <- getCurrentTime
withGit do
- J.encodeFile iFilePath Index {tags = S.singleton (Tag "todo" Nothing), ..}
+ J.encodeFile iFilePath Index {tags = S.empty, todo = True, ..}
if keep
then copyFile filePath oFilePath
else renameFile filePath oFilePath
@@ -600,7 +641,8 @@ ocr1 tmp input =
data Index = Index
{ originalText :: T.Text,
tags :: S.Set Tag,
- addedAt :: UTCTime
+ addedAt :: UTCTime,
+ todo :: Bool
}
deriving (Show, Generic, Eq)
@@ -618,11 +660,21 @@ instance HasField "internalTags" Index (S.Set Tag) where
index.tags
`S.union` internalTags index
+-- TODO Cache `originalWords`
+--
+-- @related cache-probabilitymap
+instance HasField "originalWords" Index (S.Set T.Text) where
+ getField index =
+ S.fromList (T.words index.originalText)
+
internalTags :: Index -> S.Set Tag
internalTags index =
S.fromList
- [ Tag "addedAt" (Just (T.pack (iso8601Show index.addedAt)))
- ]
+ ( concat
+ [ [Tag "addedAt" (Just (T.pack (iso8601Show index.addedAt)))],
+ if index.todo then [Tag "todo" Nothing] else []
+ ]
+ )
data PdfInfo = PdfInfo
{ numPages :: Int,