summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-22 06:03:39 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-22 06:03:39 +0100
commitee67922575359b1e9e5480312fac634446fe5bd5 (patch)
tree8cb9bb83a1262bb9d7e7d8f29cde0f97f44b9cfc
parentd5d96dba1ff5c9cd66c295665fb422c8e930ff9d (diff)
feat: add command `todo`
-rw-r--r--app/Main.hs251
1 files changed, 237 insertions, 14 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 920d3a6..32f4037 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,8 +1,10 @@
{-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE DataKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE NoFieldSelectors #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
@@ -17,9 +19,11 @@ import Data.Attoparsec.Text qualified as A
import Data.ByteString.Lazy qualified as LB
import Data.Default
import Data.Digest.Pure.SHA (sha256, showDigest)
+import Data.Function ((&))
import Data.List
import Data.Map qualified as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (catMaybes, fromMaybe)
+import Data.Set qualified as S
import Data.String (IsString (fromString))
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
@@ -27,10 +31,12 @@ import Data.Text.IO qualified as T
import Debug.Trace
import GHC.Conc (getNumProcessors)
import GHC.Generics (Generic)
+import GHC.Records (HasField (..))
import Options.Applicative qualified as O
import System.Directory
import System.Environment (getEnv)
import System.FilePath
+import System.IO
import System.IO.LockFile (withLockFile)
import System.IO.Temp (withSystemTempDirectory)
import System.Process.Typed
@@ -43,7 +49,8 @@ data Args = Args
data Cmd
= Consume Bool [FilePath]
- | List
+ | List [Filter]
+ | Todo
args :: O.Parser Args
args =
@@ -55,7 +62,9 @@ cmd =
[ O.command "consume" . O.info consumeCmd $
O.progDesc "Consume document(s)",
O.command "list" . O.info listCmd $
- O.progDesc "List document(s)"
+ O.progDesc "List document(s)",
+ O.command "todo" . O.info todoCmd $
+ O.progDesc "Interactively process new documents"
]
consumeCmd :: O.Parser Cmd
@@ -66,7 +75,12 @@ consumeCmd =
listCmd :: O.Parser Cmd
listCmd =
- pure List
+ List
+ <$> filterArg
+
+todoCmd :: O.Parser Cmd
+todoCmd =
+ pure Todo
filePathsArg :: O.Parser [FilePath]
filePathsArg =
@@ -79,6 +93,22 @@ keepArg =
<> O.help "Keep input document"
)
+filterArg :: O.Parser [Filter]
+filterArg =
+ O.many $
+ O.option
+ (O.maybeReader parse)
+ ( O.long "filter"
+ <> O.short 'f'
+ <> O.help "Filter documents by tag"
+ )
+ where
+ parse ('@' : tagKey) = Just (FilterByTag (T.pack tagKey))
+ parse _ = Nothing
+
+data Filter
+ = FilterByTag T.Text
+
main :: IO ()
main = do
cwd <- getCurrentDirectory
@@ -91,17 +121,183 @@ main = do
Args {cmd = Consume keep filePaths} ->
mapM_ putStrLn
=<< parMapM (consume1 keep) (map (cwd </>) filePaths)
- Args {cmd = List} -> do
+ Args {cmd = List filters} -> do
mapM_
- ( \(iFileName, index) -> do
- putStrLn (takeBaseName iFileName)
- T.putStrLn index.originalText
+ ( \(Document {iFileName, index}) -> do
+ if hasTag (Tag "todo" Nothing) index
+ then printf "TODO %s\n" (takeBaseName iFileName)
+ else printf " %s\n" (takeBaseName iFileName)
)
- =<< parMapM
- ( \iFileName ->
- (,) iFileName <$> decodeFile @Index ("index" </> iFileName)
+ . applyFilters filters
+ =<< getDocuments
+ Args {cmd = Todo} -> do
+ processDocuments
+ . applyFilters [FilterByTag "todo"]
+ =<< getDocuments
+
+data Document = Document
+ { iFileName :: String,
+ index :: Index
+ }
+ deriving (Show)
+
+instance HasField "oFilePath" Document FilePath where
+ getField doc = "originals" </> takeBaseName doc.iFileName <.> "pdf"
+
+instance HasField "iFilePath" Document FilePath where
+ getField doc = "index" </> doc.iFileName
+
+getDocuments :: IO [Document]
+getDocuments =
+ parMapM
+ ( \iFileName ->
+ Document iFileName
+ <$> decodeFile @Index ("index" </> iFileName)
+ )
+ =<< sort <$> listDirectory "index"
+
+applyFilters :: [Filter] -> [Document] -> [Document]
+applyFilters filters = filter (pred filters) `at` (.index)
+ where
+ pred1 (FilterByTag tagKey) = hasTag (Tag tagKey Nothing)
+ pred filters = \index -> all ($ index) (map pred1 filters)
+
+ at :: ([a] -> [a]) -> (b -> a) -> [b] -> [b]
+ at _ _ [] = []
+ at g f (x : xs)
+ | null (g [f x]) = at g f xs
+ | otherwise = x : at g f xs
+
+processDocuments :: [Document] -> IO ()
+processDocuments docs =
+ mapM_ (uncurry processDocuments') (zip [1 :: Int ..] docs)
+ where
+ numDocs = length docs
+ processDocuments' n (doc@Document {iFileName, index}) = do
+ choice <-
+ promptChoiceHelp
+ [ ("f", "view full text"),
+ ("p", "process document"),
+ ("s", "skip document"),
+ ("v", "view document")
+ ]
+ ( printf
+ "%s\n%s\n\n(%d/%d) Process this document?"
+ (takeBaseName iFileName)
+ index.shortText
+ n
+ numDocs
)
- =<< sort <$> listDirectory "index"
+ case choice of
+ "f" -> do
+ printf "%s\n" (takeBaseName doc.iFileName)
+ printf
+ "%s\n"
+ ( T.unlines
+ . filter (not . T.null)
+ . map T.strip
+ . T.lines
+ $ doc.index.originalText
+ )
+ processDocuments' n doc
+ "p" -> processDocument doc
+ "s" -> pure ()
+ "v" -> do
+ sh_ (printf "zathura '%s'" doc.oFilePath)
+ processDocuments' n doc
+
+processDocument :: Document -> IO ()
+processDocument (Document {iFileName, index}) = do
+ printf "%s\n" index.originalText
+ let suggestedTags =
+ [ Tag "correspondent" (Just ""),
+ Tag "invoice" Nothing,
+ Tag "bill" Nothing
+ ]
+ tags <-
+ S.fromList . catMaybes
+ <$> mapM processSuggestedTag suggestedTags
+ let tags' = S.delete (Tag "todo" Nothing) (index.tags `S.union` tags)
+ index' = index {tags = tags'}
+ iFilePath = "index" </> iFileName
+ withGit do
+ J.encodeFile iFilePath index'
+ commitAll [iFilePath] (printf "process %s (interactive)" iFilePath)
+
+processSuggestedTag :: Tag -> IO (Maybe Tag)
+processSuggestedTag tag@(Tag tagKey Nothing) = do
+ choice <- promptChoice (Just "n") ["n", "y"] (printf "tag with %s?" tagKey)
+ pure $ if (choice == "y") then Just tag else Nothing
+processSuggestedTag (Tag tagKey (Just _)) = do
+ tagValue <- promptString [] (printf "tag with %s?" tagKey)
+ pure $
+ if not (T.null tagValue)
+ then Just (Tag tagKey (Just tagValue))
+ else Nothing
+
+promptChoice :: Maybe T.Text -> [T.Text] -> String -> IO T.Text
+promptChoice mDef as s = do
+ a <-
+ T.toLower
+ <$> promptString
+ []
+ ( s
+ ++ ( T.unpack
+ ( " ["
+ <> T.intercalate "" (map capitalizeDef as)
+ <> "]"
+ )
+ )
+ )
+ case (a, mDef) of
+ ("", Just def) -> pure def
+ _ ->
+ if not (T.toLower a `elem` map T.toLower as)
+ then promptChoice mDef as s
+ else pure a
+ where
+ capitalizeDef a = (if Just a == mDef then T.toUpper else T.toLower) a
+
+promptChoiceHelp :: [(T.Text, T.Text)] -> String -> IO T.Text
+promptChoiceHelp as' s = do
+ a <-
+ T.toLower
+ <$> promptString
+ []
+ ( s
+ ++ ( T.unpack
+ (" [" <> T.intercalate "" (as ++ ["?"]) <> "]")
+ )
+ )
+ if a == "?"
+ then do
+ printHelp
+ promptChoiceHelp as' s
+ else
+ if not (T.toLower a `elem` map T.toLower as)
+ then promptChoiceHelp as' s
+ else pure a
+ where
+ as = map fst as'
+ printHelp = mapM_ (uncurry (printf "%s - %s\n")) as'
+
+promptString :: [T.Text] -> String -> IO T.Text
+promptString as s = do
+ if null as
+ then do
+ putStr (s <> "> ")
+ else do
+ putStrLn s
+ mapM_ (\(n, a) -> printf "[%d] %s\n" n a) (zip [1 :: Int ..] as)
+ putStr "> "
+ hFlush stdout
+ a <- T.strip <$> T.getLine
+ case (as, readMaybe (T.unpack a)) of
+ ((_ : _), Just n) ->
+ case drop (n - 1) as of
+ [] -> promptString as s
+ (a' : _) -> pure a'
+ _ -> pure a
ensureGit :: IO ()
ensureGit = do
@@ -137,7 +333,7 @@ consume1 keep filePath = do
then ocr filePath
else pure originalText'
withGit do
- J.encodeFile iFilePath Index {..}
+ J.encodeFile iFilePath Index {tags = S.singleton (Tag "todo" Nothing), ..}
if keep
then copyFile filePath oFilePath
else renameFile filePath oFilePath
@@ -198,7 +394,8 @@ ocr1 tmp input =
<$> sh (printf "tesseract '%s' -" (tmp </> input))
data Index = Index
- { originalText :: T.Text
+ { originalText :: T.Text,
+ tags :: S.Set Tag
}
deriving (Show, Generic, Eq)
@@ -206,6 +403,32 @@ instance J.ToJSON Index
instance J.FromJSON Index
+instance HasField "shortText" Index T.Text where
+ getField =
+ T.unlines
+ . take 10
+ . filter (not . T.null)
+ . map T.strip
+ . T.lines
+ . (.originalText)
+
+data Tag = Tag T.Text (Maybe T.Text)
+ deriving (Show, Generic, Eq, Ord)
+
+tagKey :: Tag -> T.Text
+tagKey (Tag x _) = x
+
+tagValue :: Tag -> Maybe T.Text
+tagValue (Tag _ x) = x
+
+hasTag :: Tag -> Index -> Bool
+hasTag tag =
+ (tagKey tag `S.member`) . S.map tagKey . (.tags)
+
+instance J.ToJSON Tag
+
+instance J.FromJSON Tag
+
data PdfInfo = PdfInfo
{ numPages :: Int,
pageSize :: (Double, Double)