From eca3ea77db5704a65b19b32abe4e37b1e997e426 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Tue, 26 Dec 2023 04:15:28 +0100 Subject: chore: add settings --- apaperless.cabal | 5 +- apaperless.yaml | 3 ++ app/Main.hs | 156 +++++++++++++++++++++++++++++++++++++------------------ app/Settings.hs | 92 ++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 52 deletions(-) create mode 100644 apaperless.yaml create mode 100644 app/Settings.hs diff --git a/apaperless.cabal b/apaperless.cabal index c9a61cb..480b605 100644 --- a/apaperless.cabal +++ b/apaperless.cabal @@ -21,6 +21,7 @@ executable apaperless main-is: Main.hs other-modules: Prompt + Settings -- other-extensions: build-depends: base, @@ -40,6 +41,8 @@ executable apaperless data-default, tags, time, - regex + regex, + yaml, + xdg-basedir hs-source-dirs: app default-language: GHC2021 diff --git a/apaperless.yaml b/apaperless.yaml new file mode 100644 index 0000000..de0da43 --- /dev/null +++ b/apaperless.yaml @@ -0,0 +1,3 @@ +suggestedTags: +- - createdAt + - '[0-9]{2}\.[0-9]{2}\.[0-9]{4}' diff --git a/app/Main.hs b/app/Main.hs index d10e407..11b7602 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -14,7 +14,7 @@ module Main where import Control.Arrow (second) import Control.Concurrent.ParallelIO.Local (parallel, withPool) import Control.Exception (Exception, throw, throwIO) -import Control.Monad (unless, when) +import Control.Monad (forM, unless, when) import Data.Aeson qualified as J import Data.Attoparsec.Text qualified as A import Data.ByteString.Lazy qualified as LB @@ -38,6 +38,7 @@ import GHC.Generics (Generic) import GHC.Records (HasField (..)) import Options.Applicative qualified as O import Prompt qualified as P +import Settings qualified as S import System.Directory import System.Environment (getEnv) import System.FilePath @@ -212,6 +213,9 @@ data Filter main :: IO () main = do + settings <- S.readSettings + S.writeSettings "apaperless.yaml" settings + cwd <- getCurrentDirectory setCurrentDirectory =<< getEnv "APAPERLESS_STORE_DIR" ensureGit @@ -219,11 +223,20 @@ main = do ensureDir "index" O.execParser (O.info (args O.<**> O.helper) O.idm) >>= \case - Args {cmd = Consume {keep, inputs, force}} -> do + Args {cmd = Consume {keep, inputs, force, prompt}} -> do indexNames <- parMapM (consume1 force keep) (map (cwd ) inputs) - documents <- mapM (readDocument . (<.> "json")) indexNames - processDocuments documents - mapM_ putStrLn indexNames + allDocs <- getDocuments + docs <- mapM (readDocument . (<.> "json")) indexNames + docs' <- + if prompt + then processDocumentsInteractively settings allDocs docs + else processDocuments settings allDocs docs + mapM_ + ( \doc -> do + printf "%s\n" (takeBaseName doc.iFileName) + print doc.index.tags + ) + docs' Args {cmd = Edit {indexNames}} -> do editDocuments =<< mapM (readDocument . (<.> "json")) indexNames @@ -244,13 +257,19 @@ main = do =<< getDocuments Args {cmd = List {filters, redo, todo = True}} -> do doRedoIf filters redo - processDocuments - . applyFilters filters - =<< getDocuments + allDocs <- getDocuments + _ <- + processDocumentsInteractively settings allDocs + . applyFilters filters + $ allDocs + pure () Args {cmd = Todo} -> do - processDocuments - . applyFilters [FilterByTag "todo"] - =<< getDocuments + allDocs <- getDocuments + _ <- + processDocumentsInteractively settings allDocs + . applyFilters [FilterByTag "todo"] + $ allDocs + pure () Args {cmd = List {filters, redo, view = True}} -> do doRedoIf filters redo viewDocuments @@ -279,6 +298,12 @@ data Document = Document } deriving (Show) +instance HasField "oFilePath" Document FilePath where + getField doc = "originals" takeBaseName doc.iFileName <.> "pdf" + +instance HasField "iFilePath" Document FilePath where + getField doc = "index" doc.iFileName + tagValues :: [Document] -> M.Map T.Text (S.Set T.Text) tagValues docs = M.unionsWith S.union $ @@ -288,12 +313,6 @@ tagValues docs = ) (S.toList (S.unions (map (.index.tags) docs))) -instance HasField "oFilePath" Document FilePath where - getField doc = "originals" takeBaseName doc.iFileName <.> "pdf" - -instance HasField "iFilePath" Document FilePath where - getField doc = "index" doc.iFileName - getDocuments :: IO [Document] getDocuments = parMapM readDocument @@ -316,20 +335,20 @@ applyFilters filters = filter (pred filters) `at` (.index.internalTags) | null (g [f x]) = at g f xs | otherwise = x : at g f xs -processDocuments :: [Document] -> IO () -processDocuments docs = - mapM_ (uncurry processDocument) (zip [1 :: Int ..] docs) +processDocumentsInteractively :: S.Settings -> [Document] -> [Document] -> IO [Document] +processDocumentsInteractively settings allDocs docs = + mapM (uncurry processDocumentInteractively) (zip [1 :: Int ..] docs) where numDocs = length docs tagValues' = tagValues docs - processDocument n (doc@Document {iFileName, index}) = do + processDocumentInteractively n doc = do choice <- P.prompt ( P.choice ( printf "%s\n%s\n\n(%d/%d) Process this document?" - (takeBaseName iFileName) - index.shortText + (takeBaseName doc.iFileName) + doc.index.shortText n numDocs ) @@ -346,12 +365,12 @@ processDocuments docs = "f" -> do printf "%s\n" (takeBaseName doc.iFileName) printf "%s\n" doc.index.originalText - processDocument n doc - "p" -> tagDocument tagValues' doc - "s" -> pure () + processDocumentInteractively n doc + "p" -> tagDocumentInteractively settings allDocs doc + "s" -> pure doc "v" -> do viewDocuments [doc] - processDocument n doc + processDocumentInteractively n doc viewDocuments :: [Document] -> IO () viewDocuments docs = @@ -389,38 +408,73 @@ editDocuments docs = ) docs -tagDocument :: M.Map T.Text (S.Set T.Text) -> Document -> IO () -tagDocument tagValues (Document {iFileName, index}) = do - let createdAts = - nub . catMaybes . map R.matchedText . R.allMatches $ - index.originalText - R.*=~ [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|] - let suggestedTags = - [ ( Tag "createdAt" (Just ""), - map (Tag "createdAt" . Just) createdAts - ) - {-, - Tag "correspondent" (Just ""), - Tag "invoice" Nothing, - Tag "bill" Nothing-} - ] +suggestTags :: S.Settings -> [Document] -> Document -> IO [(Tag, [Tag])] +suggestTags settings allDocs doc = do + forM settings.suggestedTags $ \suggestedTag -> do + case suggestedTag of + S.SuggestTagByRE tagName pattern -> do + let tagValues = + nub . catMaybes . map R.matchedText . R.allMatches $ + doc.index.originalText + R.*=~ pattern + pure (Tag tagName (Just ""), map (Tag tagName . Just) tagValues) + +autoApplySuggestedTags :: [(Tag, [Tag])] -> [Tag] +autoApplySuggestedTags = + mapMaybe + ( \(_, suggestedTags) -> + if null suggestedTags + then Nothing + else Just (head suggestedTags) + ) + +processDocuments :: S.Settings -> [Document] -> [Document] -> IO [Document] +processDocuments settings allDocs docs = + mapM processDocument docs + where + processDocument doc = do + tags <- + S.fromList . autoApplySuggestedTags + <$> suggestTags settings allDocs doc + let doc' = + doc + { index = + doc.index + { tags = + S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags) + } + } + withGit do + J.encodeFile doc.iFilePath doc'.index + commitAll [doc.iFilePath] (printf "process %s (auto)" doc.iFilePath) + pure doc' + +tagDocumentInteractively :: S.Settings -> [Document] -> Document -> IO Document +tagDocumentInteractively settings allDocs doc = do + suggestedTags <- suggestTags settings allDocs doc tags <- S.fromList . catMaybes - <$> mapM (uncurry tagDocument') suggestedTags - let tags' = S.delete (Tag "todo" Nothing) (index.tags `S.union` tags) - index' = index {tags = tags'} - iFilePath = "index" iFileName + <$> mapM (uncurry tagDocumentInteractively') suggestedTags + let doc' = + doc + { index = + doc.index + { tags = + S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags) + } + } withGit do - J.encodeFile iFilePath index' - commitAll [iFilePath] (printf "process %s (interactive)" iFilePath) + J.encodeFile doc.iFilePath doc'.index + commitAll [doc.iFilePath] (printf "process %s (interactive)" doc.iFilePath) + pure doc' where - tagDocument' :: Tag -> [Tag] -> IO (Maybe Tag) - tagDocument' tag@(Tag tagKey Nothing) tags = do + tagDocumentInteractively' :: Tag -> [Tag] -> IO (Maybe Tag) + tagDocumentInteractively' tag@(Tag tagKey Nothing) tags = do choice <- P.prompt $ P.choice (printf "tag with %s?" tagKey) (("n" :: String) N.:| ["y"]) pure $ if (choice == "y") then Just tag else Nothing - tagDocument' (Tag tagKey (Just _)) tags = do + tagDocumentInteractively' (Tag tagKey (Just _)) tags = do tagValue <- fmap T.pack . P.prompt $ P.string diff --git a/app/Settings.hs b/app/Settings.hs new file mode 100644 index 0000000..5d4d55f --- /dev/null +++ b/app/Settings.hs @@ -0,0 +1,92 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Settings + ( Settings (..), + readSettings, + writeSettings, + SuggestedTag (..), + ) +where + +import Data.Aeson qualified as A +import Data.Function (on) +import Data.List (nub) +import Data.Text qualified as T +import Data.Yaml (decodeFileThrow, encodeFile) +import GHC.Generics (Generic) +import System.Directory (doesFileExist) +import System.Environment.XDG.BaseDir (getSystemConfigFiles, getUserConfigFile) +import Text.RE.TDFA.Text qualified as R + +data Settings = Settings + { suggestedTags :: [SuggestedTag] + } + deriving (Show, Generic) + +instance Semigroup Settings where + a <> b = + Settings + { suggestedTags = nub (a.suggestedTags <> b.suggestedTags) + } + +instance Monoid Settings where + mempty = + Settings + { suggestedTags = + [ -- TODO Add `SuggestedTagByDate` + -- + -- By having a dedicated constructor for dates wie can take the following improvements into account: + -- + -- - date formats that spell out the month name, ie. `1 Januar 1970` or `1 Jan 1970` + -- - perform minor corrections on OCR, ie. parse `0 1.01.1970` + SuggestTagByRE "createdAt" [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|] + ] + } + +instance A.FromJSON Settings + +instance A.ToJSON Settings + +data SuggestedTag + = SuggestTagByRE T.Text R.RE + deriving (Show, Generic, Eq) + +instance Show R.RE where + show = R.reSource + +instance Eq R.RE where + (==) = (==) `on` show + +instance A.FromJSON SuggestedTag + +instance A.ToJSON SuggestedTag + +instance A.FromJSON R.RE where + parseJSON = A.withText "regular expression (POSIX)" (R.compileRegex . T.unpack) + +instance A.ToJSON R.RE where + toJSON = A.toJSON . R.reSource + +readSettings :: IO Settings +readSettings = + fmap mconcat + . mapM + ( \fp -> + doesFileExist fp >>= \case + True -> decodeFileThrow fp + False -> pure mempty + ) + =<< concat + <$> sequence + [ getSystemConfigFiles "apaperless" "settings.yaml", + ((: []) <$> getUserConfigFile "apaperless" "settings.yaml"), + pure ["apaperless.yaml"] + ] + +writeSettings :: FilePath -> Settings -> IO () +writeSettings = + encodeFile -- cgit v1.2.3