summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--apaperless.cabal5
-rw-r--r--apaperless.yaml3
-rw-r--r--app/Main.hs156
-rw-r--r--app/Settings.hs92
4 files changed, 204 insertions, 52 deletions
diff --git a/apaperless.cabal b/apaperless.cabal
index c9a61cb..480b605 100644
--- a/apaperless.cabal
+++ b/apaperless.cabal
@@ -21,6 +21,7 @@ executable apaperless
main-is: Main.hs
other-modules:
Prompt
+ Settings
-- other-extensions:
build-depends:
base,
@@ -40,6 +41,8 @@ executable apaperless
data-default,
tags,
time,
- regex
+ regex,
+ yaml,
+ xdg-basedir
hs-source-dirs: app
default-language: GHC2021
diff --git a/apaperless.yaml b/apaperless.yaml
new file mode 100644
index 0000000..de0da43
--- /dev/null
+++ b/apaperless.yaml
@@ -0,0 +1,3 @@
+suggestedTags:
+- - createdAt
+ - '[0-9]{2}\.[0-9]{2}\.[0-9]{4}'
diff --git a/app/Main.hs b/app/Main.hs
index d10e407..11b7602 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -14,7 +14,7 @@ module Main where
import Control.Arrow (second)
import Control.Concurrent.ParallelIO.Local (parallel, withPool)
import Control.Exception (Exception, throw, throwIO)
-import Control.Monad (unless, when)
+import Control.Monad (forM, unless, when)
import Data.Aeson qualified as J
import Data.Attoparsec.Text qualified as A
import Data.ByteString.Lazy qualified as LB
@@ -38,6 +38,7 @@ import GHC.Generics (Generic)
import GHC.Records (HasField (..))
import Options.Applicative qualified as O
import Prompt qualified as P
+import Settings qualified as S
import System.Directory
import System.Environment (getEnv)
import System.FilePath
@@ -212,6 +213,9 @@ data Filter
main :: IO ()
main = do
+ settings <- S.readSettings
+ S.writeSettings "apaperless.yaml" settings
+
cwd <- getCurrentDirectory
setCurrentDirectory =<< getEnv "APAPERLESS_STORE_DIR"
ensureGit
@@ -219,11 +223,20 @@ main = do
ensureDir "index"
O.execParser (O.info (args O.<**> O.helper) O.idm) >>= \case
- Args {cmd = Consume {keep, inputs, force}} -> do
+ Args {cmd = Consume {keep, inputs, force, prompt}} -> do
indexNames <- parMapM (consume1 force keep) (map (cwd </>) inputs)
- documents <- mapM (readDocument . (<.> "json")) indexNames
- processDocuments documents
- mapM_ putStrLn indexNames
+ allDocs <- getDocuments
+ docs <- mapM (readDocument . (<.> "json")) indexNames
+ docs' <-
+ if prompt
+ then processDocumentsInteractively settings allDocs docs
+ else processDocuments settings allDocs docs
+ mapM_
+ ( \doc -> do
+ printf "%s\n" (takeBaseName doc.iFileName)
+ print doc.index.tags
+ )
+ docs'
Args {cmd = Edit {indexNames}} -> do
editDocuments
=<< mapM (readDocument . (<.> "json")) indexNames
@@ -244,13 +257,19 @@ main = do
=<< getDocuments
Args {cmd = List {filters, redo, todo = True}} -> do
doRedoIf filters redo
- processDocuments
- . applyFilters filters
- =<< getDocuments
+ allDocs <- getDocuments
+ _ <-
+ processDocumentsInteractively settings allDocs
+ . applyFilters filters
+ $ allDocs
+ pure ()
Args {cmd = Todo} -> do
- processDocuments
- . applyFilters [FilterByTag "todo"]
- =<< getDocuments
+ allDocs <- getDocuments
+ _ <-
+ processDocumentsInteractively settings allDocs
+ . applyFilters [FilterByTag "todo"]
+ $ allDocs
+ pure ()
Args {cmd = List {filters, redo, view = True}} -> do
doRedoIf filters redo
viewDocuments
@@ -279,6 +298,12 @@ data Document = Document
}
deriving (Show)
+instance HasField "oFilePath" Document FilePath where
+ getField doc = "originals" </> takeBaseName doc.iFileName <.> "pdf"
+
+instance HasField "iFilePath" Document FilePath where
+ getField doc = "index" </> doc.iFileName
+
tagValues :: [Document] -> M.Map T.Text (S.Set T.Text)
tagValues docs =
M.unionsWith S.union $
@@ -288,12 +313,6 @@ tagValues docs =
)
(S.toList (S.unions (map (.index.tags) docs)))
-instance HasField "oFilePath" Document FilePath where
- getField doc = "originals" </> takeBaseName doc.iFileName <.> "pdf"
-
-instance HasField "iFilePath" Document FilePath where
- getField doc = "index" </> doc.iFileName
-
getDocuments :: IO [Document]
getDocuments =
parMapM readDocument
@@ -316,20 +335,20 @@ applyFilters filters = filter (pred filters) `at` (.index.internalTags)
| null (g [f x]) = at g f xs
| otherwise = x : at g f xs
-processDocuments :: [Document] -> IO ()
-processDocuments docs =
- mapM_ (uncurry processDocument) (zip [1 :: Int ..] docs)
+processDocumentsInteractively :: S.Settings -> [Document] -> [Document] -> IO [Document]
+processDocumentsInteractively settings allDocs docs =
+ mapM (uncurry processDocumentInteractively) (zip [1 :: Int ..] docs)
where
numDocs = length docs
tagValues' = tagValues docs
- processDocument n (doc@Document {iFileName, index}) = do
+ processDocumentInteractively n doc = do
choice <-
P.prompt
( P.choice
( printf
"%s\n%s\n\n(%d/%d) Process this document?"
- (takeBaseName iFileName)
- index.shortText
+ (takeBaseName doc.iFileName)
+ doc.index.shortText
n
numDocs
)
@@ -346,12 +365,12 @@ processDocuments docs =
"f" -> do
printf "%s\n" (takeBaseName doc.iFileName)
printf "%s\n" doc.index.originalText
- processDocument n doc
- "p" -> tagDocument tagValues' doc
- "s" -> pure ()
+ processDocumentInteractively n doc
+ "p" -> tagDocumentInteractively settings allDocs doc
+ "s" -> pure doc
"v" -> do
viewDocuments [doc]
- processDocument n doc
+ processDocumentInteractively n doc
viewDocuments :: [Document] -> IO ()
viewDocuments docs =
@@ -389,38 +408,73 @@ editDocuments docs =
)
docs
-tagDocument :: M.Map T.Text (S.Set T.Text) -> Document -> IO ()
-tagDocument tagValues (Document {iFileName, index}) = do
- let createdAts =
- nub . catMaybes . map R.matchedText . R.allMatches $
- index.originalText
- R.*=~ [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|]
- let suggestedTags =
- [ ( Tag "createdAt" (Just ""),
- map (Tag "createdAt" . Just) createdAts
- )
- {-,
- Tag "correspondent" (Just ""),
- Tag "invoice" Nothing,
- Tag "bill" Nothing-}
- ]
+suggestTags :: S.Settings -> [Document] -> Document -> IO [(Tag, [Tag])]
+suggestTags settings allDocs doc = do
+ forM settings.suggestedTags $ \suggestedTag -> do
+ case suggestedTag of
+ S.SuggestTagByRE tagName pattern -> do
+ let tagValues =
+ nub . catMaybes . map R.matchedText . R.allMatches $
+ doc.index.originalText
+ R.*=~ pattern
+ pure (Tag tagName (Just ""), map (Tag tagName . Just) tagValues)
+
+autoApplySuggestedTags :: [(Tag, [Tag])] -> [Tag]
+autoApplySuggestedTags =
+ mapMaybe
+ ( \(_, suggestedTags) ->
+ if null suggestedTags
+ then Nothing
+ else Just (head suggestedTags)
+ )
+
+processDocuments :: S.Settings -> [Document] -> [Document] -> IO [Document]
+processDocuments settings allDocs docs =
+ mapM processDocument docs
+ where
+ processDocument doc = do
+ tags <-
+ S.fromList . autoApplySuggestedTags
+ <$> suggestTags settings allDocs doc
+ let doc' =
+ doc
+ { index =
+ doc.index
+ { tags =
+ S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+ }
+ }
+ withGit do
+ J.encodeFile doc.iFilePath doc'.index
+ commitAll [doc.iFilePath] (printf "process %s (auto)" doc.iFilePath)
+ pure doc'
+
+tagDocumentInteractively :: S.Settings -> [Document] -> Document -> IO Document
+tagDocumentInteractively settings allDocs doc = do
+ suggestedTags <- suggestTags settings allDocs doc
tags <-
S.fromList . catMaybes
- <$> mapM (uncurry tagDocument') suggestedTags
- let tags' = S.delete (Tag "todo" Nothing) (index.tags `S.union` tags)
- index' = index {tags = tags'}
- iFilePath = "index" </> iFileName
+ <$> mapM (uncurry tagDocumentInteractively') suggestedTags
+ let doc' =
+ doc
+ { index =
+ doc.index
+ { tags =
+ S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+ }
+ }
withGit do
- J.encodeFile iFilePath index'
- commitAll [iFilePath] (printf "process %s (interactive)" iFilePath)
+ J.encodeFile doc.iFilePath doc'.index
+ commitAll [doc.iFilePath] (printf "process %s (interactive)" doc.iFilePath)
+ pure doc'
where
- tagDocument' :: Tag -> [Tag] -> IO (Maybe Tag)
- tagDocument' tag@(Tag tagKey Nothing) tags = do
+ tagDocumentInteractively' :: Tag -> [Tag] -> IO (Maybe Tag)
+ tagDocumentInteractively' tag@(Tag tagKey Nothing) tags = do
choice <-
P.prompt $
P.choice (printf "tag with %s?" tagKey) (("n" :: String) N.:| ["y"])
pure $ if (choice == "y") then Just tag else Nothing
- tagDocument' (Tag tagKey (Just _)) tags = do
+ tagDocumentInteractively' (Tag tagKey (Just _)) tags = do
tagValue <-
fmap T.pack . P.prompt $
P.string
diff --git a/app/Settings.hs b/app/Settings.hs
new file mode 100644
index 0000000..5d4d55f
--- /dev/null
+++ b/app/Settings.hs
@@ -0,0 +1,92 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedRecordDot #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Settings
+ ( Settings (..),
+ readSettings,
+ writeSettings,
+ SuggestedTag (..),
+ )
+where
+
+import Data.Aeson qualified as A
+import Data.Function (on)
+import Data.List (nub)
+import Data.Text qualified as T
+import Data.Yaml (decodeFileThrow, encodeFile)
+import GHC.Generics (Generic)
+import System.Directory (doesFileExist)
+import System.Environment.XDG.BaseDir (getSystemConfigFiles, getUserConfigFile)
+import Text.RE.TDFA.Text qualified as R
+
+data Settings = Settings
+ { suggestedTags :: [SuggestedTag]
+ }
+ deriving (Show, Generic)
+
+instance Semigroup Settings where
+ a <> b =
+ Settings
+ { suggestedTags = nub (a.suggestedTags <> b.suggestedTags)
+ }
+
+instance Monoid Settings where
+ mempty =
+ Settings
+ { suggestedTags =
+ [ -- TODO Add `SuggestedTagByDate`
+ --
+ -- By having a dedicated constructor for dates wie can take the following improvements into account:
+ --
+ -- - date formats that spell out the month name, ie. `1 Januar 1970` or `1 Jan 1970`
+ -- - perform minor corrections on OCR, ie. parse `0 1.01.1970`
+ SuggestTagByRE "createdAt" [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|]
+ ]
+ }
+
+instance A.FromJSON Settings
+
+instance A.ToJSON Settings
+
+data SuggestedTag
+ = SuggestTagByRE T.Text R.RE
+ deriving (Show, Generic, Eq)
+
+instance Show R.RE where
+ show = R.reSource
+
+instance Eq R.RE where
+ (==) = (==) `on` show
+
+instance A.FromJSON SuggestedTag
+
+instance A.ToJSON SuggestedTag
+
+instance A.FromJSON R.RE where
+ parseJSON = A.withText "regular expression (POSIX)" (R.compileRegex . T.unpack)
+
+instance A.ToJSON R.RE where
+ toJSON = A.toJSON . R.reSource
+
+readSettings :: IO Settings
+readSettings =
+ fmap mconcat
+ . mapM
+ ( \fp ->
+ doesFileExist fp >>= \case
+ True -> decodeFileThrow fp
+ False -> pure mempty
+ )
+ =<< concat
+ <$> sequence
+ [ getSystemConfigFiles "apaperless" "settings.yaml",
+ ((: []) <$> getUserConfigFile "apaperless" "settings.yaml"),
+ pure ["apaperless.yaml"]
+ ]
+
+writeSettings :: FilePath -> Settings -> IO ()
+writeSettings =
+ encodeFile