From eca3ea77db5704a65b19b32abe4e37b1e997e426 Mon Sep 17 00:00:00 2001
From: Alexander Foremny <aforemny@posteo.de>
Date: Tue, 26 Dec 2023 04:15:28 +0100
Subject: chore: add settings

---
 app/Main.hs     | 156 ++++++++++++++++++++++++++++++++++++++------------------
 app/Settings.hs |  92 +++++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+), 51 deletions(-)
 create mode 100644 app/Settings.hs

(limited to 'app')

diff --git a/app/Main.hs b/app/Main.hs
index d10e407..11b7602 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -14,7 +14,7 @@ module Main where
 import Control.Arrow (second)
 import Control.Concurrent.ParallelIO.Local (parallel, withPool)
 import Control.Exception (Exception, throw, throwIO)
-import Control.Monad (unless, when)
+import Control.Monad (forM, unless, when)
 import Data.Aeson qualified as J
 import Data.Attoparsec.Text qualified as A
 import Data.ByteString.Lazy qualified as LB
@@ -38,6 +38,7 @@ import GHC.Generics (Generic)
 import GHC.Records (HasField (..))
 import Options.Applicative qualified as O
 import Prompt qualified as P
+import Settings qualified as S
 import System.Directory
 import System.Environment (getEnv)
 import System.FilePath
@@ -212,6 +213,9 @@ data Filter
 
 main :: IO ()
 main = do
+  settings <- S.readSettings
+  S.writeSettings "apaperless.yaml" settings
+
   cwd <- getCurrentDirectory
   setCurrentDirectory =<< getEnv "APAPERLESS_STORE_DIR"
   ensureGit
@@ -219,11 +223,20 @@ main = do
   ensureDir "index"
 
   O.execParser (O.info (args O.<**> O.helper) O.idm) >>= \case
-    Args {cmd = Consume {keep, inputs, force}} -> do
+    Args {cmd = Consume {keep, inputs, force, prompt}} -> do
       indexNames <- parMapM (consume1 force keep) (map (cwd </>) inputs)
-      documents <- mapM (readDocument . (<.> "json")) indexNames
-      processDocuments documents
-      mapM_ putStrLn indexNames
+      allDocs <- getDocuments
+      docs <- mapM (readDocument . (<.> "json")) indexNames
+      docs' <-
+        if prompt
+          then processDocumentsInteractively settings allDocs docs
+          else processDocuments settings allDocs docs
+      mapM_
+        ( \doc -> do
+            printf "%s\n" (takeBaseName doc.iFileName)
+            print doc.index.tags
+        )
+        docs'
     Args {cmd = Edit {indexNames}} -> do
       editDocuments
         =<< mapM (readDocument . (<.> "json")) indexNames
@@ -244,13 +257,19 @@ main = do
         =<< getDocuments
     Args {cmd = List {filters, redo, todo = True}} -> do
       doRedoIf filters redo
-      processDocuments
-        . applyFilters filters
-        =<< getDocuments
+      allDocs <- getDocuments
+      _ <-
+        processDocumentsInteractively settings allDocs
+          . applyFilters filters
+          $ allDocs
+      pure ()
     Args {cmd = Todo} -> do
-      processDocuments
-        . applyFilters [FilterByTag "todo"]
-        =<< getDocuments
+      allDocs <- getDocuments
+      _ <-
+        processDocumentsInteractively settings allDocs
+          . applyFilters [FilterByTag "todo"]
+          $ allDocs
+      pure ()
     Args {cmd = List {filters, redo, view = True}} -> do
       doRedoIf filters redo
       viewDocuments
@@ -279,6 +298,12 @@ data Document = Document
   }
   deriving (Show)
 
+instance HasField "oFilePath" Document FilePath where
+  getField doc = "originals" </> takeBaseName doc.iFileName <.> "pdf"
+
+instance HasField "iFilePath" Document FilePath where
+  getField doc = "index" </> doc.iFileName
+
 tagValues :: [Document] -> M.Map T.Text (S.Set T.Text)
 tagValues docs =
   M.unionsWith S.union $
@@ -288,12 +313,6 @@ tagValues docs =
       )
       (S.toList (S.unions (map (.index.tags) docs)))
 
-instance HasField "oFilePath" Document FilePath where
-  getField doc = "originals" </> takeBaseName doc.iFileName <.> "pdf"
-
-instance HasField "iFilePath" Document FilePath where
-  getField doc = "index" </> doc.iFileName
-
 getDocuments :: IO [Document]
 getDocuments =
   parMapM readDocument
@@ -316,20 +335,20 @@ applyFilters filters = filter (pred filters) `at` (.index.internalTags)
       | null (g [f x]) = at g f xs
       | otherwise = x : at g f xs
 
-processDocuments :: [Document] -> IO ()
-processDocuments docs =
-  mapM_ (uncurry processDocument) (zip [1 :: Int ..] docs)
+processDocumentsInteractively :: S.Settings -> [Document] -> [Document] -> IO [Document]
+processDocumentsInteractively settings allDocs docs =
+  mapM (uncurry processDocumentInteractively) (zip [1 :: Int ..] docs)
   where
     numDocs = length docs
     tagValues' = tagValues docs
-    processDocument n (doc@Document {iFileName, index}) = do
+    processDocumentInteractively n doc = do
       choice <-
         P.prompt
           ( P.choice
               ( printf
                   "%s\n%s\n\n(%d/%d) Process this document?"
-                  (takeBaseName iFileName)
-                  index.shortText
+                  (takeBaseName doc.iFileName)
+                  doc.index.shortText
                   n
                   numDocs
               )
@@ -346,12 +365,12 @@ processDocuments docs =
         "f" -> do
           printf "%s\n" (takeBaseName doc.iFileName)
           printf "%s\n" doc.index.originalText
-          processDocument n doc
-        "p" -> tagDocument tagValues' doc
-        "s" -> pure ()
+          processDocumentInteractively n doc
+        "p" -> tagDocumentInteractively settings allDocs doc
+        "s" -> pure doc
         "v" -> do
           viewDocuments [doc]
-          processDocument n doc
+          processDocumentInteractively n doc
 
 viewDocuments :: [Document] -> IO ()
 viewDocuments docs =
@@ -389,38 +408,73 @@ editDocuments docs =
       )
       docs
 
-tagDocument :: M.Map T.Text (S.Set T.Text) -> Document -> IO ()
-tagDocument tagValues (Document {iFileName, index}) = do
-  let createdAts =
-        nub . catMaybes . map R.matchedText . R.allMatches $
-          index.originalText
-            R.*=~ [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|]
-  let suggestedTags =
-        [ ( Tag "createdAt" (Just ""),
-            map (Tag "createdAt" . Just) createdAts
-          )
-          {-,
-                                     Tag "correspondent" (Just ""),
-                                     Tag "invoice" Nothing,
-                                     Tag "bill" Nothing-}
-        ]
+suggestTags :: S.Settings -> [Document] -> Document -> IO [(Tag, [Tag])]
+suggestTags settings allDocs doc = do
+  forM settings.suggestedTags $ \suggestedTag -> do
+    case suggestedTag of
+      S.SuggestTagByRE tagName pattern -> do
+        let tagValues =
+              nub . catMaybes . map R.matchedText . R.allMatches $
+                doc.index.originalText
+                  R.*=~ pattern
+        pure (Tag tagName (Just ""), map (Tag tagName . Just) tagValues)
+
+autoApplySuggestedTags :: [(Tag, [Tag])] -> [Tag]
+autoApplySuggestedTags =
+  mapMaybe
+    ( \(_, suggestedTags) ->
+        if null suggestedTags
+          then Nothing
+          else Just (head suggestedTags)
+    )
+
+processDocuments :: S.Settings -> [Document] -> [Document] -> IO [Document]
+processDocuments settings allDocs docs =
+  mapM processDocument docs
+  where
+    processDocument doc = do
+      tags <-
+        S.fromList . autoApplySuggestedTags
+          <$> suggestTags settings allDocs doc
+      let doc' =
+            doc
+              { index =
+                  doc.index
+                    { tags =
+                        S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+                    }
+              }
+      withGit do
+        J.encodeFile doc.iFilePath doc'.index
+        commitAll [doc.iFilePath] (printf "process %s (auto)" doc.iFilePath)
+      pure doc'
+
+tagDocumentInteractively :: S.Settings -> [Document] -> Document -> IO Document
+tagDocumentInteractively settings allDocs doc = do
+  suggestedTags <- suggestTags settings allDocs doc
   tags <-
     S.fromList . catMaybes
-      <$> mapM (uncurry tagDocument') suggestedTags
-  let tags' = S.delete (Tag "todo" Nothing) (index.tags `S.union` tags)
-      index' = index {tags = tags'}
-      iFilePath = "index" </> iFileName
+      <$> mapM (uncurry tagDocumentInteractively') suggestedTags
+  let doc' =
+        doc
+          { index =
+              doc.index
+                { tags =
+                    S.delete (Tag "todo" Nothing) (doc.index.tags `S.union` tags)
+                }
+          }
   withGit do
-    J.encodeFile iFilePath index'
-    commitAll [iFilePath] (printf "process %s (interactive)" iFilePath)
+    J.encodeFile doc.iFilePath doc'.index
+    commitAll [doc.iFilePath] (printf "process %s (interactive)" doc.iFilePath)
+    pure doc'
   where
-    tagDocument' :: Tag -> [Tag] -> IO (Maybe Tag)
-    tagDocument' tag@(Tag tagKey Nothing) tags = do
+    tagDocumentInteractively' :: Tag -> [Tag] -> IO (Maybe Tag)
+    tagDocumentInteractively' tag@(Tag tagKey Nothing) tags = do
       choice <-
         P.prompt $
           P.choice (printf "tag with %s?" tagKey) (("n" :: String) N.:| ["y"])
       pure $ if (choice == "y") then Just tag else Nothing
-    tagDocument' (Tag tagKey (Just _)) tags = do
+    tagDocumentInteractively' (Tag tagKey (Just _)) tags = do
       tagValue <-
         fmap T.pack . P.prompt $
           P.string
diff --git a/app/Settings.hs b/app/Settings.hs
new file mode 100644
index 0000000..5d4d55f
--- /dev/null
+++ b/app/Settings.hs
@@ -0,0 +1,92 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedRecordDot #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module Settings
+  ( Settings (..),
+    readSettings,
+    writeSettings,
+    SuggestedTag (..),
+  )
+where
+
+import Data.Aeson qualified as A
+import Data.Function (on)
+import Data.List (nub)
+import Data.Text qualified as T
+import Data.Yaml (decodeFileThrow, encodeFile)
+import GHC.Generics (Generic)
+import System.Directory (doesFileExist)
+import System.Environment.XDG.BaseDir (getSystemConfigFiles, getUserConfigFile)
+import Text.RE.TDFA.Text qualified as R
+
+data Settings = Settings
+  { suggestedTags :: [SuggestedTag]
+  }
+  deriving (Show, Generic)
+
+instance Semigroup Settings where
+  a <> b =
+    Settings
+      { suggestedTags = nub (a.suggestedTags <> b.suggestedTags)
+      }
+
+instance Monoid Settings where
+  mempty =
+    Settings
+      { suggestedTags =
+          [ -- TODO Add `SuggestedTagByDate`
+            --
+            -- By having a dedicated constructor for dates wie can take the following improvements into account:
+            --
+            -- -   date formats that spell out the month name, ie. `1 Januar 1970` or `1 Jan 1970`
+            -- -   perform minor corrections on OCR, ie. parse `0 1.01.1970`
+            SuggestTagByRE "createdAt" [R.re|[0-9]{2}\.[0-9]{2}\.[0-9]{4}|]
+          ]
+      }
+
+instance A.FromJSON Settings
+
+instance A.ToJSON Settings
+
+data SuggestedTag
+  = SuggestTagByRE T.Text R.RE
+  deriving (Show, Generic, Eq)
+
+instance Show R.RE where
+  show = R.reSource
+
+instance Eq R.RE where
+  (==) = (==) `on` show
+
+instance A.FromJSON SuggestedTag
+
+instance A.ToJSON SuggestedTag
+
+instance A.FromJSON R.RE where
+  parseJSON = A.withText "regular expression (POSIX)" (R.compileRegex . T.unpack)
+
+instance A.ToJSON R.RE where
+  toJSON = A.toJSON . R.reSource
+
+readSettings :: IO Settings
+readSettings =
+  fmap mconcat
+    . mapM
+      ( \fp ->
+          doesFileExist fp >>= \case
+            True -> decodeFileThrow fp
+            False -> pure mempty
+      )
+    =<< concat
+      <$> sequence
+        [ getSystemConfigFiles "apaperless" "settings.yaml",
+          ((: []) <$> getUserConfigFile "apaperless" "settings.yaml"),
+          pure ["apaperless.yaml"]
+        ]
+
+writeSettings :: FilePath -> Settings -> IO ()
+writeSettings =
+  encodeFile
-- 
cgit v1.2.3