From ea1236f2cf6d3ef4b739b2ca28f47a3bbed42295 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Tue, 17 Oct 2023 14:14:48 +0200 Subject: refactor history --- app/History/Cache.hs | 26 +++++++++ app/History/CommitHash.hs | 23 ++++++++ app/History/CommitInfo.hs | 122 +++++++++++++++++++++++++++++++++++++++ app/History/IssueEvent.hs | 19 ++++++ app/History/PartialCommitInfo.hs | 97 +++++++++++++++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 app/History/Cache.hs create mode 100644 app/History/CommitHash.hs create mode 100644 app/History/CommitInfo.hs create mode 100644 app/History/IssueEvent.hs create mode 100644 app/History/PartialCommitInfo.hs (limited to 'app/History') diff --git a/app/History/Cache.hs b/app/History/Cache.hs new file mode 100644 index 0000000..af40a84 --- /dev/null +++ b/app/History/Cache.hs @@ -0,0 +1,26 @@ +module History.Cache (cached) where + +import Data.Binary (Binary, decodeFileOrFail, encodeFile) +import Data.Text qualified as T +import System.Directory (createDirectoryIfMissing, doesFileExist, getCurrentDirectory) + +cached :: Binary a => T.Text -> (T.Text -> IO a) -> IO a +cached hash func = do + -- FIXME Cache inside Git root + -- + -- The cache location should not be dependant on the current directory, but + -- should be placed alongside the `.git` directory. + cwd <- getCurrentDirectory + createDirectoryIfMissing True (cwd ++ "/.anissue") + let file = (cwd ++ "/.anissue/" ++ T.unpack hash) + doesFileExist file >>= \case + True -> + decodeFileOrFail file >>= \case + Left _ -> generate file + Right blob -> pure blob + False -> generate file + where + generate file = do + blob <- func hash + encodeFile file blob + pure blob diff --git a/app/History/CommitHash.hs b/app/History/CommitHash.hs new file mode 100644 index 0000000..3fcbb90 --- /dev/null +++ b/app/History/CommitHash.hs @@ -0,0 +1,23 @@ +module History.CommitHash + ( CommitHash (..), + toShortText, + toText, + ) +where + +import Data.Binary (Binary) +import Data.Text qualified as T +import GHC.Generics (Generic) + +data CommitHash + = WorkingTree + | Commit T.Text + deriving (Show, Binary, Generic) + +toShortText :: CommitHash -> T.Text +toShortText WorkingTree = "" +toShortText (Commit hash) = T.take 7 hash + +toText :: CommitHash -> T.Text +toText WorkingTree = "" +toText (Commit hash) = hash diff --git a/app/History/CommitInfo.hs b/app/History/CommitInfo.hs new file mode 100644 index 0000000..8461b8e --- /dev/null +++ b/app/History/CommitInfo.hs @@ -0,0 +1,122 @@ +module History.CommitInfo + ( CommitInfo (..), + fromPartialCommitInfos, + issueEvents, + diffCommitInfos, + ) +where + +import Data.Binary (Binary) +import Data.Function (on) +import Data.List (deleteFirstsBy, find) +import Data.Maybe (catMaybes, isJust) +import GHC.Generics (Generic) +import History.CommitHash (CommitHash) +import History.IssueEvent (IssueEvent (..)) +import History.PartialCommitInfo (PartialCommitInfo (..)) +import Issue (Issue (..), id) +import Issue.Tag qualified as I +import TreeGrepper.Match (Position (..)) +import Prelude hiding (id) + +data CommitInfo = CommitInfo + { hash :: CommitHash, + filesChanged :: [FilePath], + issues :: [Issue] + } + deriving (Show, Binary, Generic) + +fromPartialCommitInfos :: [PartialCommitInfo] -> [CommitInfo] +fromPartialCommitInfos [] = [] +fromPartialCommitInfos (partialCommitInfo : partialCommitInfos) = + scanl propagate (assume partialCommitInfo) partialCommitInfos + where + assume :: PartialCommitInfo -> CommitInfo + assume (PartialCommitInfo {..}) = CommitInfo {..} + + propagate :: CommitInfo -> PartialCommitInfo -> CommitInfo + propagate oldInfo newInfo@(PartialCommitInfo {..}) = + CommitInfo + { issues = + catMaybes $ + mergeListsBy + eq + ( \old new -> + Just + new + { provenance = old.provenance, + internalTags = I.internalTags new.title old.provenance + } + ) + ( \old -> + if elemBy eq old newInfo.issues + || not (old.file `elem` newInfo.filesChanged) + then Just old + else Nothing + ) + (\new -> Just new) + oldInfo.issues + newInfo.issues, + .. + } + + eq = (==) `on` id + +issueEvents :: [CommitInfo] -> [(CommitHash, [IssueEvent])] +issueEvents xs = zip (map (.hash) xs') (zipWith diffCommitInfos xs xs') + where + xs' = tail xs + +diffCommitInfos :: CommitInfo -> CommitInfo -> [IssueEvent] +diffCommitInfos oldInfo newInfo = + concat + [ [IssueCreated newHash issue | issue <- deleteFirstsBy eq newIssues oldIssues], + [ IssueChanged newHash (last issues) + | issues <- intersectBy' eq newIssues oldIssues, + not (null [(x, y) | x <- issues, y <- issues, clear x /= clear y]) + ], + [IssueDeleted newHash issue | issue <- deleteFirstsBy eq oldIssues newIssues] + ] + where + newHash = newInfo.hash + newIssues = newInfo.issues + oldIssues = oldInfo.issues + + -- TODO Fix issue comparison + -- + -- Because issues carry `provenance` and `internalTags`, issues compare + -- unequally when we want them to be equal. + clear i = + i + { provenance = Nothing, + internalTags = [], + start = Position 0 0, + end = Position 0 0, + file = "" + } + + eq = (==) `on` id + +mergeListsBy :: (a -> a -> Bool) -> (a -> a -> b) -> (a -> b) -> (a -> b) -> [a] -> [a] -> [b] +mergeListsBy eq onBoth onLeft onRight lefts rights = + concat + [ [ maybe (onLeft left) (onBoth left) right + | left <- lefts, + right <- + let rights' = filter (eq left) rights + in if null rights' then [Nothing] else (map Just rights') + ], + [ onRight right + | right <- rights, + not (elemBy eq right lefts) + ] + ] + +-- | A variant of `Data.List.intersectBy` that retuns the witnesses of the +-- intersection. +intersectBy' :: (a -> a -> Bool) -> [a] -> [a] -> [[a]] +intersectBy' eq xs ys = filter (not . null) (map (\x -> x : filter (eq x) ys) xs) + +-- | A variant of `elem` that uses a custom comparison function. +elemBy :: (a -> a -> Bool) -> a -> [a] -> Bool +elemBy eq x xs = isJust $ find (eq x) xs diff --git a/app/History/IssueEvent.hs b/app/History/IssueEvent.hs new file mode 100644 index 0000000..88886dd --- /dev/null +++ b/app/History/IssueEvent.hs @@ -0,0 +1,19 @@ +module History.IssueEvent (IssueEvent (..)) where + +import History.CommitHash (CommitHash) +import Issue (Issue) + +data IssueEvent + = IssueCreated + { hash :: CommitHash, + issue :: Issue + } + | IssueChanged + { hash :: CommitHash, + issue :: Issue + } + | IssueDeleted + { hash :: CommitHash, + issue :: Issue + } + deriving (Show) diff --git a/app/History/PartialCommitInfo.hs b/app/History/PartialCommitInfo.hs new file mode 100644 index 0000000..fb53fbf --- /dev/null +++ b/app/History/PartialCommitInfo.hs @@ -0,0 +1,97 @@ +module History.PartialCommitInfo + ( PartialCommitInfo (..), + getPartialCommitInfos, + ) +where + +import Control.Exception (catch) +import Data.Binary (Binary) +import Data.ByteString.Lazy.Char8 qualified as LB8 +import Data.Function ((&)) +import Data.Text qualified as T +import Die (die) +import Exception qualified as E +import GHC.Generics (Generic) +import Git qualified +import History.Cache (cached) +import History.CommitHash (CommitHash (..)) +import Issue (Issue, getIssuesPar) +import Process (proc, sh) +import System.Directory (getCurrentDirectory) +import System.FilePath (()) +import System.IO.Temp (withSystemTempDirectory) +import System.Process.Typed (setWorkingDir) + +-- | `PartialCommitInfo` records the complete issues ONLY in files that have +-- been changed in the commit. +data PartialCommitInfo = PartialCommitInfo + { hash :: CommitHash, + filesChanged :: [FilePath], + issues :: [Issue] + } + deriving (Show, Binary, Generic) + +getPartialCommitInfos :: IO [PartialCommitInfo] +getPartialCommitInfos = do + -- TODO Revise `getCommitHashes` + -- + -- - Should throw if no commits. + -- - Should always be reversed? + commitHashes <- reverse <$> Git.getCommitHashes + mapM getCommitInfoOf (map Commit commitHashes ++ [WorkingTree]) + +getCommitInfoOf :: CommitHash -> IO PartialCommitInfo +getCommitInfoOf WorkingTree = do + (issuesWorkingTreeChanged, filesChanged) <- getIssuesAndFilesWorkingTreeChanged [] + pure $ + PartialCommitInfo + { hash = WorkingTree, + filesChanged = filesChanged, + issues = issuesWorkingTreeChanged + } +getCommitInfoOf (Commit hash) = cached (hash <> (T.pack ".changed")) $ \_ -> do + (issuesCommitChanged, filesChanged) <- getIssuesAndFilesCommitChanged hash + pure $ + PartialCommitInfo + { hash = Commit hash, + filesChanged = filesChanged, + issues = issuesCommitChanged + } + +-- | Given the hash of a commit, get all issues in the files which have +-- been changed by this commit, as well as all changed files. +getIssuesAndFilesCommitChanged :: T.Text -> IO ([Issue], [FilePath]) +getIssuesAndFilesCommitChanged hash = do + withSystemTempDirectory "history" $ \tmp -> do + let cwd = tmp T.unpack hash + Git.withWorkingTree cwd hash do + files <- gitShowChanged cwd + issues <- concat <$> catch (getIssuesPar cwd files) dieOfInvalidTreeGrepperResult + pure (issues, files) + +dieOfInvalidTreeGrepperResult :: E.InvalidTreeGrepperResult -> IO a +dieOfInvalidTreeGrepperResult (E.InvalidTreeGrepperResult e) = + die e + +-- | Gets issues in all files which have been changed in your current +-- [working +-- - tree](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefworkingtreeaworkingtree) +getIssuesAndFilesWorkingTreeChanged :: [FilePath] -> IO ([Issue], [FilePath]) +getIssuesAndFilesWorkingTreeChanged paths = do + cwd <- getCurrentDirectory + files <- gitLsFilesModifiedIn cwd paths + issues <- concat <$> catch (getIssuesPar cwd files) dieOfInvalidTreeGrepperResult + pure (issues, files) + +gitShowChanged :: FilePath -> IO [FilePath] +gitShowChanged cwd = + Prelude.lines . LB8.unpack + <$> sh ("git show -p --name-only --format=''" & setWorkingDir cwd) + +gitLsFilesModifiedIn :: FilePath -> [FilePath] -> IO [FilePath] +gitLsFilesModifiedIn cwd paths = + Prelude.lines . LB8.unpack + <$> sh + ( proc "git ls-files --modified %" ("--" : paths) + & setWorkingDir cwd + ) -- cgit v1.2.3