From ea1236f2cf6d3ef4b739b2ca28f47a3bbed42295 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Tue, 17 Oct 2023 14:14:48 +0200 Subject: refactor history --- app/History.hs | 392 ++++----------------------------------------------------- 1 file changed, 25 insertions(+), 367 deletions(-) (limited to 'app/History.hs') diff --git a/app/History.hs b/app/History.hs index 002ca32..efb7015 100644 --- a/app/History.hs +++ b/app/History.hs @@ -1,373 +1,31 @@ module History - ( getIssues, - listIssues, - listEvents, - IssueEvent (..), + ( getHistory, ) where -import Control.Exception (Exception, catch, handle, throw) -import Data.Aeson (eitherDecode) -import Data.Binary (Binary, decodeFileOrFail, encodeFile) -import Data.ByteString.Lazy.Char8 qualified as L8 -import Data.Function ((&)) -import Data.List (foldl') -import Data.Maybe (catMaybes, mapMaybe) -import Data.Text (Text, append, isPrefixOf, lines, pack, unpack) -import Data.Text.Encoding (decodeUtf8) -import GHC.Generics (Generic) -import Git qualified -import Issue (Issue (..), fromMatch, id) -import Issue.Filter (Filter, applyFilters) -import Issue.Sort (Sort, applySorts) -import Issue.Tag qualified as I -import Parallel (parMapM) -import Process (proc, sh) -import System.Directory (createDirectoryIfMissing, doesFileExist, getCurrentDirectory) -import System.Exit (ExitCode (ExitFailure), exitWith) -import System.FilePath (takeExtension, ()) -import System.IO.Temp (withSystemTempDirectory) -import System.Process.Typed (setWorkingDir) -import Text.Printf (printf) -import TreeGrepper.Match qualified as G -import TreeGrepper.Result qualified as G +import History.CommitHash (CommitHash) +import History.CommitInfo (CommitInfo (..), fromPartialCommitInfos, issueEvents) +import History.IssueEvent (IssueEvent (..)) +import History.PartialCommitInfo (getPartialCommitInfos) +import Issue (Issue) import Prelude hiding (id, lines) -import Prelude qualified as Prelude -listEvents :: IO [[IssueEvent]] -listEvents = do - commitHashes <- fmap reverse getCommitHashes - case commitHashes of - [] -> pure [] - hashFirst : hashesRest -> do - issuesInitial <- getIssuesCommitAll hashFirst - commitInfos <- mapM getCommitInfo hashesRest - commitInfoWorkingTree <- getCommitInfoWorkingTree [] - let eventses = getEvents hashFirst issuesInitial (commitInfos ++ [commitInfoWorkingTree]) - pure eventses - -listIssues :: [Sort] -> [Filter] -> [FilePath] -> IO [Issue] -listIssues sort filters paths = do - commitHashes <- fmap reverse getCommitHashes - case commitHashes of - [] -> - pure [] - hashFirst : hashesRest -> do - -- TODO Reduce cached data size - -- - -- Right now we are caching complete `Issue` instances, which - -- contain the full issue title and description. For a fast - -- lookup it may already be enough to only store the issue's - -- - -- \* filename - -- \* start position - -- \* end position - -- - -- With this information we can use git to quickly look up the - -- complete issue text and parse it. - -- - -- @topic caching - issuesInitial <- getIssuesCommitAll hashFirst - commitInfos <- mapM getCommitInfo hashesRest - commitInfoWorkingTree <- getCommitInfoWorkingTree paths - let eventses = getEvents hashFirst issuesInitial (commitInfos ++ [commitInfoWorkingTree]) - let issues = mapMaybe issueFromIssueEvents eventses - issuesFiltered = applyFilters filters issues - issuesSorted = applySorts sort issuesFiltered - issuesWithinPaths = - case paths of - [] -> - issuesSorted - _ -> - filter withinPaths issuesSorted - pure issuesWithinPaths - where - withinPaths issue = - any (\path -> isPrefixOf (pack path) (pack issue.file)) paths - -getCommitHashes :: IO [Text] -getCommitHashes = - fmap (lines . decodeUtf8 . L8.toStrict) $ sh "git log --format=%H" - -data IssueEvent - = IssueCreated - { hash :: Maybe Text, - issue :: Issue - } - | IssueChanged - { hash :: Maybe Text, - issue :: Issue - } - | IssueDeleted - { hash :: Maybe Text - } - deriving (Show) - -issueFromIssueEvent :: IssueEvent -> Maybe Issue -issueFromIssueEvent issueEvent = - case issueEvent of - IssueCreated {issue} -> - Just issue - IssueChanged {issue} -> - Just issue - IssueDeleted _ -> - Nothing - -data CommitInfo = CommitInfo - -- TODO Extract CommitInfo so we can change hash' -> hash - -- - -- @topic refactoring - { hash' :: Maybe Text, - filesChanged :: [FilePath], - issues :: [Issue] - } - deriving (Show, Binary, Generic) - -getCommitInfo :: Text -> IO CommitInfo -getCommitInfo hash = cached (append hash (pack ".changed")) $ \_ -> do - (issuesCommitChanged, filesChanged) <- getIssuesAndFilesCommitChanged hash - pure $ - CommitInfo - { hash' = Just hash, - filesChanged = filesChanged, - issues = issuesCommitChanged - } - -getCommitInfoWorkingTree :: [FilePath] -> IO CommitInfo -getCommitInfoWorkingTree paths = do - (issuesWorkingTreeChanged, filesChanged) <- getIssuesAndFilesWorkingTreeChanged paths - pure $ - CommitInfo - { hash' = Nothing, - filesChanged = filesChanged, - issues = issuesWorkingTreeChanged - } - -getEvents :: Text -> [Issue] -> [CommitInfo] -> [[IssueEvent]] -getEvents hashInitial issuesInitial commitInfos = - let issueEventsesInitial = - map - ( \issueInitial -> - [ IssueCreated - { hash = Just hashInitial, - issue = issueInitial - } - ] - ) - issuesInitial - addIssueEventsFromCommitInfo issueEventses commitInfo = - let issuesCreated = - map - ( \issue -> - [ IssueCreated - { hash = commitInfo.hash', - issue = issue - } - ] - ) - $ filter isNewIssue commitInfo.issues - isNewIssue issue = - all - (\issueOther -> id issueOther /= id issue) - (mapMaybe issueFromIssueEvents $ issueEventses) - addIssueChangedOrDeleted issueEventses' = - map - ( \issueEvents -> - case issueFromIssueEvent $ head issueEvents of - Nothing -> - issueEvents - Just issue -> - case filter isSameIssue commitInfo.issues of - [] -> - if any isSameFile commitInfo.filesChanged - then - IssueDeleted - { hash = commitInfo.hash' - } - : issueEvents - else issueEvents - issueCommit : _ -> - IssueChanged - { hash = commitInfo.hash', - issue = issueCommit - } - : issueEvents - where - isSameIssue issueCommit = - id issueCommit == id issue - isSameFile fileChanged = - fileChanged == issue.file - ) - issueEventses' - in issuesCreated ++ addIssueChangedOrDeleted issueEventses - in foldl' - ( addIssueEventsFromCommitInfo - ) - issueEventsesInitial - commitInfos - -issueFromIssueEvents :: [IssueEvent] -> Maybe Issue -issueFromIssueEvents issueEvents = - case issueEvents of - IssueCreated {issue} : [] -> - Just issue - IssueChanged {issue} : _ -> do - issueFirst <- issueFromIssueEvent $ head $ reverse issueEvents - pure $ - issue - { provenance = issueFirst.provenance, - internalTags = I.internalTags issue.title issueFirst.provenance - } - IssueDeleted _ : _ -> - Nothing - _ -> - Nothing - --- | Gets issues in all files which have been changed in your current --- [working --- - tree](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefworkingtreeaworkingtree) -getIssuesAndFilesWorkingTreeChanged :: [FilePath] -> IO ([Issue], [FilePath]) -getIssuesAndFilesWorkingTreeChanged paths = do - cwd <- getCurrentDirectory - files <- gitLsFilesModifiedIn cwd paths - issues <- concat <$> catch (getIssuesPar cwd files) dieOfInvalidTreeGrepperResult - pure (issues, files) - --- | Given the hash of a commit, get all issues in all files at the --- [tree](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddeftreeatree) --- of this commit. -getIssuesCommitAll :: Text -> IO [Issue] -getIssuesCommitAll hash = cached (append hash (pack ".all")) $ \_ -> do - withSystemTempDirectory "history" $ \tmp -> do - let cwd = tmp unpack hash - Git.withWorkingTree cwd hash do - files <- gitLsFilesAll cwd - concat <$> catch (getIssuesPar cwd files) (dieOfInvalidTreeGrepperResult) - --- | Given the hash of a commit, get all issues in the files which have --- been changed by this commit, as well as all changed files. -getIssuesAndFilesCommitChanged :: Text -> IO ([Issue], [FilePath]) -getIssuesAndFilesCommitChanged hash = do - withSystemTempDirectory "history" $ \tmp -> do - let cwd = tmp unpack hash - Git.withWorkingTree cwd hash do - files <- gitShowChanged cwd - issues <- concat <$> catch (getIssuesPar cwd files) (dieOfInvalidTreeGrepperResult) - pure (issues, files) - -gitLsFilesAll :: FilePath -> IO [FilePath] -gitLsFilesAll cwd = - Prelude.lines . L8.unpack - <$> sh ("git ls-files --cached --exclude-standard --other" & setWorkingDir cwd) - -gitShowChanged :: FilePath -> IO [FilePath] -gitShowChanged cwd = - Prelude.lines . L8.unpack - <$> sh ("git show -p --name-only --format=''" & setWorkingDir cwd) - -gitLsFilesModifiedIn :: FilePath -> [FilePath] -> IO [FilePath] -gitLsFilesModifiedIn cwd paths = - Prelude.lines . L8.unpack - <$> sh - ( proc "git ls-files --modified %" ("--" : paths) - & setWorkingDir cwd - ) - --- | Get all issues in the given directory and files. Runs --- parallelized. -getIssuesPar :: FilePath -> [FilePath] -> IO [[Issue]] -getIssuesPar cwd files = - parMapM (handle forgetGetIssuesExceptions . getIssues cwd) files - -data UnknownFileExtension = UnknownFileExtension - { extension :: String - } - deriving (Show) - -instance Exception UnknownFileExtension - -forgetGetIssuesExceptions :: UnknownFileExtension -> IO [a] -forgetGetIssuesExceptions _ = pure [] - -data InvalidTreeGrepperResult = InvalidTreeGrepperResult - { error :: String - } - deriving (Show) - -instance Exception InvalidTreeGrepperResult - -dieOfInvalidTreeGrepperResult :: InvalidTreeGrepperResult -> IO a -dieOfInvalidTreeGrepperResult (InvalidTreeGrepperResult e) = - die e - --- | Get all issues in the given directory and file. -getIssues :: FilePath -> FilePath -> IO [Issue] -getIssues cwd filename = do - let extension = takeExtension filename - treeGrepperLanguage = - -- TODO Add support for all tree-grepper supported files - -- - -- tree-grepper supported files can be listed through `tree-grepper - -- --languages`. - case extension of - ".elm" -> "elm" - ".hs" -> "haskell" - ".nix" -> "nix" - ".sh" -> "sh" - _ -> throw (UnknownFileExtension extension) - treeGrepperQuery = - case extension of - ".elm" -> "([(line_comment) (block_comment)])" - ".hs" -> "(comment)" - ".nix" -> "(comment)" - ".sh" -> "(comment)" - _ -> throw (UnknownFileExtension extension) - decode raw = - case eitherDecode raw of - Left e -> throw (InvalidTreeGrepperResult e) - Right treeGrepperResult -> treeGrepperResult - - matches <- - concatMap (\result -> map ((,) result) result.matches) - . map fixTreeGrepper - . decode - <$> sh - ( proc - "tree-grepper --query % % --format json %" - (treeGrepperLanguage :: String) - (treeGrepperQuery :: String) - filename - & setWorkingDir cwd - ) - - catMaybes <$> mapM (uncurry (fromMatch cwd)) matches - -fixTreeGrepper :: G.Result -> G.Result -fixTreeGrepper treeGrepperResult = - treeGrepperResult {G.matches = G.merge treeGrepperResult.matches} - -cached :: Binary a => Text -> (Text -> IO a) -> IO a -cached hash func = do - -- FIXME Cache inside Git root - -- - -- The cache location should not be dependant on the current directory, but - -- should be placed alongside the `.git` directory. - cwd <- getCurrentDirectory - createDirectoryIfMissing True (cwd ++ "/.anissue") - let file = (cwd ++ "/.anissue/" ++ unpack hash) - doesFileExist file >>= \case - True -> - decodeFileOrFail file >>= \case - Left _ -> generate file - Right blob -> pure blob - False -> generate file - where - generate file = do - blob <- func hash - encodeFile file blob - pure blob - -die :: String -> IO a -die s = do - printf "error: %s\n" s - exitWith (ExitFailure 1) +-- TODO Reduce cached data size +-- +-- Right now we are caching complete `Issue` instances, which +-- contain the full issue title and description. For a fast +-- lookup it may already be enough to only store the issue's +-- +-- \* filename +-- \* start position +-- \* end position +-- +-- With this information we can use git to quickly look up the +-- complete issue text and parse it. +-- +-- @topic caching + +getHistory :: IO ([Issue], [(CommitHash, [IssueEvent])]) +getHistory = do + commitInfos <- fromPartialCommitInfos <$> getPartialCommitInfos + pure ((last commitInfos).issues, issueEvents commitInfos) -- cgit v1.2.3