From 3c6e62b75293b6625509ade3c278fc2d4d147c30 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Thu, 7 Dec 2023 03:55:45 +0100 Subject: chore: increase performance by caching everything Initial cache generation is slower, as we are losing out on parallelism. --- app/History.hs | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 254 insertions(+), 13 deletions(-) (limited to 'app/History.hs') diff --git a/app/History.hs b/app/History.hs index 6a4ddbe..e1ea0ab 100644 --- a/app/History.hs +++ b/app/History.hs @@ -1,15 +1,44 @@ module History - ( getHistory, + ( History (..), + getHistory, ) where -import History.CommitHash (CommitHash) -import History.CommitInfo (CommitInfo (..), fromPartialCommitInfos, issueEvents) +import CMark qualified as D +import Control.Exception (catch, handle, try) +import Data.Binary (Binary) +import Data.ByteString.Lazy qualified as LB +import Data.Function (on, (&)) +import Data.List (deleteFirstsBy, find) +import Data.List.NonEmpty qualified as N +import Data.Maybe (catMaybes, isJust) +import Data.Text qualified as T +import Data.Text.Encoding qualified as T +import Data.Text.IO qualified as T +import Data.Text.Lazy qualified as LT +import Data.Text.Lazy.Encoding qualified as LT +import Die (die) +import Exception qualified as E +import GHC.Generics (Generic) +import Git qualified +import History.Cache (cachedMaybe) +import History.CommitHash (CommitHash (..)) +import History.CommitHash qualified as C import History.IssueEvent (IssueEvent (..)) -import History.PartialCommitInfo (getPartialCommitInfos) -import Issue (Issue) +import Issue qualified as I +import Issue.Parser qualified as I +import Issue.Provenance qualified as I +import Issue.Tag qualified as I +import Issue.Text qualified as I +import Patch qualified as A +import Process (proc, sh) +import Render qualified as P +import System.Directory (getCurrentDirectory) +import System.FilePath (()) +import System.IO.Temp (withSystemTempDirectory) +import System.Process.Typed (setWorkingDir) +import TreeGrepper.Comment qualified as G import Tuple () -import Prelude hiding (id, lines) -- TODO Reduce cached data size -- @@ -27,10 +56,222 @@ import Prelude hiding (id, lines) -- @topic caching -- @backlog -getHistory :: IO [(CommitHash, [IssueEvent], [Issue])] -getHistory = do - commitInfos <- fromPartialCommitInfos <$> getPartialCommitInfos - let commitHashes = map (.hash) commitInfos - issueses = map (.issues) commitInfos - issueEventses <- map (._2) <$> issueEvents commitInfos - pure (zip3 commitHashes issueEventses issueses) +data History = History + { commitHash :: CommitHash, + issues :: [I.Issue], + issueEvents :: [IssueEvent] + } + deriving (Show, Generic, Binary) + +getHistory :: IO History +getHistory = getHistoryOf WorkingTree + +getHistoryOf :: CommitHash -> IO History +getHistoryOf commitHash = cachedMaybe (C.toText commitHash) do + maybeParentCommitHash <- getParentCommitHashOf commitHash + case maybeParentCommitHash of + Just parentCommitHash -> do + parentHistory <- getHistoryOf parentCommitHash + scramble <- getScrambleOf commitHash + propagate commitHash parentHistory scramble + Nothing -> unsafeAssume commitHash =<< getScrambleOf commitHash + +getParentCommitHashOf :: CommitHash -> IO (Maybe CommitHash) +getParentCommitHashOf commitHash = + either + (\_ -> Nothing) + (Just . Commit . T.strip . T.decodeUtf8 . LB.toStrict) + <$> try @E.ProcessException + ( case commitHash of + WorkingTree -> sh "git show -s --format=%H HEAD" + Commit hash -> sh (proc "git show -s --format=%%H %^" hash) + ) + +-- | `Scramble` records the complete issues ONLY in files that have +-- been changed in the commit. +data Scramble = Scramble + { commitHash :: CommitHash, + filesChanged :: [FilePath], + issues :: [I.Issue] + } + deriving (Show, Binary, Generic) + +getScrambleOf :: CommitHash -> IO Scramble +getScrambleOf commitHash@WorkingTree = do + (issues, filesChanged) <- getIssuesAndFilesWorkingTreeChanged [] + pure $ Scramble {..} +getScrambleOf commitHash@(Commit hash) = do + (issues, filesChanged) <- getIssuesAndFilesCommitChanged hash + pure $ Scramble {..} + +-- | Given the hash of a commit, get all issues in the files which have +-- been changed by this commit, as well as all changed files. +getIssuesAndFilesCommitChanged :: T.Text -> IO ([I.Issue], [FilePath]) +getIssuesAndFilesCommitChanged hash = do + withSystemTempDirectory "history" $ \tmp -> do + let cwd = tmp T.unpack hash + Git.withWorkingTree cwd hash do + files <- gitShowChanged cwd + issues <- concat <$> catch (mapM (getIssues cwd) files) dieOfInvalidTreeGrepperResult + pure (issues, files) + +-- | Get all issues in the given directory and file. +getIssues :: FilePath -> FilePath -> IO [I.Issue] +getIssues cwd filename = + handle (\(_ :: E.UnknownFileExtension) -> pure []) $ + fmap catMaybes . mapM (fromComment cwd) + =<< G.getComments cwd filename + +-- | Note that `provenance` is trivial and needs to be fixed up later. +fromComment :: FilePath -> G.Comment -> IO (Maybe I.Issue) +fromComment cwd comment = do + commit <- I.commitFromHEAD cwd + let provenance = I.Provenance commit commit + + pure $ + ( \parseResult -> + let (markers, title) = + I.stripIssueMarkers (T.pack (show (P.render parseResult.heading))) + in I.Issue + { title = title, + description = N.nonEmpty parseResult.paragraphs, + file = comment.file, + provenance = provenance, + start = comment.start, + end = comment.end, + tags = I.extractTags parseResult.tags, + markers = markers, + rawText = rawText, + commentStyle = commentStyle, + comments = N.nonEmpty parseResult.comments, + closed = False + } + ) + <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) + where + (commentStyle, rawText) = G.uncomment comment.file_type comment.text + +dieOfInvalidTreeGrepperResult :: E.InvalidTreeGrepperResult -> IO a +dieOfInvalidTreeGrepperResult (E.InvalidTreeGrepperResult e) = + die e + +-- | Gets issues in all files which have been changed in your current +-- [working +-- - tree](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefworkingtreeaworkingtree) +getIssuesAndFilesWorkingTreeChanged :: [FilePath] -> IO ([I.Issue], [FilePath]) +getIssuesAndFilesWorkingTreeChanged paths = do + cwd <- getCurrentDirectory + files <- gitLsFilesModifiedIn cwd paths + issues <- concat <$> catch (mapM (getIssues cwd) files) dieOfInvalidTreeGrepperResult + pure (issues, files) + +gitShowChanged :: FilePath -> IO [FilePath] +gitShowChanged cwd = + map T.unpack . T.lines . T.decodeUtf8 . LB.toStrict + <$> sh ("git show -p --name-only --format=''" & setWorkingDir cwd) + +gitLsFilesModifiedIn :: FilePath -> [FilePath] -> IO [FilePath] +gitLsFilesModifiedIn cwd paths = + map T.unpack . T.lines . T.decodeUtf8 . LB.toStrict + <$> sh + ( proc "git ls-files --modified %" ("--" : paths) + & setWorkingDir cwd + ) + +propagate :: CommitHash -> History -> Scramble -> IO History +propagate commitHash oldHistory scramble = do + let issues = propagateIssues oldHistory.issues scramble + issueEvents <- propagateIssueEvents oldHistory.issueEvents oldHistory.issues commitHash issues + pure $ History {..} + +propagateIssues :: [I.Issue] -> Scramble -> [I.Issue] +propagateIssues oldIssues partialCommitInfo = + mergeListsBy + eq + ( \old new -> + new + { I.provenance = + I.Provenance + { first = old.provenance.first, + last = + if ((/=) `on` (.rawText)) old new + then new.provenance.last + else old.provenance.last + }, + I.closed = False + } + ) + ( \old -> + if elemBy eq old partialCommitInfo.issues + || not (old.file `elem` partialCommitInfo.filesChanged) + then old + else old {I.closed = True} + ) + id + oldIssues + partialCommitInfo.issues + +propagateIssueEvents :: [IssueEvent] -> [I.Issue] -> CommitHash -> [I.Issue] -> IO [IssueEvent] +propagateIssueEvents oldIssueEvents oldIssues commitHash issues = + fmap (oldIssueEvents ++) $ newIssueEvents oldIssues commitHash issues + +newIssueEvents :: [I.Issue] -> CommitHash -> [I.Issue] -> IO [IssueEvent] +newIssueEvents oldIssues' commitHash issues' = + sequence $ + concat + [ [IssueCreated commitHash issue <$> patchCreated issue | issue <- deleteFirstsBy eq issues oldIssues], + [ IssueChanged commitHash oldIssue newIssue <$> patchChanged oldIssue newIssue + | (newIssue : oldIssue : _) <- intersectBy' eq issues oldIssues, + neq newIssue oldIssue + ], + [IssueDeleted commitHash issue <$> patchDeleted issue | issue <- deleteFirstsBy eq oldIssues issues] + ] + where + issues = filter (not . (.closed)) issues' + oldIssues = filter (not . (.closed)) oldIssues' + + patchCreated new = diff "" new.rawText + patchChanged old new = diff old.rawText new.rawText + patchDeleted old = diff old.rawText "" + + diff old new = withSystemTempDirectory "diff" $ \tmp -> do + let cwd = tmp + T.writeFile (tmp "old") old + T.writeFile (tmp "new") new + A.parse . LT.toStrict . LT.decodeUtf8 <$> sh ("git diff --no-index -- old new || :" & setWorkingDir cwd) + +unsafeAssume :: CommitHash -> Scramble -> IO History +unsafeAssume commitHash scramble = do + let issues = scramble.issues + issueEvents <- propagateIssueEvents [] [] commitHash issues + pure $ History {..} + +eq :: I.Issue -> I.Issue -> Bool +eq = (==) `on` (.id) + +neq :: I.Issue -> I.Issue -> Bool +neq = (/=) `on` (.rawText) + +mergeListsBy :: (a -> a -> Bool) -> (a -> a -> b) -> (a -> b) -> (a -> b) -> [a] -> [a] -> [b] +mergeListsBy eq onBoth onLeft onRight lefts rights = + concat + [ [ maybe (onLeft left) (onBoth left) right + | left <- lefts, + right <- + let rights' = filter (eq left) rights + in if null rights' then [Nothing] else (map Just rights') + ], + [ onRight right + | right <- rights, + not (elemBy eq right lefts) + ] + ] + +-- | A variant of `Data.List.intersectBy` that retuns the witnesses of the +-- intersection. +intersectBy' :: (a -> a -> Bool) -> [a] -> [a] -> [[a]] +intersectBy' eq xs ys = filter (not . null) (map (\x -> x : filter (eq x) ys) xs) + +-- | A variant of `elem` that uses a custom comparison function. +elemBy :: (a -> a -> Bool) -> a -> [a] -> Bool +elemBy eq x xs = isJust $ find (eq x) xs -- cgit v1.2.3