module History ( History (..), getHistory, ) where import CMark qualified as D import Control.Exception (catch, handle, try) import Data.Binary (Binary) import Data.ByteString.Lazy qualified as LB import Data.Function (on, (&)) import Data.List (deleteFirstsBy, find) import Data.List.NonEmpty qualified as N import Data.Maybe (catMaybes, isJust) import Data.Text qualified as T import Data.Text.Encoding qualified as T import Data.Text.IO qualified as T import Data.Text.Lazy qualified as LT import Data.Text.Lazy.Encoding qualified as LT import Die (die) import Exception qualified as E import GHC.Generics (Generic) import Git qualified import History.Cache (cachedMaybe) import History.CommitHash (CommitHash (..)) import History.CommitHash qualified as C import History.IssueEvent (IssueEvent (..)) import Issue qualified as I import Issue.Parser qualified as I import Issue.Provenance qualified as I import Issue.Tag qualified as I import Issue.Text qualified as I import Patch qualified as A import Process (proc, sh) import Render qualified as P import System.Directory (getCurrentDirectory) import System.FilePath (()) import System.IO.Temp (withSystemTempDirectory) import System.Process.Typed (setWorkingDir) import TreeGrepper.Comment qualified as G import Tuple () -- TODO Reduce cached data size -- -- Right now we are caching complete `Issue` instances, which -- contain the full issue title and description. For a fast -- lookup it may already be enough to only store the issue's -- -- \* filename -- \* start position -- \* end position -- -- With this information we can use git to quickly look up the -- complete issue text and parse it. -- -- @topic caching -- @backlog data History = History { commitHash :: CommitHash, issues :: [I.Issue], issueEvents :: [IssueEvent] } deriving (Show, Generic, Binary) getHistory :: IO History getHistory = getHistoryOf WorkingTree getHistoryOf :: CommitHash -> IO History getHistoryOf commitHash = cachedMaybe (C.toText commitHash) do maybeParentCommitHash <- getParentCommitHashOf commitHash case maybeParentCommitHash of Just parentCommitHash -> do parentHistory <- getHistoryOf parentCommitHash scramble <- getScrambleOf commitHash propagate commitHash parentHistory scramble Nothing -> unsafeAssume commitHash =<< getScrambleOf commitHash getParentCommitHashOf :: CommitHash -> IO (Maybe CommitHash) getParentCommitHashOf commitHash = either (\_ -> Nothing) (Just . Commit . T.strip . T.decodeUtf8 . LB.toStrict) <$> try @E.ProcessException ( case commitHash of WorkingTree -> sh "git show -s --format=%H HEAD" Commit hash -> sh (proc "git show -s --format=%%H %^" hash) ) -- | `Scramble` records the complete issues ONLY in files that have -- been changed in the commit. data Scramble = Scramble { commitHash :: CommitHash, filesChanged :: [FilePath], issues :: [I.Issue] } deriving (Show, Binary, Generic) getScrambleOf :: CommitHash -> IO Scramble getScrambleOf commitHash@WorkingTree = do (issues, filesChanged) <- getIssuesAndFilesWorkingTreeChanged [] pure $ Scramble {..} getScrambleOf commitHash@(Commit hash) = do (issues, filesChanged) <- getIssuesAndFilesCommitChanged hash pure $ Scramble {..} -- | Given the hash of a commit, get all issues in the files which have -- been changed by this commit, as well as all changed files. getIssuesAndFilesCommitChanged :: T.Text -> IO ([I.Issue], [FilePath]) getIssuesAndFilesCommitChanged hash = do withSystemTempDirectory "history" $ \tmp -> do let cwd = tmp T.unpack hash Git.withWorkingTree cwd hash do files <- gitShowChanged cwd issues <- concat <$> catch (mapM (getIssues cwd) files) dieOfInvalidTreeGrepperResult pure (issues, files) -- | Get all issues in the given directory and file. getIssues :: FilePath -> FilePath -> IO [I.Issue] getIssues cwd filename = handle (\(_ :: E.UnknownFileExtension) -> pure []) $ fmap catMaybes . mapM (fromComment cwd) =<< G.getComments cwd filename -- | Note that `provenance` is trivial and needs to be fixed up later. fromComment :: FilePath -> G.Comment -> IO (Maybe I.Issue) fromComment cwd comment = do commit <- I.commitFromHEAD cwd let provenance = I.Provenance commit commit pure $ ( \parseResult -> let (markers, title) = I.stripIssueMarkers (T.pack (show (P.render parseResult.heading))) in I.Issue { title = title, description = N.nonEmpty parseResult.paragraphs, file = comment.file, provenance = provenance, start = comment.start, end = comment.end, tags = I.extractTags parseResult.tags, markers = markers, rawText = rawText, commentStyle = commentStyle, comments = N.nonEmpty parseResult.comments, closed = False } ) <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) where (commentStyle, rawText) = G.uncomment comment.file_type comment.text dieOfInvalidTreeGrepperResult :: E.InvalidTreeGrepperResult -> IO a dieOfInvalidTreeGrepperResult (E.InvalidTreeGrepperResult e) = die e -- | Gets issues in all files which have been changed in your current -- [working -- - tree](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefworkingtreeaworkingtree) getIssuesAndFilesWorkingTreeChanged :: [FilePath] -> IO ([I.Issue], [FilePath]) getIssuesAndFilesWorkingTreeChanged paths = do cwd <- getCurrentDirectory files <- gitLsFilesModifiedIn cwd paths issues <- concat <$> catch (mapM (getIssues cwd) files) dieOfInvalidTreeGrepperResult pure (issues, files) gitShowChanged :: FilePath -> IO [FilePath] gitShowChanged cwd = map T.unpack . T.lines . T.decodeUtf8 . LB.toStrict <$> sh ("git show -p --name-only --format=''" & setWorkingDir cwd) gitLsFilesModifiedIn :: FilePath -> [FilePath] -> IO [FilePath] gitLsFilesModifiedIn cwd paths = map T.unpack . T.lines . T.decodeUtf8 . LB.toStrict <$> sh ( proc "git ls-files --modified %" ("--" : paths) & setWorkingDir cwd ) propagate :: CommitHash -> History -> Scramble -> IO History propagate commitHash oldHistory scramble = do let issues = propagateIssues oldHistory.issues scramble issueEvents <- propagateIssueEvents oldHistory.issueEvents oldHistory.issues commitHash issues pure $ History {..} propagateIssues :: [I.Issue] -> Scramble -> [I.Issue] propagateIssues oldIssues partialCommitInfo = mergeListsBy eq ( \old new -> new { I.provenance = I.Provenance { first = old.provenance.first, last = if ((/=) `on` (.rawText)) old new then new.provenance.last else old.provenance.last }, I.closed = False } ) ( \old -> if elemBy eq old partialCommitInfo.issues || not (old.file `elem` partialCommitInfo.filesChanged) then old else old {I.closed = True} ) id oldIssues partialCommitInfo.issues propagateIssueEvents :: [IssueEvent] -> [I.Issue] -> CommitHash -> [I.Issue] -> IO [IssueEvent] propagateIssueEvents oldIssueEvents oldIssues commitHash issues = fmap (oldIssueEvents ++) $ newIssueEvents oldIssues commitHash issues newIssueEvents :: [I.Issue] -> CommitHash -> [I.Issue] -> IO [IssueEvent] newIssueEvents oldIssues' commitHash issues' = sequence $ concat [ [IssueCreated commitHash issue <$> patchCreated issue | issue <- deleteFirstsBy eq issues oldIssues], [ IssueChanged commitHash oldIssue newIssue <$> patchChanged oldIssue newIssue | (newIssue : oldIssue : _) <- intersectBy' eq issues oldIssues, neq newIssue oldIssue ], [IssueDeleted commitHash issue <$> patchDeleted issue | issue <- deleteFirstsBy eq oldIssues issues] ] where issues = filter (not . (.closed)) issues' oldIssues = filter (not . (.closed)) oldIssues' patchCreated new = diff "" new.rawText patchChanged old new = diff old.rawText new.rawText patchDeleted old = diff old.rawText "" diff old new = withSystemTempDirectory "diff" $ \tmp -> do let cwd = tmp T.writeFile (tmp "old") old T.writeFile (tmp "new") new A.parse . LT.toStrict . LT.decodeUtf8 <$> sh ("git diff --no-index -- old new || :" & setWorkingDir cwd) unsafeAssume :: CommitHash -> Scramble -> IO History unsafeAssume commitHash scramble = do let issues = scramble.issues issueEvents <- propagateIssueEvents [] [] commitHash issues pure $ History {..} eq :: I.Issue -> I.Issue -> Bool eq = (==) `on` (.id) neq :: I.Issue -> I.Issue -> Bool neq = (/=) `on` (.rawText) mergeListsBy :: (a -> a -> Bool) -> (a -> a -> b) -> (a -> b) -> (a -> b) -> [a] -> [a] -> [b] mergeListsBy eq onBoth onLeft onRight lefts rights = concat [ [ maybe (onLeft left) (onBoth left) right | left <- lefts, right <- let rights' = filter (eq left) rights in if null rights' then [Nothing] else (map Just rights') ], [ onRight right | right <- rights, not (elemBy eq right lefts) ] ] -- | A variant of `Data.List.intersectBy` that retuns the witnesses of the -- intersection. intersectBy' :: (a -> a -> Bool) -> [a] -> [a] -> [[a]] intersectBy' eq xs ys = filter (not . null) (map (\x -> x : filter (eq x) ys) xs) -- | A variant of `elem` that uses a custom comparison function. elemBy :: (a -> a -> Bool) -> a -> [a] -> Bool elemBy eq x xs = isJust $ find (eq x) xs