module History ( Issues (..), getIssues, IssueEvents (..), getIssueEvents, getIssuesOfFile, ) where import CMark qualified as D import Cache (cachedMaybe) import Comment qualified as G import Control.Arrow (first) import Control.Exception (Handler (..), catch, catches, try) import Data.Binary (Binary) import Data.ByteString.Lazy qualified as LB import Data.Digest.Pure.SHA qualified as S import Data.Function (on) import Data.Map qualified as M import Data.Maybe (catMaybes) import Data.Text qualified as T import Data.Text.Encoding qualified as T import Data.Text.Lazy qualified as LT import Data.Text.Lazy.Encoding qualified as LT import Die (die) import Exception qualified as E import GHC.Generics (Generic) import Git qualified import Git.CommitHash (CommitHash (..)) import Git.CommitHash qualified as C import Issue qualified as I import Issue.Parser qualified as I import Issue.Tag qualified as I import Issue.Text qualified as I import IssueEvent (IssueEvent (..)) import Parallel (parMapM) import Process (proc, sh) import Render qualified as P import Tuple () -- TODO Reduce cached data size -- -- Right now we are caching complete `Issue` instances, which -- contain the full issue title and description. For a fast -- lookup it may already be enough to only store the issue's -- -- \* filename -- \* start position -- \* end position -- -- With this information we can use git to quickly look up the -- complete issue text and parse it. -- -- @topic caching -- @backlog data Issues = Issues { commitHash :: CommitHash, issues :: M.Map T.Text I.Issue } deriving (Show, Generic, Binary) getIssues :: IO Issues getIssues = getIssuesOf WorkingTree getIssuesOf :: CommitHash -> IO Issues getIssuesOf commitHash = cachedMaybe (fmap ("issues-" <>) (C.toText commitHash)) do maybeParentCommitHash <- getParentCommitHashOf commitHash case maybeParentCommitHash of Just parentCommitHash -> do oldIssues <- (.issues) <$> getIssuesOf parentCommitHash scramble <- getScrambleOf commitHash let issues = propagateIssues oldIssues scramble pure Issues {..} Nothing -> do scramble <- getScrambleOf commitHash let issues = scramble.issues pure Issues {..} data IssueEvents = IssueEvents { commitHash :: CommitHash, issueEvents :: [IssueEvent] } deriving (Show, Generic, Binary) getIssueEvents :: IO IssueEvents getIssueEvents = getIssueEventsOf WorkingTree getIssueEventsOf :: CommitHash -> IO IssueEvents getIssueEventsOf commitHash = cachedMaybe (fmap ("events-" <>) (C.toText commitHash)) do maybeParentCommitHash <- getParentCommitHashOf commitHash case maybeParentCommitHash of Just parentCommitHash -> do oldIssues <- (.issues) <$> getIssuesOf parentCommitHash issues <- (.issues) <$> getIssuesOf commitHash oldIssueEvents <- (.issueEvents) <$> getIssueEventsOf parentCommitHash let issueEvents = propagateIssueEvents oldIssueEvents oldIssues commitHash issues pure IssueEvents {..} Nothing -> do scramble <- getScrambleOf commitHash let issueEvents = propagateIssueEvents [] M.empty commitHash scramble.issues pure IssueEvents {..} getParentCommitHashOf :: CommitHash -> IO (Maybe CommitHash) getParentCommitHashOf commitHash = either (\_ -> Nothing) (Just . Commit . T.strip . T.decodeUtf8 . LB.toStrict) <$> try @E.ProcessException ( case commitHash of WorkingTree -> sh "git show -s --format=%H HEAD" Commit hash -> sh (proc "git show -s --format=%%H %^" hash) ) -- | `Scramble` records the complete issues ONLY in files that have -- been changed in the commit. data Scramble = Scramble { commitHash :: CommitHash, filesChanged :: [FilePath], issues :: M.Map T.Text I.Issue } deriving (Show, Binary, Generic) getScrambleOf :: CommitHash -> IO Scramble getScrambleOf commitHash = do (issues, filesChanged) <- first (M.fromList . map (\i -> (i.id, i))) <$> getIssuesAndFilesChanged commitHash pure $ Scramble {..} getIssuesAndFilesChanged :: CommitHash -> IO ([I.Issue], [FilePath]) getIssuesAndFilesChanged commitHash = do files <- Git.getChangedFilesOf commitHash issues <- concat <$> catch (parMapM (getIssuesOfFile commitHash) files) (\(e :: E.InvalidTreeGrepperResult) -> die (show e)) pure (issues, files) -- | Get all issues in the given directory and file. getIssuesOfFile :: CommitHash -> FilePath -> IO [I.Issue] getIssuesOfFile commitHash filename = ( fmap catMaybes . parMapM (fromComment commitHash) =<< G.getComments commitHash filename ) `catches` [ Handler \(_ :: E.UnknownFile) -> pure [], Handler \(_ :: E.UnsupportedLanguage) -> pure [] ] -- | Note that `provenance` is trivial and needs to be fixed up later. fromComment :: CommitHash -> G.Comment -> IO (Maybe I.Issue) fromComment commitHash comment = do commit <- Git.getCommitOf commitHash let provenance = I.Provenance commit commit pure $ ( \parseResult -> let (markers, title) = I.stripIssueMarkers (T.pack (show (P.render parseResult.heading))) in I.Issue { commitHash = commitHash, language = comment.language, rawTextHash = S.sha1 (LT.encodeUtf8 (LT.fromStrict rawText)), title = title, file = comment.filePath, provenance = provenance, startByte = comment.startByte, endByte = comment.endByte, startPoint = comment.startPoint, endPoint = comment.endPoint, tags = I.extractTags parseResult.tags, markers = markers, commentStyle = commentStyle, closed = False } ) <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) where (commentStyle, rawText) = G.uncomment comment.language comment.text propagateIssues :: M.Map T.Text I.Issue -> Scramble -> M.Map T.Text I.Issue propagateIssues oldIssues scramble = M.mergeWithKey ( \_ old new -> Just $ new { I.provenance = I.Provenance { first = old.provenance.first, last = if ((/=) `on` (.rawTextHash)) old new then new.provenance.last else old.provenance.last }, I.closed = False } ) ( M.map ( \old -> if M.member old.id scramble.issues || not (old.file `elem` scramble.filesChanged) then old else old {I.closed = True} ) ) id oldIssues scramble.issues propagateIssueEvents :: [IssueEvent] -> M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent] propagateIssueEvents oldIssueEvents oldIssues commitHash issues = oldIssueEvents ++ newIssueEvents oldIssues commitHash issues newIssueEvents :: M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent] newIssueEvents oldIssues' commitHash issues' = concat [ [ IssueCreated commitHash issue | issue <- M.elems (issues `M.difference` oldIssues) ], [ IssueChanged commitHash oldIssue newIssue | (newIssue, oldIssue) <- M.elems (M.intersectionWith (,) issues oldIssues), ((/=) `on` (.rawTextHash)) newIssue oldIssue ], [ IssueDeleted commitHash issue {I.closed = True} | issue <- M.elems (oldIssues `M.difference` issues) ] ] where issues = M.filter (not . (.closed)) issues' oldIssues = M.filter (not . (.closed)) oldIssues'