From fc0afaaa273f5b5d3696df87d70d5347a13bb9ac Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Mon, 18 Dec 2023 13:50:22 +0100 Subject: feat: compute history top to bottom Disables caching. --- anissue.cabal | 24 +++++ app/Cache.hs | 16 ++++ app/Git.hs | 41 +++++++-- app/Git/CommitHash.hs | 2 +- app/History.hs | 217 ++++----------------------------------------- app/History/IssueEvents.hs | 183 ++++++++++++++++++++++++++++++++++++++ app/History/Issues.hs | 72 +++++++++++++++ app/History/Plan.hs | 207 ++++++++++++++++++++++++++++++++++++++++++ app/History/Scramble.hs | 76 ++++++++++++++++ app/Issue.hs | 2 +- app/Main.hs | 2 +- app/Parallel.hs | 9 +- default.nix | 8 +- nix/sources.json | 6 ++ test/Main.hs | 85 ++++++++++++++++++ 15 files changed, 734 insertions(+), 216 deletions(-) create mode 100644 app/History/IssueEvents.hs create mode 100644 app/History/Issues.hs create mode 100644 app/History/Plan.hs create mode 100644 app/History/Scramble.hs create mode 100644 test/Main.hs diff --git a/anissue.cabal b/anissue.cabal index 465e81d..35c64ee 100644 --- a/anissue.cabal +++ b/anissue.cabal @@ -27,6 +27,10 @@ executable anissue Git Git.CommitHash History + History.Plan + History.IssueEvents + History.Issues + History.Scramble Issue IssueEvent Issue.Filter @@ -174,20 +178,40 @@ executable anissue directory, filepath, generic-deriving, + gitlib, + gitlib-libgit2, lingo, megaparsec, mtl, nonempty-zipper, optparse-applicative, + parallel, parallel-io, prettyprinter, prettyprinter-ansi-terminal, regex, + safe, SHA, + stm, + system-time-monotonic, + tagged, temporary, terminal-size, text, time, typed-process, + utf8-string, xdg-basedir, yaml + +test-suite test + type: exitcode-stdio-1.0 + main-is: Main.hs + hs-source-dirs: test + default-language: GHC2021 + default-extensions: BlockArguments QuasiQuotes + ghc-options: -Wall + build-depends: + base, + hspec, + sh diff --git a/app/Cache.hs b/app/Cache.hs index 7af9ee7..4540fa4 100644 --- a/app/Cache.hs +++ b/app/Cache.hs @@ -4,6 +4,22 @@ module Cache ) where +-- TODO Reduce cached data size +-- +-- Right now we are caching complete `Issue` instances, which +-- contain the full issue title and description. For a fast +-- lookup it may already be enough to only store the issue's +-- +-- \* filename +-- \* start position +-- \* end position +-- +-- With this information we can use git to quickly look up the +-- complete issue text and parse it. +-- +-- @topic caching +-- @backlog + import Data.Binary (Binary, decodeFileOrFail, encodeFile) import Data.Text qualified as T import Git qualified diff --git a/app/Git.hs b/app/Git.hs index 65ecf89..25c9149 100644 --- a/app/Git.hs +++ b/app/Git.hs @@ -2,6 +2,7 @@ module Git ( module Git.CommitHash, getCommitHashes, getRootDir, + getFilesOf, getChangedFilesOf, Commit (..), Author (..), @@ -14,14 +15,13 @@ module Git ) where -import Control.Exception (IOException, catch, throw, throwIO) +import Control.Exception (IOException, catch, throwIO) import Data.Binary (Binary) import Data.Binary.Instances () import Data.ByteString.Lazy qualified as LB -import Data.List.NonEmpty (NonEmpty) -import Data.List.NonEmpty qualified as N import Data.Maybe (fromMaybe) import Data.Text qualified as T +import Data.Text.Encoding qualified as T import Data.Text.Lazy qualified as LT import Data.Text.Lazy.Encoding qualified as LT import Data.Text.Lazy.IO qualified as LT @@ -33,8 +33,25 @@ import Patch qualified as A import Process (proc, sh, sh_) import Text.Printf (printf) -getCommitHashes :: IO (NonEmpty T.Text) -getCommitHashes = fromMaybe (throw E.NoCommits) . N.nonEmpty . reverse . T.lines <$> sh "git log --format=%H" +getCommitHashes :: Maybe CommitHash -> Maybe CommitHash -> IO [CommitHash] +getCommitHashes maybeBottomCommit Nothing = + getCommitHashes maybeBottomCommit (Just WorkingTree) +getCommitHashes (Just WorkingTree) (Just WorkingTree) = + pure [WorkingTree] +getCommitHashes (Just WorkingTree) (Just (Commit _)) = + pure [] +getCommitHashes Nothing (Just WorkingTree) = + (WorkingTree :) . map Commit . T.lines + <$> sh (proc "git log --format=%%H HEAD") +getCommitHashes (Just (Commit bottomHash)) (Just WorkingTree) = + (WorkingTree :) . map Commit . T.lines + <$> sh (proc "git log --format=%%H %..HEAD" bottomHash) +getCommitHashes Nothing (Just (Commit topHash)) = + map Commit . T.lines + <$> sh (proc "git log --format=%%H %" topHash) +getCommitHashes (Just (Commit bottomHash)) (Just (Commit topHash)) = + map Commit . T.lines + <$> sh (proc "git log --format=%%H %..%" bottomHash topHash) getRootDir :: IO FilePath getRootDir = @@ -43,13 +60,21 @@ getRootDir = where stripTrailingNL s = fromMaybe s $ T.stripSuffix "\n" s +getFilesOf :: CommitHash -> IO [FilePath] +getFilesOf WorkingTree = + map T.unpack . T.lines + <$> sh "git ls-files --cached --modified --others --exclude-standard --full-name" +getFilesOf (Commit hash) = + map T.unpack . T.lines + <$> sh (proc "git ls-tree -r --name-only --full-name --full-tree %" hash) + getChangedFilesOf :: CommitHash -> IO [FilePath] getChangedFilesOf WorkingTree = map T.unpack . T.lines - <$> sh "git ls-files --modified" -getChangedFilesOf (Commit hash) = + <$> sh "git ls-files --modified --others --exclude-standard --full-name" +getChangedFilesOf (Commit hash) = do map T.unpack . T.lines - <$> sh (proc "git show -p --name-only --format= %" hash) + <$> sh (proc "git diff-tree -r --name-only %" hash) data Commit = Commit' { commitHash :: CommitHash, diff --git a/app/Git/CommitHash.hs b/app/Git/CommitHash.hs index 0caecf4..f791af8 100644 --- a/app/Git/CommitHash.hs +++ b/app/Git/CommitHash.hs @@ -15,7 +15,7 @@ import Render qualified as P data CommitHash = WorkingTree | Commit T.Text - deriving (Eq, Show, Binary, Generic) + deriving (Eq, Ord, Show, Binary, Generic) toShortText :: CommitHash -> Maybe T.Text toShortText = fmap (T.take 7) . toText diff --git a/app/History.hs b/app/History.hs index c0427fa..d423907 100644 --- a/app/History.hs +++ b/app/History.hs @@ -1,143 +1,38 @@ module History ( Issues (..), - getIssues, IssueEvents (..), + getIssues, getIssueEvents, getIssuesOfFile, ) where -import CMark qualified as D -import Cache (cachedMaybe) import Comment qualified as G -import Control.Arrow (first) -import Control.Exception (Handler (..), catch, catches, try) -import Data.Binary (Binary) -import Data.ByteString.Lazy qualified as LB -import Data.Digest.Pure.SHA qualified as S -import Data.Function (on) -import Data.Map qualified as M +import Control.Exception (Handler (..), catches) +import Data.List.NonEmpty qualified as NE import Data.Maybe (catMaybes) -import Data.Text qualified as T -import Data.Text.Encoding qualified as T -import Data.Text.Lazy qualified as LT -import Data.Text.Lazy.Encoding qualified as LT -import Die (die) +import Data.Proxy (Proxy (Proxy)) import Exception qualified as E -import GHC.Generics (Generic) import Git qualified -import Git.CommitHash (CommitHash (..)) -import Git.CommitHash qualified as C +import History.IssueEvents (IssueEvents (..)) +import History.Issues (Issues (..)) +import History.Plan (formulate, realise) +import History.Scramble (fromComment) import Issue qualified as I -import Issue.Parser qualified as I -import Issue.Tag qualified as I -import Issue.Text qualified as I -import IssueEvent (IssueEvent (..)) import Parallel (parMapM) -import Process (proc, sh) -import Render qualified as P -import Tuple () - --- TODO Reduce cached data size --- --- Right now we are caching complete `Issue` instances, which --- contain the full issue title and description. For a fast --- lookup it may already be enough to only store the issue's --- --- \* filename --- \* start position --- \* end position --- --- With this information we can use git to quickly look up the --- complete issue text and parse it. --- --- @topic caching --- @backlog - -data Issues = Issues - { commitHash :: CommitHash, - issues :: M.Map T.Text I.Issue - } - deriving (Show, Generic, Binary) getIssues :: IO Issues -getIssues = getIssuesOf WorkingTree - -getIssuesOf :: CommitHash -> IO Issues -getIssuesOf commitHash = cachedMaybe (fmap ("issues-" <>) (C.toText commitHash)) do - maybeParentCommitHash <- getParentCommitHashOf commitHash - case maybeParentCommitHash of - Just parentCommitHash -> do - oldIssues <- (.issues) <$> getIssuesOf parentCommitHash - scramble <- getScrambleOf commitHash - let issues = propagateIssues oldIssues scramble - pure Issues {..} - Nothing -> do - scramble <- getScrambleOf commitHash - let issues = scramble.issues - pure Issues {..} - -data IssueEvents = IssueEvents - { commitHash :: CommitHash, - issueEvents :: [IssueEvent] - } - deriving (Show, Generic, Binary) +getIssues = + realise . (formulate Proxy) . NE.fromList + =<< Git.getCommitHashes Nothing (Just Git.WorkingTree) getIssueEvents :: IO IssueEvents -getIssueEvents = getIssueEventsOf WorkingTree - -getIssueEventsOf :: CommitHash -> IO IssueEvents -getIssueEventsOf commitHash = cachedMaybe (fmap ("events-" <>) (C.toText commitHash)) do - maybeParentCommitHash <- getParentCommitHashOf commitHash - case maybeParentCommitHash of - Just parentCommitHash -> do - oldIssues <- (.issues) <$> getIssuesOf parentCommitHash - issues <- (.issues) <$> getIssuesOf commitHash - oldIssueEvents <- (.issueEvents) <$> getIssueEventsOf parentCommitHash - let issueEvents = propagateIssueEvents oldIssueEvents oldIssues commitHash issues - pure IssueEvents {..} - Nothing -> do - scramble <- getScrambleOf commitHash - let issueEvents = propagateIssueEvents [] M.empty commitHash scramble.issues - pure IssueEvents {..} - -getParentCommitHashOf :: CommitHash -> IO (Maybe CommitHash) -getParentCommitHashOf commitHash = - either - (\_ -> Nothing) - (Just . Commit . T.strip . T.decodeUtf8 . LB.toStrict) - <$> try @E.ProcessException - ( case commitHash of - WorkingTree -> sh "git show -s --format=%H HEAD" - Commit hash -> sh (proc "git show -s --format=%%H %^" hash) - ) - --- | `Scramble` records the complete issues ONLY in files that have --- been changed in the commit. -data Scramble = Scramble - { commitHash :: CommitHash, - filesChanged :: [FilePath], - issues :: M.Map T.Text I.Issue - } - deriving (Show, Binary, Generic) - -getScrambleOf :: CommitHash -> IO Scramble -getScrambleOf commitHash = do - (issues, filesChanged) <- first (M.fromList . map (\i -> (i.id, i))) <$> getIssuesAndFilesChanged commitHash - pure $ Scramble {..} - -getIssuesAndFilesChanged :: CommitHash -> IO ([I.Issue], [FilePath]) -getIssuesAndFilesChanged commitHash = do - files <- Git.getChangedFilesOf commitHash - issues <- - concat - <$> catch - (parMapM (getIssuesOfFile commitHash) files) - (\(e :: E.InvalidTreeGrepperResult) -> die (show e)) - pure (issues, files) +getIssueEvents = + realise . (formulate Proxy) . NE.fromList + =<< Git.getCommitHashes Nothing (Just Git.WorkingTree) -- | Get all issues in the given directory and file. -getIssuesOfFile :: CommitHash -> FilePath -> IO [I.Issue] +getIssuesOfFile :: Git.CommitHash -> FilePath -> IO [I.Issue] getIssuesOfFile commitHash filename = ( fmap catMaybes . parMapM (fromComment commitHash) =<< G.getComments commitHash filename @@ -145,85 +40,3 @@ getIssuesOfFile commitHash filename = `catches` [ Handler \(_ :: E.UnknownFile) -> pure [], Handler \(_ :: E.UnsupportedLanguage) -> pure [] ] - --- | Note that `provenance` is trivial and needs to be fixed up later. -fromComment :: CommitHash -> G.Comment -> IO (Maybe I.Issue) -fromComment commitHash comment = do - commit <- Git.getCommitOf commitHash - let provenance = I.Provenance commit commit - - pure $ - ( \parseResult -> - let (markers, title) = - I.stripIssueMarkers (T.pack (show (P.render parseResult.heading))) - in I.Issue - { commitHash = commitHash, - language = comment.language, - rawTextHash = S.sha1 (LT.encodeUtf8 (LT.fromStrict rawText)), - title = title, - file = comment.filePath, - provenance = provenance, - startByte = comment.startByte, - endByte = comment.endByte, - startPoint = comment.startPoint, - endPoint = comment.endPoint, - tags = I.extractTags parseResult.tags, - markers = markers, - commentStyle = commentStyle, - closed = False - } - ) - <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) - where - (commentStyle, rawText) = G.uncomment comment.language comment.text - -propagateIssues :: M.Map T.Text I.Issue -> Scramble -> M.Map T.Text I.Issue -propagateIssues oldIssues scramble = - M.mergeWithKey - ( \_ old new -> - Just $ - new - { I.provenance = - I.Provenance - { first = old.provenance.first, - last = - if ((/=) `on` (.rawTextHash)) old new - then new.provenance.last - else old.provenance.last - }, - I.closed = False - } - ) - ( M.map - ( \old -> - if M.member old.id scramble.issues - || not (old.file `elem` scramble.filesChanged) - then old - else old {I.closed = True} - ) - ) - id - oldIssues - scramble.issues - -propagateIssueEvents :: [IssueEvent] -> M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent] -propagateIssueEvents oldIssueEvents oldIssues commitHash issues = - oldIssueEvents ++ newIssueEvents oldIssues commitHash issues - -newIssueEvents :: M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent] -newIssueEvents oldIssues' commitHash issues' = - concat - [ [ IssueCreated commitHash issue - | issue <- M.elems (issues `M.difference` oldIssues) - ], - [ IssueChanged commitHash oldIssue newIssue - | (newIssue, oldIssue) <- M.elems (M.intersectionWith (,) issues oldIssues), - ((/=) `on` (.rawTextHash)) newIssue oldIssue - ], - [ IssueDeleted commitHash issue {I.closed = True} - | issue <- M.elems (oldIssues `M.difference` issues) - ] - ] - where - issues = M.filter (not . (.closed)) issues' - oldIssues = M.filter (not . (.closed)) oldIssues' diff --git a/app/History/IssueEvents.hs b/app/History/IssueEvents.hs new file mode 100644 index 0000000..176d660 --- /dev/null +++ b/app/History/IssueEvents.hs @@ -0,0 +1,183 @@ +module History.IssueEvents + ( IssueEvents (..), + ) +where + +import Data.Binary (Binary) +import Data.Function (on) +import Data.List +import Data.List.NonEmpty qualified as NE +import Data.Map qualified as M +import Data.Maybe (fromMaybe) +import Data.Ord (comparing) +import Data.Proxy (Proxy) +import GHC.Generics (Generic) +import Git qualified +import History.Plan (Id, Planable, Proto, assume, propagate, protoOf) +import History.Scramble (Scramble (..), getIssuesOfFile) +import Issue (Issue (..)) +import IssueEvent (IssueEvent (..)) +import IssueEvent qualified as E + +data IssueEvents = IssueEvents + { commitHash :: Git.CommitHash, + issueEvents :: [E.IssueEvent] + } + deriving (Show, Generic, Binary) + +instance Planable IssueEvents where + type Id IssueEvents = Git.CommitHash + type Proto IssueEvents = Scramble + protoOf :: Proxy IssueEvents -> Git.CommitHash -> IO Scramble + protoOf _ commitHash@Git.WorkingTree = do + filesChanged <- Git.getFilesOf commitHash + issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged + pure $ + Scramble + { issues = + M.unions + [ M.singleton issue.id issue | issue <- issues + ], + .. + } + protoOf _ commitHash@(Git.Commit _) = do + filesChanged <- Git.getChangedFilesOf commitHash + issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged + pure $ + Scramble + { issues = + M.unions + [ M.singleton issue.id issue | issue <- issues + ], + .. + } + + assume :: Scramble -> IssueEvents + assume Scramble {..} = + IssueEvents + { issueEvents = + [ IssueCreated commitHash issue + | issue <- M.elems issues + ], + .. + } + + propagate :: + [Git.CommitHash] -> + IssueEvents -> + Scramble -> + IssueEvents + propagate = propagateIssueEvents + +-- | Propagates issue events from a top commit to a consecutive bottom commit. +-- +-- Because we are building the history of issue events backwards, we use *top* and *bottom* to refer to the later and earlier commit within the Git history, respectively. We use *earlier* and *later* to refer to the top and bottom commits within the application of this function. In particular, top commits are processed earlier, and bottom commits are processed later. +-- +-- For the top/earlier commit, issue events are known. For the bottom/later commit, only the scramble is known. +-- +-- The scramble is said to *witness* an issue, if its changed files contain the issue's file. Thus, in addition to the issues a scramble contains, it witnesses the issues that have been deleted in the scramble's commit. +-- +-- Because we are building the history from top/earlier to bottom/later commits, we have to assume that at any top/earlier commit, the issues present have been created in that commit. This function advances the issue's original commit as we learn about bottom/later commits from scrambles. +-- +-- A scramble *advances* an issue's original commit if contains it or does not witness it. In particular, a scramble does NOT advance an issue's original commit, if it *just witnesses* it (ie. witnesses it, but does not contain it). To understand this, consider the following scenarios (A) and (B). Suppose the top/ earlier commit is `commit-c`, and the bottom/ later commit is `commit-a`. Between them, the commit `commit-b` is intermediary. Suppose for scenario (A) that `issue-1` is created in `commit-a`, deleted in `commit-b` and re-opened in `commit-c`. Suppose for scenario (B) that `issue-1` is originally created in `commit-c`. Then, at `commit-b` the following holds true: +-- +-- - The issue `issue-1` has been created in `commit-c`, because it is present. +-- - The issue `issue-1 is not present in `commit-b`. +-- - The scramble of `commit-b` can witness `issue-1` in both scenarios (A), (B) even though it does not contain it. +-- +-- Thus, we cannot decide at `commit-b` whether `issue-1` has been initially created in the top/ earlier commit `commit-c`, or whether is has been re-opened in `commit-c`. We need the information of presence in `commit-a` to decide this. +-- +-- The most confusing edge case it the re-opening of issues. Suppose `issue-1` has been created in `commit-a`, deleted in `commit-b` and re-opened in `commit-c`. Contrast this with the scenario that `issue-1` has initially been created in `commit-c`. Observe the following as we propagate events from `commmit-c` to `commit-a`: +-- +-- - At `commit-c`: `issue-1` is present in both scenarios, thus it is initially tracked as "created" in `commit-c`. +-- - At `commit-b`: `issue-1` is NOT present in both scenarios. Note that the scramble can witness `issue-1` in either cases. +-- - At `commit-a`: `issue-1` is present in the first scenario, but not in the second. +-- +-- So, in the case `issue-1` has been re-opened, we cannot track its deletion at `commit-b`, because whether it was re-opened or originally created at `commit-b` depends on whether `issue-1` is present in the bottom `commit-a`, that we process only later. Thus, the scramble of commit `commit-b` cannot safely advance the issue's original commit, and we use this information to track re-opening of commits at later commits. +-- +-- Note that in the whole process, issue change events and issue deletion events can never be bottom-most/latest events, as they would depend on information not yet known, ie. the first commit can neither change nor delete an issue. +propagateIssueEvents :: [Git.CommitHash] -> IssueEvents -> Scramble -> IssueEvents +propagateIssueEvents log topIssueEvents bottomScramble = + IssueEvents + { commitHash = bottomScramble.commitHash, + issueEvents = + sortOn logOrder $ + let issueEventsPerIssue = + groupPerIssue topIssueEvents.issueEvents + in concat + [ -- CASE 1. The issue is present in the top/earlier history and bottom/later scramble. We can safely advance the issue's original commit. + concat + [ NE.init issueEvents ++ case NE.last issueEvents of + (IssueCreated commitHash issue) -> + if commitHash /= topCommitHash + then + [ IssueCreated commitHash issue, + IssueDeleted topCommitHash issue, + IssueCreated bottomCommitHash bottomIssue + ] + else + concat + [ if topIssue `neq` bottomIssue + then [IssueChanged commitHash bottomIssue issue] + else [], + [IssueCreated bottomCommitHash bottomIssue] + ] + _ -> error "bottom issues can only be created" + | issueEvents <- issueEventsPerIssue, + let topIssue = issue (NE.last issueEvents), + bottomIssue <- bottomIssues, + bottomIssue `sym` topIssue + ], + concat + [ -- CASE 2. The issue is present in the top/earlier history, not contained in the bottom/later scramble, but witnessed by the bottom/later scramble. We cannot safely advance the issue's original commit. + NE.init issueEvents ++ case NE.last issueEvents of + (IssueCreated commitHash issue) -> [IssueCreated commitHash issue] + _ -> error "bottom issues can only be created" + | issueEvents <- issueEventsPerIssue, + let topIssue = issue (NE.last issueEvents), + all (`nsym` topIssue) bottomIssues, + topIssue.file `elem` bottomScramble.filesChanged + ], + concat + [ -- CASE 3. The issue is present in the top/earlier history, but not witnessed by the bottom/later scramble. It is safe to advance the issue's commit hash. + NE.init issueEvents ++ case NE.last issueEvents of + (IssueCreated _ issue) -> [IssueCreated bottomCommitHash issue] + _ -> error "bottom issues can only be created" + | issueEvents <- issueEventsPerIssue, + let topIssue = issue (NE.last issueEvents), + all (`nsym` topIssue) bottomIssues, + not (topIssue.file `elem` bottomScramble.filesChanged) + ], + concat + [ -- CASE 4. The issue it not present in the top/earlier history, but contained in the bottom scramble. It had to be deleted by the top/ earlier commit. + [ IssueDeleted topIssueEvents.commitHash bottomIssue, + IssueCreated bottomCommitHash bottomIssue + ] + | bottomIssue <- bottomIssues, + all ((`nsym` bottomIssue) . issue . NE.last) issueEventsPerIssue + ] + ] + } + where + groupPerIssue = + map (NE.sortBy (comparing logOrder)) . NE.groupBy (sym `on` issue) . sortOn ((.id) . issue) + + topCommitHash = topIssueEvents.commitHash + bottomCommitHash = bottomScramble.commitHash + + bottomIssues = M.elems bottomScramble.issues + + neq, sym, nsym :: Issue -> Issue -> Bool + sym = (==) `on` (.id) + nsym = (/=) `on` (.id) + neq = (/=) `on` (.rawTextHash) + + commitHash (IssueChanged commitHash _ _) = commitHash + commitHash (IssueCreated commitHash _) = commitHash + commitHash (IssueDeleted commitHash _) = commitHash + + issue (IssueChanged _ _ issue) = issue + issue (IssueCreated _ issue) = issue + issue (IssueDeleted _ issue) = issue + + logOrder = fromMaybe (-1) . (`elemIndex` log) . commitHash diff --git a/app/History/Issues.hs b/app/History/Issues.hs new file mode 100644 index 0000000..08ad772 --- /dev/null +++ b/app/History/Issues.hs @@ -0,0 +1,72 @@ +module History.Issues + ( Issues (..), + ) +where + +import Data.Binary (Binary) +import Data.Function (on) +import Data.Map qualified as M +import Data.Proxy (Proxy) +import Data.Text qualified as T +import GHC.Generics (Generic) +import Git qualified +import History.Plan (Id, Planable, Proto, assume, propagate, protoOf) +import History.Scramble (Scramble (..), getIssuesOfFile) +import Issue qualified as I + +data Issues = Issues + { commitHash :: Git.CommitHash, + issues :: M.Map T.Text I.Issue + } + deriving (Show, Generic, Binary) + +instance Planable Issues where + type Id Issues = Git.CommitHash + type Proto Issues = Scramble + protoOf :: Proxy Issues -> Git.CommitHash -> IO Scramble + protoOf _ commitHash = do + filesChanged <- Git.getChangedFilesOf commitHash + issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged + pure $ + Scramble + { issues = + M.unions + [ M.singleton issue.id issue | issue <- issues + ], + .. + } + + assume :: Scramble -> Issues + assume (Scramble {..}) = Issues {..} + + propagate :: [Id Issues] -> Issues -> Scramble -> Issues + propagate _ topIssues bottomScramble = + Issues + { commitHash = topIssues.commitHash, + issues = + M.mergeWithKey + ( \_ topIssue bottomIssue -> + Just $ + topIssue + { I.provenance = + I.Provenance + { first = bottomIssue.provenance.first, + last = + if ((/=) `on` (.rawTextHash)) topIssue bottomIssue + then topIssue.provenance.last + else bottomIssue.provenance.last + } + } + ) + ( \topIssues -> topIssues + ) + ( \bottomIssues -> + M.map + ( \bottomIssue -> + bottomIssue {I.closed = True} + ) + bottomIssues + ) + topIssues.issues + bottomScramble.issues + } diff --git a/app/History/Plan.hs b/app/History/Plan.hs new file mode 100644 index 0000000..3dec391 --- /dev/null +++ b/app/History/Plan.hs @@ -0,0 +1,207 @@ +module History.Plan + ( Planable (..), + formulate, + realise, + ) +where + +import Control.Concurrent (forkIO, killThread) +import Control.Concurrent.STM (TQueue, TVar, atomically, newTQueueIO, newTVarIO, readTQueue, readTVar, retry, writeTQueue, writeTVar) +import Control.Monad (forever) +import Data.Kind (Type) +import Data.List (intercalate) +import Data.List.NonEmpty qualified as NE +import Data.Map qualified as M +import Data.Proxy (Proxy (Proxy)) +import Data.Time.Clock +import Parallel (parMapM_) +import System.IO (hFlush, hPutStr, stderr, stdout) +import System.Time.Monotonic (Clock, clockGetTime, newClock) +import Text.Printf (printf) + +-- | Left-associative monadic fold of a structure with automatic parallelism, status-reporting and intermittent results (cacelability). +-- +-- Semantically, `realise (formulate xs)` is equivalent to +-- +-- ```haskell +-- \(x:xs) -> foldM propagate (assume <$> protoOf x) =<< mapM protoOf xs +-- ``` +-- +-- However, this module provides the following features: +-- +-- - `protoOf` and `propagate` are computed in parallel, +-- - `propagate` is scheduled to terminate as soon as possible, +-- +-- This makes it possible to scale the computation across multiple cores, but still interrupt the process for intermittent results. +-- +-- Additionally, a progress report is presented on stdout. +class Planable output where + type Id output :: Type + type Proto output :: Type + protoOf :: Proxy output -> Id output -> IO (Proto output) + assume :: Proto output -> output + propagate :: [Id output] -> output -> Proto output -> output + +data Plan output = Plan (NE.NonEmpty (Id output)) [Task output] + +data Task output + = Compute (Id output) + | Propagate (Maybe (Id output)) (Id output) + +isCompute, isPropagate :: Task output -> Bool +isCompute (Compute _) = True +isCompute _ = False +isPropagate (Propagate _ _) = True +isPropagate _ = False + +formulate :: + Planable output => + Proxy output -> + NE.NonEmpty (Id output) -> + Plan output +formulate _ ids@(NE.uncons -> (id, (maybe [] NE.toList -> rest))) = + Plan ids $ + Compute id + : Propagate Nothing id + : concat + ( zipWith + ( \id id' -> + [ Compute id, + Propagate id' id + ] + ) + rest + (map Just (id : rest)) + ) + +data State output = State + { tasks :: [Task output], + protos :: M.Map (Id output) (Proto output), + outputs :: M.Map (Id output) output, + elapsed :: Clock + } + +realise :: (Ord (Id output), Planable output) => Plan output -> IO output +realise plan@(Plan ids tasks) = do + elapsed <- newClock + let state0 = + State + { tasks = tasks, + protos = M.empty, + outputs = M.empty, + elapsed = elapsed + } + stateT <- newTVarIO state0 + statusT <- newTQueueIO + tid <- forkIO $ forever do + state <- atomically (readTQueue statusT) + hPutStr stderr . printStatus =<< status state0 state + hFlush stdout + parMapM_ (step Proxy plan statusT stateT) tasks + let id = NE.last ids + output <- atomically $ do + maybe retry pure . M.lookup id . (.outputs) =<< readTVar stateT + killThread tid + pure output + +step :: + (Ord (Id output), Planable output) => + Proxy output -> + Plan output -> + TQueue (State output) -> + TVar (State output) -> + Task output -> + IO () +step proxy _ statusT stateT (Compute id) = do + proto <- protoOf proxy id + atomically do + state <- readTVar stateT + let state' = state {protos = M.insert id proto state.protos} + writeTVar stateT state' + writeTQueue statusT state' + pure () +step _ (Plan ids _) statusT stateT (Propagate (Just id') id) = do + (output, proto) <- atomically do + state <- readTVar stateT + output <- maybe retry pure (M.lookup id' state.outputs) + proto <- maybe retry pure (M.lookup id state.protos) + pure (output, proto) + let output' = propagate (NE.toList ids) output proto + atomically do + state <- readTVar stateT + let state' = state {outputs = M.insert id output' state.outputs} + writeTVar stateT state' + writeTQueue statusT state' +step _ _ statusT stateT (Propagate Nothing id) = do + proto <- atomically do + state <- readTVar stateT + maybe retry pure (M.lookup id state.protos) + let output = assume proto + atomically do + state <- readTVar stateT + let state' = state {outputs = M.insert id output state.outputs} + writeTVar stateT state' + writeTQueue statusT state' + +data Status = Status + { numTasks :: Progress Int, + numProtos :: Progress Int, + numOutputs :: Progress Int, + elapsed :: DiffTime + } + deriving (Show, Eq) + +data Progress a = Progress + { total :: a, + completed :: a + } + deriving (Show, Eq) + +status :: State output -> State output -> IO Status +status state0 state = do + let totalTasks = length state0.tasks + totalProtos = length (filter isCompute state0.tasks) + totalOutputs = length (filter isPropagate state0.tasks) + completedTasks = completedProtos + completedOutputs + completedProtos = M.size state.protos + completedOutputs = M.size state.outputs + elapsed <- clockGetTime state.elapsed + pure + Status + { numTasks = Progress totalTasks completedTasks, + numProtos = Progress totalProtos completedProtos, + numOutputs = Progress totalOutputs completedOutputs, + elapsed = elapsed + } + +printStatus :: Status -> String +printStatus Status {..} = do + let formatProgress completed total = pad total completed <> "/" <> total + pad total completed = replicate (length total - length completed) ' ' <> completed + eta = + formatEta + ( (fromIntegral numTasks.total * (realToFrac elapsed)) + / fromIntegral numTasks.completed + ) + (<> "\r") . intercalate " " $ + [ formatProgress (show numTasks.completed) (show numTasks.total), + formatProgress (show numProtos.completed) (show numProtos.total), + formatProgress (show numOutputs.completed) (show numOutputs.total), + "ETA " <> eta + ] + +formatEta :: Double -> String +formatEta s = do + if s < 60 + then printf "%.1fs" s + else do + let m = s / 60 + if m < 60 + then printf "%.1fm" m + else do + let h = m / 60 + if h < 24 + then printf "%.1hf" h + else do + let d = h / 24 + printf "%.1fd" d diff --git a/app/History/Scramble.hs b/app/History/Scramble.hs new file mode 100644 index 0000000..9004dbf --- /dev/null +++ b/app/History/Scramble.hs @@ -0,0 +1,76 @@ +module History.Scramble + ( Scramble (..), + getIssuesOfFile, + fromComment, + ) +where + +import CMark qualified as D +import Comment qualified as G +import Control.Exception (Handler (..), catches) +import Data.Binary (Binary) +import Data.Digest.Pure.SHA qualified as S +import Data.Map qualified as M +import Data.Maybe (catMaybes) +import Data.Text qualified as T +import Data.Text.Lazy qualified as LT +import Data.Text.Lazy.Encoding qualified as LT +import Exception qualified as E +import GHC.Generics (Generic) +import Git qualified +import Issue qualified as I +import Issue.Parser qualified as I +import Issue.Tag qualified as I +import Issue.Text qualified as I +import Parallel (parMapM) +import Render qualified as P + +-- | `Scramble` records the complete issues ONLY in files that have +-- been changed in the commit. +data Scramble = Scramble + { commitHash :: Git.CommitHash, + filesChanged :: [FilePath], + issues :: M.Map T.Text I.Issue + } + deriving (Show, Binary, Generic) + +-- | Get all issues in the given directory and file. +getIssuesOfFile :: Git.CommitHash -> FilePath -> IO [I.Issue] +getIssuesOfFile commitHash filename = + ( fmap catMaybes . parMapM (fromComment commitHash) + =<< G.getComments commitHash filename + ) + `catches` [ Handler \(_ :: E.UnknownFile) -> pure [], + Handler \(_ :: E.UnsupportedLanguage) -> pure [] + ] + +-- | Note that `provenance` is trivial and needs to be fixed up later. +fromComment :: Git.CommitHash -> G.Comment -> IO (Maybe I.Issue) +fromComment commitHash comment = do + commit <- Git.getCommitOf commitHash + let provenance = I.Provenance commit commit + + pure $ + ( \parseResult -> + let (markers, title) = + I.stripIssueMarkers (T.pack (show (P.render parseResult.heading))) + in I.Issue + { commitHash = commitHash, + language = comment.language, + rawTextHash = S.sha1 (LT.encodeUtf8 (LT.fromStrict rawText)), + title = title, + file = comment.filePath, + provenance = provenance, + startByte = comment.startByte, + endByte = comment.endByte, + startPoint = comment.startPoint, + endPoint = comment.endPoint, + tags = I.extractTags parseResult.tags, + markers = markers, + commentStyle = commentStyle, + closed = False + } + ) + <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) + where + (commentStyle, rawText) = G.uncomment comment.language comment.text diff --git a/app/Issue.hs b/app/Issue.hs index 2743f45..b6ddad6 100644 --- a/app/Issue.hs +++ b/app/Issue.hs @@ -25,7 +25,7 @@ import Data.Time.Clock (UTCTime (utctDay)) import GHC.Generics (Generic) import GHC.Records (HasField (..)) import Git (Author (..), Commit (..)) -import Git qualified as Git +import Git qualified import Issue.Parser qualified as I import Issue.Provenance (Provenance (..)) import Issue.Tag (Tag (..)) diff --git a/app/Main.hs b/app/Main.hs index f9fedea..1a1bf6b 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -654,7 +654,7 @@ main = do putDoc colorize noPager width . (P.vsep . intersperse "") $ map (if detailed then (P.render . P.Detailed) else (P.render . P.Summarized)) issues Options {colorize, noPager, width, command = Log {patch}} -> do - es <- reverse . (.issueEvents) <$> H.getIssueEvents + es <- (.issueEvents) <$> H.getIssueEvents putDoc colorize noPager width $ if patch then P.vsep . intersperse P.emptyDoc $ map (P.render . P.Detailed) es diff --git a/app/Parallel.hs b/app/Parallel.hs index 1687364..e590164 100644 --- a/app/Parallel.hs +++ b/app/Parallel.hs @@ -1,6 +1,6 @@ -module Parallel (parMapM, parSequence) where +module Parallel (parMapM, parMapM_, parSequence) where -import Control.Concurrent.ParallelIO.Local (parallel, withPool) +import Control.Concurrent.ParallelIO.Local (parallel, parallel_, withPool) import GHC.Conc (getNumProcessors) parMapM :: (a -> IO b) -> [a] -> IO [b] @@ -8,6 +8,11 @@ parMapM f xs = do n <- getNumProcessors withPool n $ \pool -> parallel pool (map f xs) +parMapM_ :: (a -> IO ()) -> [a] -> IO () +parMapM_ f xs = do + n <- getNumProcessors + withPool n $ \pool -> parallel_ pool (map f xs) + parSequence :: [IO a] -> IO [a] parSequence xs = do n <- getNumProcessors diff --git a/default.nix b/default.nix index 4caf76a..0d16bcb 100644 --- a/default.nix +++ b/default.nix @@ -33,7 +33,12 @@ (self: super: { anissue = pkgs.writers.writeDashBin "anissue" '' set -efu - exec cabal run anissue -- "$@" + cwd=$PWD + cd ${self.lib.escapeShellArg (toString ./.)} + cabal build anissue + anissue=$(find dist-newstyle -type f -executable -name anissue -exec realpath '{}' \; | head -1) + cd $cwd + exec $anissue "$@" ''; }) ]; @@ -48,6 +53,7 @@ let ./diff-parse.patch; lingo = pkgs.haskell.lib.doJailbreak (pkgs.haskell.lib.markUnbroken super.lingo); + sh = pkgs.haskell.lib.dontCheck (super.callCabal2nix "sh" (import ./nix/sources.nix).sh { }); anissue = (super.callCabal2nix "anissue" ./. ({ inherit (pkgs) tree-sitter; } // pkgs.lib.filterAttrs (_: pkgs.lib.isDerivation) diff --git a/nix/sources.json b/nix/sources.json index 926d13a..d08a31b 100644 --- a/nix/sources.json +++ b/nix/sources.json @@ -10,5 +10,11 @@ "type": "tarball", "url": "https://github.com/NixOS/nixpkgs/archive/70bdadeb94ffc8806c0570eb5c2695ad29f0e421.tar.gz", "url_template": "https://github.com///archive/.tar.gz" + }, + "sh": { + "branch": "main", + "repo": "git@code.nomath.org:~/sh", + "rev": "3ea4e6459333409c60f66a5745bb472d136da741", + "type": "git" } } diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..ae11911 --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,85 @@ +module Main (main) where + +import Process.Shell (sh) +import Test.Hspec (describe, hspec, it, shouldBe) + +main :: IO () +main = do + anissue :: String <- [sh|realpath dist-newstyle/build/*/*/anissue-*/x/anissue/build/anissue|] + hspec do + describe "issue events" do + it "smoke" do + ( `shouldBe` + concat + [ "df7c026 created issue 1\n", + "55b493c deleted issue 1\n", + "618907d changed issue 1\n", + "618907d created issue 2\n", + "e95b4ec created issue 1" + ] + ) + . fst @String @() + =<< [sh| +set -efu +PATH=#{anissue}${PATH+:$PATH}; export PATH +readonly tmp=$(mktemp -d) +trap 'rm -rf "$tmp"' EXIT +cd "$tmp" + +GIT_AUTHOR_DATE="Thu Jan 1 00:00:00 1970 +0000"; export GIT_AUTHOR_DATE +GIT_AUTHOR_EMAIL="jane@example.com"; export GIT_AUTHOR_EMAIL +GIT_AUTHOR_NAME="Jane Doe"; export GIT_AUTHOR_NAME +GIT_COMMITTER_DATE="Thu Jan 1 00:00:00 1970 +0000"; export GIT_COMMITTER_DATE +GIT_COMMITTER_EMAIL="jane@example.com"; export GIT_COMMITTER_EMAIL +GIT_COMMITTER_NAME="Jane Doe"; export GIT_COMMITTER_NAME + +( +git init --initial-branch=main + +cat >main.hs <<'EOF' +module Main where +EOF +git add main.hs +git commit -m 'no issues' + +cat >main.hs <<'EOF' +module Main where + +-- TODO issue 1 +EOF +git add main.hs +git commit -m 'create issue 1' + +cat >main.hs <<'EOF' +module Main where + +-- TODO issue 2 +EOF +cat >lib.hs <<'EOF' +module Main where + +-- TODO issue 1 +-- +-- remark: file modifications are not considered changes! +EOF +git add main.hs lib.hs +git commit -m 'create issue 2, modify issue 1' + +cat >lib.hs <<'EOF' +module Main where +EOF +git add lib.hs +git commit -m 'close issue 1' + +cat >lib.hs <<'EOF' +module Main where + +-- TODO issue 1 +-- +-- remark: file changes are not considered changes! +EOF +git add main.hs lib.hs +git commit -m 'reopen issue 1' +) 1>/dev/null + +anissue log|] -- cgit v1.2.3