aboutsummaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
Diffstat (limited to 'app')
-rw-r--r--app/Cache.hs16
-rw-r--r--app/Git.hs41
-rw-r--r--app/Git/CommitHash.hs2
-rw-r--r--app/History.hs217
-rw-r--r--app/History/IssueEvents.hs183
-rw-r--r--app/History/Issues.hs72
-rw-r--r--app/History/Plan.hs207
-rw-r--r--app/History/Scramble.hs76
-rw-r--r--app/Issue.hs2
-rw-r--r--app/Main.hs2
-rw-r--r--app/Parallel.hs9
11 files changed, 612 insertions, 215 deletions
diff --git a/app/Cache.hs b/app/Cache.hs
index 7af9ee7..4540fa4 100644
--- a/app/Cache.hs
+++ b/app/Cache.hs
@@ -4,6 +4,22 @@ module Cache
)
where
+-- TODO Reduce cached data size
+--
+-- Right now we are caching complete `Issue` instances, which
+-- contain the full issue title and description. For a fast
+-- lookup it may already be enough to only store the issue's
+--
+-- \* filename
+-- \* start position
+-- \* end position
+--
+-- With this information we can use git to quickly look up the
+-- complete issue text and parse it.
+--
+-- @topic caching
+-- @backlog
+
import Data.Binary (Binary, decodeFileOrFail, encodeFile)
import Data.Text qualified as T
import Git qualified
diff --git a/app/Git.hs b/app/Git.hs
index 65ecf89..25c9149 100644
--- a/app/Git.hs
+++ b/app/Git.hs
@@ -2,6 +2,7 @@ module Git
( module Git.CommitHash,
getCommitHashes,
getRootDir,
+ getFilesOf,
getChangedFilesOf,
Commit (..),
Author (..),
@@ -14,14 +15,13 @@ module Git
)
where
-import Control.Exception (IOException, catch, throw, throwIO)
+import Control.Exception (IOException, catch, throwIO)
import Data.Binary (Binary)
import Data.Binary.Instances ()
import Data.ByteString.Lazy qualified as LB
-import Data.List.NonEmpty (NonEmpty)
-import Data.List.NonEmpty qualified as N
import Data.Maybe (fromMaybe)
import Data.Text qualified as T
+import Data.Text.Encoding qualified as T
import Data.Text.Lazy qualified as LT
import Data.Text.Lazy.Encoding qualified as LT
import Data.Text.Lazy.IO qualified as LT
@@ -33,8 +33,25 @@ import Patch qualified as A
import Process (proc, sh, sh_)
import Text.Printf (printf)
-getCommitHashes :: IO (NonEmpty T.Text)
-getCommitHashes = fromMaybe (throw E.NoCommits) . N.nonEmpty . reverse . T.lines <$> sh "git log --format=%H"
+getCommitHashes :: Maybe CommitHash -> Maybe CommitHash -> IO [CommitHash]
+getCommitHashes maybeBottomCommit Nothing =
+ getCommitHashes maybeBottomCommit (Just WorkingTree)
+getCommitHashes (Just WorkingTree) (Just WorkingTree) =
+ pure [WorkingTree]
+getCommitHashes (Just WorkingTree) (Just (Commit _)) =
+ pure []
+getCommitHashes Nothing (Just WorkingTree) =
+ (WorkingTree :) . map Commit . T.lines
+ <$> sh (proc "git log --format=%%H HEAD")
+getCommitHashes (Just (Commit bottomHash)) (Just WorkingTree) =
+ (WorkingTree :) . map Commit . T.lines
+ <$> sh (proc "git log --format=%%H %..HEAD" bottomHash)
+getCommitHashes Nothing (Just (Commit topHash)) =
+ map Commit . T.lines
+ <$> sh (proc "git log --format=%%H %" topHash)
+getCommitHashes (Just (Commit bottomHash)) (Just (Commit topHash)) =
+ map Commit . T.lines
+ <$> sh (proc "git log --format=%%H %..%" bottomHash topHash)
getRootDir :: IO FilePath
getRootDir =
@@ -43,13 +60,21 @@ getRootDir =
where
stripTrailingNL s = fromMaybe s $ T.stripSuffix "\n" s
+getFilesOf :: CommitHash -> IO [FilePath]
+getFilesOf WorkingTree =
+ map T.unpack . T.lines
+ <$> sh "git ls-files --cached --modified --others --exclude-standard --full-name"
+getFilesOf (Commit hash) =
+ map T.unpack . T.lines
+ <$> sh (proc "git ls-tree -r --name-only --full-name --full-tree %" hash)
+
getChangedFilesOf :: CommitHash -> IO [FilePath]
getChangedFilesOf WorkingTree =
map T.unpack . T.lines
- <$> sh "git ls-files --modified"
-getChangedFilesOf (Commit hash) =
+ <$> sh "git ls-files --modified --others --exclude-standard --full-name"
+getChangedFilesOf (Commit hash) = do
map T.unpack . T.lines
- <$> sh (proc "git show -p --name-only --format= %" hash)
+ <$> sh (proc "git diff-tree -r --name-only %" hash)
data Commit = Commit'
{ commitHash :: CommitHash,
diff --git a/app/Git/CommitHash.hs b/app/Git/CommitHash.hs
index 0caecf4..f791af8 100644
--- a/app/Git/CommitHash.hs
+++ b/app/Git/CommitHash.hs
@@ -15,7 +15,7 @@ import Render qualified as P
data CommitHash
= WorkingTree
| Commit T.Text
- deriving (Eq, Show, Binary, Generic)
+ deriving (Eq, Ord, Show, Binary, Generic)
toShortText :: CommitHash -> Maybe T.Text
toShortText = fmap (T.take 7) . toText
diff --git a/app/History.hs b/app/History.hs
index c0427fa..d423907 100644
--- a/app/History.hs
+++ b/app/History.hs
@@ -1,143 +1,38 @@
module History
( Issues (..),
- getIssues,
IssueEvents (..),
+ getIssues,
getIssueEvents,
getIssuesOfFile,
)
where
-import CMark qualified as D
-import Cache (cachedMaybe)
import Comment qualified as G
-import Control.Arrow (first)
-import Control.Exception (Handler (..), catch, catches, try)
-import Data.Binary (Binary)
-import Data.ByteString.Lazy qualified as LB
-import Data.Digest.Pure.SHA qualified as S
-import Data.Function (on)
-import Data.Map qualified as M
+import Control.Exception (Handler (..), catches)
+import Data.List.NonEmpty qualified as NE
import Data.Maybe (catMaybes)
-import Data.Text qualified as T
-import Data.Text.Encoding qualified as T
-import Data.Text.Lazy qualified as LT
-import Data.Text.Lazy.Encoding qualified as LT
-import Die (die)
+import Data.Proxy (Proxy (Proxy))
import Exception qualified as E
-import GHC.Generics (Generic)
import Git qualified
-import Git.CommitHash (CommitHash (..))
-import Git.CommitHash qualified as C
+import History.IssueEvents (IssueEvents (..))
+import History.Issues (Issues (..))
+import History.Plan (formulate, realise)
+import History.Scramble (fromComment)
import Issue qualified as I
-import Issue.Parser qualified as I
-import Issue.Tag qualified as I
-import Issue.Text qualified as I
-import IssueEvent (IssueEvent (..))
import Parallel (parMapM)
-import Process (proc, sh)
-import Render qualified as P
-import Tuple ()
-
--- TODO Reduce cached data size
---
--- Right now we are caching complete `Issue` instances, which
--- contain the full issue title and description. For a fast
--- lookup it may already be enough to only store the issue's
---
--- \* filename
--- \* start position
--- \* end position
---
--- With this information we can use git to quickly look up the
--- complete issue text and parse it.
---
--- @topic caching
--- @backlog
-
-data Issues = Issues
- { commitHash :: CommitHash,
- issues :: M.Map T.Text I.Issue
- }
- deriving (Show, Generic, Binary)
getIssues :: IO Issues
-getIssues = getIssuesOf WorkingTree
-
-getIssuesOf :: CommitHash -> IO Issues
-getIssuesOf commitHash = cachedMaybe (fmap ("issues-" <>) (C.toText commitHash)) do
- maybeParentCommitHash <- getParentCommitHashOf commitHash
- case maybeParentCommitHash of
- Just parentCommitHash -> do
- oldIssues <- (.issues) <$> getIssuesOf parentCommitHash
- scramble <- getScrambleOf commitHash
- let issues = propagateIssues oldIssues scramble
- pure Issues {..}
- Nothing -> do
- scramble <- getScrambleOf commitHash
- let issues = scramble.issues
- pure Issues {..}
-
-data IssueEvents = IssueEvents
- { commitHash :: CommitHash,
- issueEvents :: [IssueEvent]
- }
- deriving (Show, Generic, Binary)
+getIssues =
+ realise . (formulate Proxy) . NE.fromList
+ =<< Git.getCommitHashes Nothing (Just Git.WorkingTree)
getIssueEvents :: IO IssueEvents
-getIssueEvents = getIssueEventsOf WorkingTree
-
-getIssueEventsOf :: CommitHash -> IO IssueEvents
-getIssueEventsOf commitHash = cachedMaybe (fmap ("events-" <>) (C.toText commitHash)) do
- maybeParentCommitHash <- getParentCommitHashOf commitHash
- case maybeParentCommitHash of
- Just parentCommitHash -> do
- oldIssues <- (.issues) <$> getIssuesOf parentCommitHash
- issues <- (.issues) <$> getIssuesOf commitHash
- oldIssueEvents <- (.issueEvents) <$> getIssueEventsOf parentCommitHash
- let issueEvents = propagateIssueEvents oldIssueEvents oldIssues commitHash issues
- pure IssueEvents {..}
- Nothing -> do
- scramble <- getScrambleOf commitHash
- let issueEvents = propagateIssueEvents [] M.empty commitHash scramble.issues
- pure IssueEvents {..}
-
-getParentCommitHashOf :: CommitHash -> IO (Maybe CommitHash)
-getParentCommitHashOf commitHash =
- either
- (\_ -> Nothing)
- (Just . Commit . T.strip . T.decodeUtf8 . LB.toStrict)
- <$> try @E.ProcessException
- ( case commitHash of
- WorkingTree -> sh "git show -s --format=%H HEAD"
- Commit hash -> sh (proc "git show -s --format=%%H %^" hash)
- )
-
--- | `Scramble` records the complete issues ONLY in files that have
--- been changed in the commit.
-data Scramble = Scramble
- { commitHash :: CommitHash,
- filesChanged :: [FilePath],
- issues :: M.Map T.Text I.Issue
- }
- deriving (Show, Binary, Generic)
-
-getScrambleOf :: CommitHash -> IO Scramble
-getScrambleOf commitHash = do
- (issues, filesChanged) <- first (M.fromList . map (\i -> (i.id, i))) <$> getIssuesAndFilesChanged commitHash
- pure $ Scramble {..}
-
-getIssuesAndFilesChanged :: CommitHash -> IO ([I.Issue], [FilePath])
-getIssuesAndFilesChanged commitHash = do
- files <- Git.getChangedFilesOf commitHash
- issues <-
- concat
- <$> catch
- (parMapM (getIssuesOfFile commitHash) files)
- (\(e :: E.InvalidTreeGrepperResult) -> die (show e))
- pure (issues, files)
+getIssueEvents =
+ realise . (formulate Proxy) . NE.fromList
+ =<< Git.getCommitHashes Nothing (Just Git.WorkingTree)
-- | Get all issues in the given directory and file.
-getIssuesOfFile :: CommitHash -> FilePath -> IO [I.Issue]
+getIssuesOfFile :: Git.CommitHash -> FilePath -> IO [I.Issue]
getIssuesOfFile commitHash filename =
( fmap catMaybes . parMapM (fromComment commitHash)
=<< G.getComments commitHash filename
@@ -145,85 +40,3 @@ getIssuesOfFile commitHash filename =
`catches` [ Handler \(_ :: E.UnknownFile) -> pure [],
Handler \(_ :: E.UnsupportedLanguage) -> pure []
]
-
--- | Note that `provenance` is trivial and needs to be fixed up later.
-fromComment :: CommitHash -> G.Comment -> IO (Maybe I.Issue)
-fromComment commitHash comment = do
- commit <- Git.getCommitOf commitHash
- let provenance = I.Provenance commit commit
-
- pure $
- ( \parseResult ->
- let (markers, title) =
- I.stripIssueMarkers (T.pack (show (P.render parseResult.heading)))
- in I.Issue
- { commitHash = commitHash,
- language = comment.language,
- rawTextHash = S.sha1 (LT.encodeUtf8 (LT.fromStrict rawText)),
- title = title,
- file = comment.filePath,
- provenance = provenance,
- startByte = comment.startByte,
- endByte = comment.endByte,
- startPoint = comment.startPoint,
- endPoint = comment.endPoint,
- tags = I.extractTags parseResult.tags,
- markers = markers,
- commentStyle = commentStyle,
- closed = False
- }
- )
- <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText)
- where
- (commentStyle, rawText) = G.uncomment comment.language comment.text
-
-propagateIssues :: M.Map T.Text I.Issue -> Scramble -> M.Map T.Text I.Issue
-propagateIssues oldIssues scramble =
- M.mergeWithKey
- ( \_ old new ->
- Just $
- new
- { I.provenance =
- I.Provenance
- { first = old.provenance.first,
- last =
- if ((/=) `on` (.rawTextHash)) old new
- then new.provenance.last
- else old.provenance.last
- },
- I.closed = False
- }
- )
- ( M.map
- ( \old ->
- if M.member old.id scramble.issues
- || not (old.file `elem` scramble.filesChanged)
- then old
- else old {I.closed = True}
- )
- )
- id
- oldIssues
- scramble.issues
-
-propagateIssueEvents :: [IssueEvent] -> M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent]
-propagateIssueEvents oldIssueEvents oldIssues commitHash issues =
- oldIssueEvents ++ newIssueEvents oldIssues commitHash issues
-
-newIssueEvents :: M.Map T.Text I.Issue -> CommitHash -> M.Map T.Text I.Issue -> [IssueEvent]
-newIssueEvents oldIssues' commitHash issues' =
- concat
- [ [ IssueCreated commitHash issue
- | issue <- M.elems (issues `M.difference` oldIssues)
- ],
- [ IssueChanged commitHash oldIssue newIssue
- | (newIssue, oldIssue) <- M.elems (M.intersectionWith (,) issues oldIssues),
- ((/=) `on` (.rawTextHash)) newIssue oldIssue
- ],
- [ IssueDeleted commitHash issue {I.closed = True}
- | issue <- M.elems (oldIssues `M.difference` issues)
- ]
- ]
- where
- issues = M.filter (not . (.closed)) issues'
- oldIssues = M.filter (not . (.closed)) oldIssues'
diff --git a/app/History/IssueEvents.hs b/app/History/IssueEvents.hs
new file mode 100644
index 0000000..176d660
--- /dev/null
+++ b/app/History/IssueEvents.hs
@@ -0,0 +1,183 @@
+module History.IssueEvents
+ ( IssueEvents (..),
+ )
+where
+
+import Data.Binary (Binary)
+import Data.Function (on)
+import Data.List
+import Data.List.NonEmpty qualified as NE
+import Data.Map qualified as M
+import Data.Maybe (fromMaybe)
+import Data.Ord (comparing)
+import Data.Proxy (Proxy)
+import GHC.Generics (Generic)
+import Git qualified
+import History.Plan (Id, Planable, Proto, assume, propagate, protoOf)
+import History.Scramble (Scramble (..), getIssuesOfFile)
+import Issue (Issue (..))
+import IssueEvent (IssueEvent (..))
+import IssueEvent qualified as E
+
+data IssueEvents = IssueEvents
+ { commitHash :: Git.CommitHash,
+ issueEvents :: [E.IssueEvent]
+ }
+ deriving (Show, Generic, Binary)
+
+instance Planable IssueEvents where
+ type Id IssueEvents = Git.CommitHash
+ type Proto IssueEvents = Scramble
+ protoOf :: Proxy IssueEvents -> Git.CommitHash -> IO Scramble
+ protoOf _ commitHash@Git.WorkingTree = do
+ filesChanged <- Git.getFilesOf commitHash
+ issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged
+ pure $
+ Scramble
+ { issues =
+ M.unions
+ [ M.singleton issue.id issue | issue <- issues
+ ],
+ ..
+ }
+ protoOf _ commitHash@(Git.Commit _) = do
+ filesChanged <- Git.getChangedFilesOf commitHash
+ issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged
+ pure $
+ Scramble
+ { issues =
+ M.unions
+ [ M.singleton issue.id issue | issue <- issues
+ ],
+ ..
+ }
+
+ assume :: Scramble -> IssueEvents
+ assume Scramble {..} =
+ IssueEvents
+ { issueEvents =
+ [ IssueCreated commitHash issue
+ | issue <- M.elems issues
+ ],
+ ..
+ }
+
+ propagate ::
+ [Git.CommitHash] ->
+ IssueEvents ->
+ Scramble ->
+ IssueEvents
+ propagate = propagateIssueEvents
+
+-- | Propagates issue events from a top commit to a consecutive bottom commit.
+--
+-- Because we are building the history of issue events backwards, we use *top* and *bottom* to refer to the later and earlier commit within the Git history, respectively. We use *earlier* and *later* to refer to the top and bottom commits within the application of this function. In particular, top commits are processed earlier, and bottom commits are processed later.
+--
+-- For the top/earlier commit, issue events are known. For the bottom/later commit, only the scramble is known.
+--
+-- The scramble is said to *witness* an issue, if its changed files contain the issue's file. Thus, in addition to the issues a scramble contains, it witnesses the issues that have been deleted in the scramble's commit.
+--
+-- Because we are building the history from top/earlier to bottom/later commits, we have to assume that at any top/earlier commit, the issues present have been created in that commit. This function advances the issue's original commit as we learn about bottom/later commits from scrambles.
+--
+-- A scramble *advances* an issue's original commit if contains it or does not witness it. In particular, a scramble does NOT advance an issue's original commit, if it *just witnesses* it (ie. witnesses it, but does not contain it). To understand this, consider the following scenarios (A) and (B). Suppose the top/ earlier commit is `commit-c`, and the bottom/ later commit is `commit-a`. Between them, the commit `commit-b` is intermediary. Suppose for scenario (A) that `issue-1` is created in `commit-a`, deleted in `commit-b` and re-opened in `commit-c`. Suppose for scenario (B) that `issue-1` is originally created in `commit-c`. Then, at `commit-b` the following holds true:
+--
+-- - The issue `issue-1` has been created in `commit-c`, because it is present.
+-- - The issue `issue-1 is not present in `commit-b`.
+-- - The scramble of `commit-b` can witness `issue-1` in both scenarios (A), (B) even though it does not contain it.
+--
+-- Thus, we cannot decide at `commit-b` whether `issue-1` has been initially created in the top/ earlier commit `commit-c`, or whether is has been re-opened in `commit-c`. We need the information of presence in `commit-a` to decide this.
+--
+-- The most confusing edge case it the re-opening of issues. Suppose `issue-1` has been created in `commit-a`, deleted in `commit-b` and re-opened in `commit-c`. Contrast this with the scenario that `issue-1` has initially been created in `commit-c`. Observe the following as we propagate events from `commmit-c` to `commit-a`:
+--
+-- - At `commit-c`: `issue-1` is present in both scenarios, thus it is initially tracked as "created" in `commit-c`.
+-- - At `commit-b`: `issue-1` is NOT present in both scenarios. Note that the scramble can witness `issue-1` in either cases.
+-- - At `commit-a`: `issue-1` is present in the first scenario, but not in the second.
+--
+-- So, in the case `issue-1` has been re-opened, we cannot track its deletion at `commit-b`, because whether it was re-opened or originally created at `commit-b` depends on whether `issue-1` is present in the bottom `commit-a`, that we process only later. Thus, the scramble of commit `commit-b` cannot safely advance the issue's original commit, and we use this information to track re-opening of commits at later commits.
+--
+-- Note that in the whole process, issue change events and issue deletion events can never be bottom-most/latest events, as they would depend on information not yet known, ie. the first commit can neither change nor delete an issue.
+propagateIssueEvents :: [Git.CommitHash] -> IssueEvents -> Scramble -> IssueEvents
+propagateIssueEvents log topIssueEvents bottomScramble =
+ IssueEvents
+ { commitHash = bottomScramble.commitHash,
+ issueEvents =
+ sortOn logOrder $
+ let issueEventsPerIssue =
+ groupPerIssue topIssueEvents.issueEvents
+ in concat
+ [ -- CASE 1. The issue is present in the top/earlier history and bottom/later scramble. We can safely advance the issue's original commit.
+ concat
+ [ NE.init issueEvents ++ case NE.last issueEvents of
+ (IssueCreated commitHash issue) ->
+ if commitHash /= topCommitHash
+ then
+ [ IssueCreated commitHash issue,
+ IssueDeleted topCommitHash issue,
+ IssueCreated bottomCommitHash bottomIssue
+ ]
+ else
+ concat
+ [ if topIssue `neq` bottomIssue
+ then [IssueChanged commitHash bottomIssue issue]
+ else [],
+ [IssueCreated bottomCommitHash bottomIssue]
+ ]
+ _ -> error "bottom issues can only be created"
+ | issueEvents <- issueEventsPerIssue,
+ let topIssue = issue (NE.last issueEvents),
+ bottomIssue <- bottomIssues,
+ bottomIssue `sym` topIssue
+ ],
+ concat
+ [ -- CASE 2. The issue is present in the top/earlier history, not contained in the bottom/later scramble, but witnessed by the bottom/later scramble. We cannot safely advance the issue's original commit.
+ NE.init issueEvents ++ case NE.last issueEvents of
+ (IssueCreated commitHash issue) -> [IssueCreated commitHash issue]
+ _ -> error "bottom issues can only be created"
+ | issueEvents <- issueEventsPerIssue,
+ let topIssue = issue (NE.last issueEvents),
+ all (`nsym` topIssue) bottomIssues,
+ topIssue.file `elem` bottomScramble.filesChanged
+ ],
+ concat
+ [ -- CASE 3. The issue is present in the top/earlier history, but not witnessed by the bottom/later scramble. It is safe to advance the issue's commit hash.
+ NE.init issueEvents ++ case NE.last issueEvents of
+ (IssueCreated _ issue) -> [IssueCreated bottomCommitHash issue]
+ _ -> error "bottom issues can only be created"
+ | issueEvents <- issueEventsPerIssue,
+ let topIssue = issue (NE.last issueEvents),
+ all (`nsym` topIssue) bottomIssues,
+ not (topIssue.file `elem` bottomScramble.filesChanged)
+ ],
+ concat
+ [ -- CASE 4. The issue it not present in the top/earlier history, but contained in the bottom scramble. It had to be deleted by the top/ earlier commit.
+ [ IssueDeleted topIssueEvents.commitHash bottomIssue,
+ IssueCreated bottomCommitHash bottomIssue
+ ]
+ | bottomIssue <- bottomIssues,
+ all ((`nsym` bottomIssue) . issue . NE.last) issueEventsPerIssue
+ ]
+ ]
+ }
+ where
+ groupPerIssue =
+ map (NE.sortBy (comparing logOrder)) . NE.groupBy (sym `on` issue) . sortOn ((.id) . issue)
+
+ topCommitHash = topIssueEvents.commitHash
+ bottomCommitHash = bottomScramble.commitHash
+
+ bottomIssues = M.elems bottomScramble.issues
+
+ neq, sym, nsym :: Issue -> Issue -> Bool
+ sym = (==) `on` (.id)
+ nsym = (/=) `on` (.id)
+ neq = (/=) `on` (.rawTextHash)
+
+ commitHash (IssueChanged commitHash _ _) = commitHash
+ commitHash (IssueCreated commitHash _) = commitHash
+ commitHash (IssueDeleted commitHash _) = commitHash
+
+ issue (IssueChanged _ _ issue) = issue
+ issue (IssueCreated _ issue) = issue
+ issue (IssueDeleted _ issue) = issue
+
+ logOrder = fromMaybe (-1) . (`elemIndex` log) . commitHash
diff --git a/app/History/Issues.hs b/app/History/Issues.hs
new file mode 100644
index 0000000..08ad772
--- /dev/null
+++ b/app/History/Issues.hs
@@ -0,0 +1,72 @@
+module History.Issues
+ ( Issues (..),
+ )
+where
+
+import Data.Binary (Binary)
+import Data.Function (on)
+import Data.Map qualified as M
+import Data.Proxy (Proxy)
+import Data.Text qualified as T
+import GHC.Generics (Generic)
+import Git qualified
+import History.Plan (Id, Planable, Proto, assume, propagate, protoOf)
+import History.Scramble (Scramble (..), getIssuesOfFile)
+import Issue qualified as I
+
+data Issues = Issues
+ { commitHash :: Git.CommitHash,
+ issues :: M.Map T.Text I.Issue
+ }
+ deriving (Show, Generic, Binary)
+
+instance Planable Issues where
+ type Id Issues = Git.CommitHash
+ type Proto Issues = Scramble
+ protoOf :: Proxy Issues -> Git.CommitHash -> IO Scramble
+ protoOf _ commitHash = do
+ filesChanged <- Git.getChangedFilesOf commitHash
+ issues <- concat <$> mapM (getIssuesOfFile commitHash) filesChanged
+ pure $
+ Scramble
+ { issues =
+ M.unions
+ [ M.singleton issue.id issue | issue <- issues
+ ],
+ ..
+ }
+
+ assume :: Scramble -> Issues
+ assume (Scramble {..}) = Issues {..}
+
+ propagate :: [Id Issues] -> Issues -> Scramble -> Issues
+ propagate _ topIssues bottomScramble =
+ Issues
+ { commitHash = topIssues.commitHash,
+ issues =
+ M.mergeWithKey
+ ( \_ topIssue bottomIssue ->
+ Just $
+ topIssue
+ { I.provenance =
+ I.Provenance
+ { first = bottomIssue.provenance.first,
+ last =
+ if ((/=) `on` (.rawTextHash)) topIssue bottomIssue
+ then topIssue.provenance.last
+ else bottomIssue.provenance.last
+ }
+ }
+ )
+ ( \topIssues -> topIssues
+ )
+ ( \bottomIssues ->
+ M.map
+ ( \bottomIssue ->
+ bottomIssue {I.closed = True}
+ )
+ bottomIssues
+ )
+ topIssues.issues
+ bottomScramble.issues
+ }
diff --git a/app/History/Plan.hs b/app/History/Plan.hs
new file mode 100644
index 0000000..3dec391
--- /dev/null
+++ b/app/History/Plan.hs
@@ -0,0 +1,207 @@
+module History.Plan
+ ( Planable (..),
+ formulate,
+ realise,
+ )
+where
+
+import Control.Concurrent (forkIO, killThread)
+import Control.Concurrent.STM (TQueue, TVar, atomically, newTQueueIO, newTVarIO, readTQueue, readTVar, retry, writeTQueue, writeTVar)
+import Control.Monad (forever)
+import Data.Kind (Type)
+import Data.List (intercalate)
+import Data.List.NonEmpty qualified as NE
+import Data.Map qualified as M
+import Data.Proxy (Proxy (Proxy))
+import Data.Time.Clock
+import Parallel (parMapM_)
+import System.IO (hFlush, hPutStr, stderr, stdout)
+import System.Time.Monotonic (Clock, clockGetTime, newClock)
+import Text.Printf (printf)
+
+-- | Left-associative monadic fold of a structure with automatic parallelism, status-reporting and intermittent results (cacelability).
+--
+-- Semantically, `realise (formulate xs)` is equivalent to
+--
+-- ```haskell
+-- \(x:xs) -> foldM propagate (assume <$> protoOf x) =<< mapM protoOf xs
+-- ```
+--
+-- However, this module provides the following features:
+--
+-- - `protoOf` and `propagate` are computed in parallel,
+-- - `propagate` is scheduled to terminate as soon as possible,
+--
+-- This makes it possible to scale the computation across multiple cores, but still interrupt the process for intermittent results.
+--
+-- Additionally, a progress report is presented on stdout.
+class Planable output where
+ type Id output :: Type
+ type Proto output :: Type
+ protoOf :: Proxy output -> Id output -> IO (Proto output)
+ assume :: Proto output -> output
+ propagate :: [Id output] -> output -> Proto output -> output
+
+data Plan output = Plan (NE.NonEmpty (Id output)) [Task output]
+
+data Task output
+ = Compute (Id output)
+ | Propagate (Maybe (Id output)) (Id output)
+
+isCompute, isPropagate :: Task output -> Bool
+isCompute (Compute _) = True
+isCompute _ = False
+isPropagate (Propagate _ _) = True
+isPropagate _ = False
+
+formulate ::
+ Planable output =>
+ Proxy output ->
+ NE.NonEmpty (Id output) ->
+ Plan output
+formulate _ ids@(NE.uncons -> (id, (maybe [] NE.toList -> rest))) =
+ Plan ids $
+ Compute id
+ : Propagate Nothing id
+ : concat
+ ( zipWith
+ ( \id id' ->
+ [ Compute id,
+ Propagate id' id
+ ]
+ )
+ rest
+ (map Just (id : rest))
+ )
+
+data State output = State
+ { tasks :: [Task output],
+ protos :: M.Map (Id output) (Proto output),
+ outputs :: M.Map (Id output) output,
+ elapsed :: Clock
+ }
+
+realise :: (Ord (Id output), Planable output) => Plan output -> IO output
+realise plan@(Plan ids tasks) = do
+ elapsed <- newClock
+ let state0 =
+ State
+ { tasks = tasks,
+ protos = M.empty,
+ outputs = M.empty,
+ elapsed = elapsed
+ }
+ stateT <- newTVarIO state0
+ statusT <- newTQueueIO
+ tid <- forkIO $ forever do
+ state <- atomically (readTQueue statusT)
+ hPutStr stderr . printStatus =<< status state0 state
+ hFlush stdout
+ parMapM_ (step Proxy plan statusT stateT) tasks
+ let id = NE.last ids
+ output <- atomically $ do
+ maybe retry pure . M.lookup id . (.outputs) =<< readTVar stateT
+ killThread tid
+ pure output
+
+step ::
+ (Ord (Id output), Planable output) =>
+ Proxy output ->
+ Plan output ->
+ TQueue (State output) ->
+ TVar (State output) ->
+ Task output ->
+ IO ()
+step proxy _ statusT stateT (Compute id) = do
+ proto <- protoOf proxy id
+ atomically do
+ state <- readTVar stateT
+ let state' = state {protos = M.insert id proto state.protos}
+ writeTVar stateT state'
+ writeTQueue statusT state'
+ pure ()
+step _ (Plan ids _) statusT stateT (Propagate (Just id') id) = do
+ (output, proto) <- atomically do
+ state <- readTVar stateT
+ output <- maybe retry pure (M.lookup id' state.outputs)
+ proto <- maybe retry pure (M.lookup id state.protos)
+ pure (output, proto)
+ let output' = propagate (NE.toList ids) output proto
+ atomically do
+ state <- readTVar stateT
+ let state' = state {outputs = M.insert id output' state.outputs}
+ writeTVar stateT state'
+ writeTQueue statusT state'
+step _ _ statusT stateT (Propagate Nothing id) = do
+ proto <- atomically do
+ state <- readTVar stateT
+ maybe retry pure (M.lookup id state.protos)
+ let output = assume proto
+ atomically do
+ state <- readTVar stateT
+ let state' = state {outputs = M.insert id output state.outputs}
+ writeTVar stateT state'
+ writeTQueue statusT state'
+
+data Status = Status
+ { numTasks :: Progress Int,
+ numProtos :: Progress Int,
+ numOutputs :: Progress Int,
+ elapsed :: DiffTime
+ }
+ deriving (Show, Eq)
+
+data Progress a = Progress
+ { total :: a,
+ completed :: a
+ }
+ deriving (Show, Eq)
+
+status :: State output -> State output -> IO Status
+status state0 state = do
+ let totalTasks = length state0.tasks
+ totalProtos = length (filter isCompute state0.tasks)
+ totalOutputs = length (filter isPropagate state0.tasks)
+ completedTasks = completedProtos + completedOutputs
+ completedProtos = M.size state.protos
+ completedOutputs = M.size state.outputs
+ elapsed <- clockGetTime state.elapsed
+ pure
+ Status
+ { numTasks = Progress totalTasks completedTasks,
+ numProtos = Progress totalProtos completedProtos,
+ numOutputs = Progress totalOutputs completedOutputs,
+ elapsed = elapsed
+ }
+
+printStatus :: Status -> String
+printStatus Status {..} = do
+ let formatProgress completed total = pad total completed <> "/" <> total
+ pad total completed = replicate (length total - length completed) ' ' <> completed
+ eta =
+ formatEta
+ ( (fromIntegral numTasks.total * (realToFrac elapsed))
+ / fromIntegral numTasks.completed
+ )
+ (<> "\r") . intercalate " " $
+ [ formatProgress (show numTasks.completed) (show numTasks.total),
+ formatProgress (show numProtos.completed) (show numProtos.total),
+ formatProgress (show numOutputs.completed) (show numOutputs.total),
+ "ETA " <> eta
+ ]
+
+formatEta :: Double -> String
+formatEta s = do
+ if s < 60
+ then printf "%.1fs" s
+ else do
+ let m = s / 60
+ if m < 60
+ then printf "%.1fm" m
+ else do
+ let h = m / 60
+ if h < 24
+ then printf "%.1hf" h
+ else do
+ let d = h / 24
+ printf "%.1fd" d
diff --git a/app/History/Scramble.hs b/app/History/Scramble.hs
new file mode 100644
index 0000000..9004dbf
--- /dev/null
+++ b/app/History/Scramble.hs
@@ -0,0 +1,76 @@
+module History.Scramble
+ ( Scramble (..),
+ getIssuesOfFile,
+ fromComment,
+ )
+where
+
+import CMark qualified as D
+import Comment qualified as G
+import Control.Exception (Handler (..), catches)
+import Data.Binary (Binary)
+import Data.Digest.Pure.SHA qualified as S
+import Data.Map qualified as M
+import Data.Maybe (catMaybes)
+import Data.Text qualified as T
+import Data.Text.Lazy qualified as LT
+import Data.Text.Lazy.Encoding qualified as LT
+import Exception qualified as E
+import GHC.Generics (Generic)
+import Git qualified
+import Issue qualified as I
+import Issue.Parser qualified as I
+import Issue.Tag qualified as I
+import Issue.Text qualified as I
+import Parallel (parMapM)
+import Render qualified as P
+
+-- | `Scramble` records the complete issues ONLY in files that have
+-- been changed in the commit.
+data Scramble = Scramble
+ { commitHash :: Git.CommitHash,
+ filesChanged :: [FilePath],
+ issues :: M.Map T.Text I.Issue
+ }
+ deriving (Show, Binary, Generic)
+
+-- | Get all issues in the given directory and file.
+getIssuesOfFile :: Git.CommitHash -> FilePath -> IO [I.Issue]
+getIssuesOfFile commitHash filename =
+ ( fmap catMaybes . parMapM (fromComment commitHash)
+ =<< G.getComments commitHash filename
+ )
+ `catches` [ Handler \(_ :: E.UnknownFile) -> pure [],
+ Handler \(_ :: E.UnsupportedLanguage) -> pure []
+ ]
+
+-- | Note that `provenance` is trivial and needs to be fixed up later.
+fromComment :: Git.CommitHash -> G.Comment -> IO (Maybe I.Issue)
+fromComment commitHash comment = do
+ commit <- Git.getCommitOf commitHash
+ let provenance = I.Provenance commit commit
+
+ pure $
+ ( \parseResult ->
+ let (markers, title) =
+ I.stripIssueMarkers (T.pack (show (P.render parseResult.heading)))
+ in I.Issue
+ { commitHash = commitHash,
+ language = comment.language,
+ rawTextHash = S.sha1 (LT.encodeUtf8 (LT.fromStrict rawText)),
+ title = title,
+ file = comment.filePath,
+ provenance = provenance,
+ startByte = comment.startByte,
+ endByte = comment.endByte,
+ startPoint = comment.startPoint,
+ endPoint = comment.endPoint,
+ tags = I.extractTags parseResult.tags,
+ markers = markers,
+ commentStyle = commentStyle,
+ closed = False
+ }
+ )
+ <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText)
+ where
+ (commentStyle, rawText) = G.uncomment comment.language comment.text
diff --git a/app/Issue.hs b/app/Issue.hs
index 2743f45..b6ddad6 100644
--- a/app/Issue.hs
+++ b/app/Issue.hs
@@ -25,7 +25,7 @@ import Data.Time.Clock (UTCTime (utctDay))
import GHC.Generics (Generic)
import GHC.Records (HasField (..))
import Git (Author (..), Commit (..))
-import Git qualified as Git
+import Git qualified
import Issue.Parser qualified as I
import Issue.Provenance (Provenance (..))
import Issue.Tag (Tag (..))
diff --git a/app/Main.hs b/app/Main.hs
index f9fedea..1a1bf6b 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -654,7 +654,7 @@ main = do
putDoc colorize noPager width . (P.vsep . intersperse "") $
map (if detailed then (P.render . P.Detailed) else (P.render . P.Summarized)) issues
Options {colorize, noPager, width, command = Log {patch}} -> do
- es <- reverse . (.issueEvents) <$> H.getIssueEvents
+ es <- (.issueEvents) <$> H.getIssueEvents
putDoc colorize noPager width $
if patch
then P.vsep . intersperse P.emptyDoc $ map (P.render . P.Detailed) es
diff --git a/app/Parallel.hs b/app/Parallel.hs
index 1687364..e590164 100644
--- a/app/Parallel.hs
+++ b/app/Parallel.hs
@@ -1,6 +1,6 @@
-module Parallel (parMapM, parSequence) where
+module Parallel (parMapM, parMapM_, parSequence) where
-import Control.Concurrent.ParallelIO.Local (parallel, withPool)
+import Control.Concurrent.ParallelIO.Local (parallel, parallel_, withPool)
import GHC.Conc (getNumProcessors)
parMapM :: (a -> IO b) -> [a] -> IO [b]
@@ -8,6 +8,11 @@ parMapM f xs = do
n <- getNumProcessors
withPool n $ \pool -> parallel pool (map f xs)
+parMapM_ :: (a -> IO ()) -> [a] -> IO ()
+parMapM_ f xs = do
+ n <- getNumProcessors
+ withPool n $ \pool -> parallel_ pool (map f xs)
+
parSequence :: [IO a] -> IO [a]
parSequence xs = do
n <- getNumProcessors