From 0d20548e3846cb80acca07fad2a1dc3cfe024528 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Fri, 8 Dec 2023 06:27:15 +0100 Subject: chore: drop tree-grepper Regresses in that we only support Haskell for now, as Elm, Nix or Bash are not available as tree-sitter-* Haskell packages. --- app/Comment.hs | 182 ++++++++++++++++++++++++++++++++++++++++++++ app/Comment/Language.hs | 39 ++++++++++ app/Extract.hs | 17 +++++ app/Git.hs | 3 + app/History.hs | 10 +-- app/Issue.hs | 13 ++-- app/TreeGrepper/Comment.hs | 149 ------------------------------------ app/TreeGrepper/FileType.hs | 75 ------------------ app/TreeGrepper/Match.hs | 69 ----------------- app/TreeGrepper/Result.hs | 15 ---- 10 files changed, 252 insertions(+), 320 deletions(-) create mode 100644 app/Comment.hs create mode 100644 app/Comment/Language.hs create mode 100644 app/Extract.hs delete mode 100644 app/TreeGrepper/Comment.hs delete mode 100644 app/TreeGrepper/FileType.hs delete mode 100644 app/TreeGrepper/Match.hs delete mode 100644 app/TreeGrepper/Result.hs (limited to 'app') diff --git a/app/Comment.hs b/app/Comment.hs new file mode 100644 index 0000000..558778b --- /dev/null +++ b/app/Comment.hs @@ -0,0 +1,182 @@ +module Comment + ( Comment (..), + Point (..), + getComments, + extractComments, + CommentStyle (..), + uncomment, + comment, + ) +where + +import Comment.Language +import Control.Applicative (liftA2) +import Control.Exception (catch) +import Control.Monad +import Data.Binary (Binary) +import Data.ByteString qualified as B +import Data.List (find, sortBy) +import Data.List.NonEmpty qualified as N +import Data.Maybe (fromMaybe) +import Data.Ord (comparing) +import Data.Text qualified as T +import Data.Text.Encoding qualified as T +import Data.Text.Lazy qualified as LT +import Exception qualified as E +import Foreign.C.String +import Foreign.Marshal.Alloc (malloc) +import Foreign.Marshal.Array (mallocArray, peekArray) +import Foreign.Ptr (nullPtr) +import Foreign.Storable +import GHC.Generics (Generic) +import Git qualified +import System.FilePath (takeExtension) +import TreeSitter.Node qualified as S +import TreeSitter.Parser qualified as S +import TreeSitter.Tree qualified as S + +data Comment = Comment + { text :: T.Text, + language :: Language, + start :: Int, + end :: Int, + startPoint :: Point, + endPoint :: Point, + filePath :: FilePath + } + deriving (Eq, Show) + +data Point = Point + { row :: Int, + column :: Int + } + deriving (Eq, Show, Generic, Binary) + +getComments :: Git.CommitHash -> FilePath -> IO [Comment] +getComments commitHash filePath = + fmap mergeLineComments + . extractComments filePath language + . (T.encodeUtf8 . LT.toStrict) + =<< catch + (Git.readTextFileOf commitHash filePath) + (\(_ :: E.ProcessException) -> pure "") + where + language = fromExtension (takeExtension filePath) + + mergeLineComments :: [Comment] -> [Comment] + mergeLineComments = + map mergeGroup + . chainsBy (\a b -> a.endPoint.row + 1 == b.startPoint.row) + . sortBy (comparing (liftA2 (,) (.start) (.end))) + + mergeGroup :: N.NonEmpty Comment -> Comment + mergeGroup css@(c N.:| cs) = + c + { text = T.unlines (map (.text) (c : cs)), + start = first.start, + end = last.end, + startPoint = first.startPoint, + endPoint = last.endPoint + } + where + first = N.head css + last = N.last css + + {- A version of `Data.List.groupBy` that uses the last added group-member for comparison with new candidates for the group. `Data.List.groupBy` uses the initial member for all subsequent comparisons. -} + chainsBy :: (a -> a -> Bool) -> [a] -> [N.NonEmpty a] + chainsBy p = reverse . map N.reverse . go [] + where + go rs [] = rs + go [] (x : xs) = go [N.singleton x] xs + go (ass@((a N.:| as) : rs)) (x : xs) + | p a x = go ((x N.:| a : as) : rs) xs + | otherwise = go (N.singleton x : ass) xs + +extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment] +extractComments filePath language str' = do + S.withParser (parser language) $ \parser -> do + B.useAsCStringLen str' $ \(str, len) -> do + tree <- S.ts_parser_parse_string parser nullPtr str len + S.withRootNode tree $ \node -> do + map + ( \n' -> + let start = fromIntegral $ S.nodeStartByte n' + end = fromIntegral $ S.nodeEndByte n' + text = T.decodeUtf8 . B.take (end - start) . B.drop start $ str' + startPoint = fromTSPoint (S.nodeStartPoint n') + endPoint = fromTSPoint (S.nodeEndPoint n') + + fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1) + in Comment {..} + ) + <$> (commentsFromNodeRec language =<< peek node) + +commentsFromNodeRec :: Language -> S.Node -> IO [S.Node] +commentsFromNodeRec language = + (filterM (isCommentNode language) =<<) + . childNodesFromNodeRec + +isCommentNode :: Language -> S.Node -> IO Bool +isCommentNode language n = + (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n) + +childNodesFromNodeRec :: S.Node -> IO [S.Node] +childNodesFromNodeRec n = do + ns' <- childNodesFromNode n + ns <- concat <$> mapM childNodesFromNodeRec ns' + pure $ n : ns + +childNodesFromNode :: S.Node -> IO [S.Node] +childNodesFromNode n = do + let numChildren = fromIntegral (S.nodeChildCount n) + ns <- mallocArray numChildren + tsNode <- malloc + poke tsNode (S.nodeTSNode n) + S.ts_node_copy_child_nodes tsNode ns + peekArray numChildren ns + +data CommentStyle + = LineStyle T.Text + | BlockStyle T.Text T.Text + deriving (Eq, Show, Generic, Binary) + +comment :: CommentStyle -> T.Text -> T.Text +comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines +comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd) + +uncomment :: Language -> T.Text -> (CommentStyle, T.Text) +uncomment language rawText = + maybe + ( ( LineStyle (lineStart language), + stripLineComments (lineStart language) text + ) + ) + ( \(blockStart, blockEnd) -> + ( BlockStyle blockStart blockEnd, + stripBlockComment blockStart blockEnd text + ) + ) + $ do + (blockStarts, blockEnd) <- block language + (,blockEnd) <$> find (`T.isPrefixOf` text) blockStarts + where + text = stripLines rawText + stripLines = T.intercalate "\n" . map T.strip . T.lines + +stripLineComments :: T.Text -> T.Text -> T.Text +stripLineComments lineStart text = + onLines + ( \line -> + fromMaybe line . fmap T.stripStart $ + T.stripPrefix lineStart line + ) + text + where + onLines f = T.intercalate "\n" . map f . T.lines + +stripBlockComment :: T.Text -> T.Text -> T.Text -> T.Text +stripBlockComment blockStart blockEnd text = + T.strip + . (fromMaybe text . T.stripSuffix blockEnd) + . (fromMaybe text . T.stripPrefix blockStart) + $ text diff --git a/app/Comment/Language.hs b/app/Comment/Language.hs new file mode 100644 index 0000000..009c6e6 --- /dev/null +++ b/app/Comment/Language.hs @@ -0,0 +1,39 @@ +module Comment.Language + ( Language (..), + fromExtension, + parser, + lineStart, + block, + nodeTypes, + ) +where + +import Control.Exception (throw) +import Data.Binary (Binary) +import Data.Text qualified as T +import Exception qualified as E +import Foreign.Ptr (Ptr) +import GHC.Generics (Generic) +import TreeSitter.Haskell qualified as S +import TreeSitter.Language qualified as S + +data Language + = Haskell + deriving (Eq, Show, Generic, Binary) + +fromExtension :: String -> Language +fromExtension ".hs" = Haskell +fromExtension ext = throw $ E.UnknownFileExtension ext + +-- TODO add languages elm, shell, nix +parser :: Language -> Ptr S.Language +parser Haskell = S.tree_sitter_haskell + +lineStart :: Language -> T.Text +lineStart Haskell = "--" + +block :: Language -> Maybe ([T.Text], T.Text) +block Haskell = Just (["{-"], "-}") + +nodeTypes :: Language -> [String] +nodeTypes Haskell = ["comment"] diff --git a/app/Extract.hs b/app/Extract.hs new file mode 100644 index 0000000..e351898 --- /dev/null +++ b/app/Extract.hs @@ -0,0 +1,17 @@ +module Extract where + +data Comment = Comment + { -- result fields + file :: String, + file_type :: FileType, + -- match fields + kind :: String, + name :: String, + text :: T.Text, + start :: Position, + end :: Position + } + +extractComments :: T.Text -> IO [Comment] +extractComments = do + parer <- ts_parser_new diff --git a/app/Git.hs b/app/Git.hs index a3c82fa..d308026 100644 --- a/app/Git.hs +++ b/app/Git.hs @@ -105,6 +105,9 @@ getCommitOf commitHash@(Commit hash) = do } _ -> throwIO E.NoCommits +-- TODO Fix `readTextFileOf` +-- +-- Handle file does not exist in `WorkingTree` case. readTextFileOf :: CommitHash -> FilePath -> IO LT.Text readTextFileOf WorkingTree filePath = LT.readFile filePath readTextFileOf (Commit hash) filePath = diff --git a/app/History.hs b/app/History.hs index 5b2dab3..d9f434d 100644 --- a/app/History.hs +++ b/app/History.hs @@ -6,6 +6,7 @@ where import CMark qualified as D import Cache (cachedMaybe) +import Comment qualified as G import Control.Exception (catch, handle, try) import Data.Binary (Binary) import Data.ByteString.Lazy qualified as LB @@ -35,7 +36,6 @@ import Render qualified as P import System.FilePath (()) import System.IO.Temp (withSystemTempDirectory) import System.Process.Typed (setWorkingDir) -import TreeGrepper.Comment qualified as G import Tuple () -- TODO Reduce cached data size @@ -129,10 +129,10 @@ fromComment commitHash comment = do in I.Issue { title = title, description = N.nonEmpty parseResult.paragraphs, - file = comment.file, + file = comment.filePath, provenance = provenance, - start = comment.start, - end = comment.end, + startPoint = comment.startPoint, + endPoint = comment.endPoint, tags = I.extractTags parseResult.tags, markers = markers, rawText = rawText, @@ -143,7 +143,7 @@ fromComment commitHash comment = do ) <$> I.parse I.issueMarkers (D.commonmarkToNode [] rawText) where - (commentStyle, rawText) = G.uncomment comment.file_type comment.text + (commentStyle, rawText) = G.uncomment comment.language comment.text propagate :: CommitHash -> History -> Scramble -> IO History propagate commitHash oldHistory scramble = do diff --git a/app/Issue.hs b/app/Issue.hs index 83e7141..9ae3de8 100644 --- a/app/Issue.hs +++ b/app/Issue.hs @@ -10,6 +10,7 @@ module Issue where import CMark qualified as D +import Comment qualified as G import Data.Binary (Binary (..)) import Data.List.NonEmpty (NonEmpty) import Data.Text qualified as T @@ -21,8 +22,6 @@ import Git (Author (..), Commit (..)) import Issue.Provenance (Provenance (..)) import Issue.Tag (Tag (..)) import Render qualified as P -import TreeGrepper.Comment qualified as G -import TreeGrepper.Match qualified as G import Prelude hiding (id) data Issue = Issue @@ -30,8 +29,8 @@ data Issue = Issue description :: Maybe (NonEmpty D.Node), file :: String, provenance :: Provenance, - start :: G.Position, - end :: G.Position, + startPoint :: G.Point, + endPoint :: G.Point, tags :: [Tag], markers :: [T.Text], rawText :: T.Text, @@ -78,12 +77,12 @@ replaceText :: Issue -> T.Text -> IO () replaceText issue s' = T.writeFile issue.file . replace (indent (comment s')) =<< T.readFile issue.file where comment = T.intercalate "\n" . map T.strip . T.lines . G.comment issue.commentStyle - indent = T.intercalate "\n" . mapButFirst (T.replicate (issue.start.column - 1) " " <>) . T.lines + indent = T.intercalate "\n" . mapButFirst (T.replicate (issue.startPoint.column - 1) " " <>) . T.lines replace s t = before <> s <> after where t' = T.lines t - before = T.intercalate "\n" (mapLast (T.take (issue.start.column - 1)) (take issue.start.row t')) - after = T.unlines (mapFirst (T.drop issue.end.column) (drop (issue.end.row - 1) t')) + before = T.intercalate "\n" (mapLast (T.take (issue.startPoint.column - 1)) (take issue.startPoint.row t')) + after = T.unlines (mapFirst (T.drop issue.endPoint.column) (drop (issue.endPoint.row - 1) t')) mapFirst _ [] = [] mapFirst f (x : xs) = f x : xs mapLast _ [] = [] diff --git a/app/TreeGrepper/Comment.hs b/app/TreeGrepper/Comment.hs deleted file mode 100644 index 7c2ca90..0000000 --- a/app/TreeGrepper/Comment.hs +++ /dev/null @@ -1,149 +0,0 @@ -module TreeGrepper.Comment - ( Comment (..), - getComments, - CommentStyle (..), - uncomment, - comment, - ) -where - -import Control.Exception (catch, throw) -import Data.Aeson qualified as A -import Data.Binary (Binary) -import Data.ByteString.Lazy.Char8 qualified as B -import Data.Function ((&)) -import Data.List (find) -import Data.Maybe (fromMaybe) -import Data.Text qualified as T -import Data.Text.Lazy.IO qualified as LT -import Exception qualified as E -import GHC.Generics (Generic) -import Git qualified -import Process (proc, sh) -import System.Directory (createDirectoryIfMissing) -import System.FilePath (takeDirectory, takeExtension, takeFileName, ()) -import System.IO.Temp (withSystemTempDirectory) -import System.Process.Typed qualified as P -import TreeGrepper.FileType (FileType (..)) -import TreeGrepper.FileType qualified as G -import TreeGrepper.Match (Match (..), Position (..)) -import TreeGrepper.Match qualified as G -import TreeGrepper.Result (Result (..)) -import TreeGrepper.Result qualified as G - -data Comment = Comment - { -- result fields - file :: String, - file_type :: FileType, - -- match fields - kind :: String, - name :: String, - text :: T.Text, - start :: Position, - end :: Position - } - deriving (Show, Generic) - -data CommentStyle - = LineStyle T.Text - | BlockStyle T.Text T.Text - deriving (Eq, Show, Generic, Binary) - -comment :: CommentStyle -> T.Text -> T.Text -comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines -comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd) - -uncomment :: FileType -> T.Text -> (CommentStyle, T.Text) -uncomment fileType rawText = - maybe - ( ( LineStyle info.lineStart, - stripLineComments (G.info fileType).lineStart text - ) - ) - ( \(blockInfo, blockStart) -> - ( BlockStyle blockStart blockInfo.blockEnd, - stripBlockComment blockStart blockInfo.blockEnd text - ) - ) - $ do - blockInfo <- info.block - (,) blockInfo <$> find (`T.isPrefixOf` text) blockInfo.blockStart - where - info = G.info fileType - text = stripLines rawText - stripLines = T.intercalate "\n" . map T.strip . T.lines - -stripLineComments :: T.Text -> T.Text -> T.Text -stripLineComments lineStart text = - onLines - ( \line -> - fromMaybe line . fmap T.stripStart $ - T.stripPrefix lineStart line - ) - text - where - onLines f = T.intercalate "\n" . map f . T.lines - -stripBlockComment :: T.Text -> T.Text -> T.Text -> T.Text -stripBlockComment blockStart blockEnd text = - T.strip - . (fromMaybe text . T.stripSuffix blockEnd) - . (fromMaybe text . T.stripPrefix blockStart) - $ text - -fromMatch :: Result -> Match -> Comment -fromMatch Result {..} Match {..} = Comment {..} - -getComments :: Git.CommitHash -> FilePath -> IO [Comment] -getComments commitHash fn = do - let ext = takeExtension fn - s <- - catch - (Git.readTextFileOf commitHash fn) - (\(_ :: E.ProcessException) -> pure "") - withSystemTempDirectory (takeFileName fn) $ \cwd -> do - createDirectoryIfMissing True (cwd takeDirectory fn) - LT.writeFile (cwd fn) s - concatMap (\result -> map (fromMatch result) result.matches) - . map fixTreeGrepper - . decode - <$> sh - ( ( proc - "tree-grepper % --query % % --format json" - fn - (treeGrepperLanguage ext) - (treeGrepperQuery ext) - ) - & P.setWorkingDir cwd - ) - -decode :: B.ByteString -> [Result] -decode = either (throw . E.InvalidTreeGrepperResult) id . A.eitherDecode - -fixTreeGrepper :: G.Result -> G.Result -fixTreeGrepper treeGrepperResult = - treeGrepperResult {G.matches = G.merge treeGrepperResult.matches} - -treeGrepperLanguage :: String -> String -treeGrepperLanguage ext = - -- TODO Add support for all tree-grepper supported files - -- - -- tree-grepper supported files can be listed through `tree-grepper - -- --languages`. - -- - -- @backlog - case ext of - ".elm" -> "elm" - ".hs" -> "haskell" - ".nix" -> "nix" - ".sh" -> "sh" - _ -> throw (E.UnknownFileExtension ext) - -treeGrepperQuery :: String -> String -treeGrepperQuery ext = - case ext of - ".elm" -> "([(line_comment) (block_comment)])" - ".hs" -> "(comment)" - ".nix" -> "(comment)" - ".sh" -> "(comment)" - _ -> throw (E.UnknownFileExtension ext) diff --git a/app/TreeGrepper/FileType.hs b/app/TreeGrepper/FileType.hs deleted file mode 100644 index 506cbc5..0000000 --- a/app/TreeGrepper/FileType.hs +++ /dev/null @@ -1,75 +0,0 @@ -module TreeGrepper.FileType - ( FileType (..), - all, - Info (..), - BlockInfo (..), - info, - ) -where - -import Data.Aeson (FromJSON (parseJSON)) -import Data.Binary (Binary) -import Data.Text (Text) -import GHC.Generics (Generic) -import Prelude hiding (all) - -data FileType - = Elm - | Haskell - | Nix - | Shell - deriving (Eq, Show, Generic, Binary) - -instance FromJSON FileType where - parseJSON v = - parseJSON v >>= \case - "elm" -> pure Elm - "haskell" -> pure Haskell - "nix" -> pure Nix - "sh" -> pure Shell - fileType -> fail ("parsing file_type failed, got " ++ fileType) - -all :: [FileType] -all = - [ Elm, - Haskell, - Nix, - Shell - ] - -data Info = Info - { exts :: [String], - lineStart :: Text, - block :: Maybe BlockInfo - } - -data BlockInfo = BlockInfo - { blockStart :: [Text], - blockEnd :: Text - } - -info :: FileType -> Info -info Elm = - Info - { exts = [".elm"], - lineStart = "--", - block = Just BlockInfo {blockStart = ["{-|", "{-"], blockEnd = "-}"} - } -info Haskell = - Info - { exts = [".hs"], - lineStart = "--", - block = Just BlockInfo {blockStart = ["{-"], blockEnd = "-}"} - } -info Nix = - Info - { exts = [".nix"], - lineStart = "#", - block = Just BlockInfo {blockStart = ["/*"], blockEnd = "*/"} - } -info Shell = - Info - { exts = [".sh"], - lineStart = "#", - block = Nothing - } diff --git a/app/TreeGrepper/Match.hs b/app/TreeGrepper/Match.hs deleted file mode 100644 index 5d9479e..0000000 --- a/app/TreeGrepper/Match.hs +++ /dev/null @@ -1,69 +0,0 @@ -module TreeGrepper.Match - ( Match (..), - Position (..), - merge, - ) -where - -import Data.Aeson (FromJSON) -import Data.Binary (Binary) -import Data.Function (on) -import Data.List (sortBy) -import Data.List.NonEmpty (NonEmpty ((:|))) -import Data.List.NonEmpty qualified as N -import Data.Ord (comparing) -import Data.Text (Text) -import Data.Text qualified as T -import GHC.Generics (Generic) - -data Match = Match - { kind :: String, - name :: String, - text :: Text, - start :: Position, - end :: Position - } - deriving (Show, Generic) - -instance FromJSON Match - -data Position = Position - { row :: Int, - column :: Int - } - deriving (Eq, Show, Generic, Binary) - -instance Ord Position where - compare = compare `on` (\p -> (p.row, p.column)) - -instance FromJSON Position - -{- tree-grepper (tree-sitter) is unable to match sibling comments blocks as a whole. We thus merge matches if they are line-adjacent. -} -merge :: [Match] -> [Match] -merge matches = - map mergeGroup - . chainsBy (\a b -> a.end.row + 1 == b.start.row) - $ sortBy (comparing (.start)) matches - -mergeGroup :: NonEmpty Match -> Match -mergeGroup (m :| ms) = - m - { text = text, - start = start, - end = end - } - where - mss = m : ms - text = T.unlines $ map (.text) mss - start = minimum $ map (.start) mss - end = maximum $ map (.end) mss - -{- A version of `Data.List.groupBy` that uses the last added group-member for comparison with new candidates for the group. `Data.List.groupBy` uses the initial member for all subsequent comparisons. -} -chainsBy :: (a -> a -> Bool) -> [a] -> [NonEmpty a] -chainsBy p = reverse . map N.reverse . go [] - where - go rs [] = rs - go [] (x : xs) = go [N.singleton x] xs - go (ass@((a :| as) : rs)) (x : xs) - | p a x = go ((x :| a : as) : rs) xs - | otherwise = go (N.singleton x : ass) xs diff --git a/app/TreeGrepper/Result.hs b/app/TreeGrepper/Result.hs deleted file mode 100644 index 856871a..0000000 --- a/app/TreeGrepper/Result.hs +++ /dev/null @@ -1,15 +0,0 @@ -module TreeGrepper.Result (Result (..)) where - -import Data.Aeson (FromJSON) -import GHC.Generics (Generic) -import TreeGrepper.FileType (FileType) -import TreeGrepper.Match (Match) - -data Result = Result - { file :: String, - file_type :: FileType, - matches :: [Match] - } - deriving (Show, Generic) - -instance FromJSON Result -- cgit v1.2.3