aboutsummaryrefslogtreecommitdiffstats
path: root/app/Comment.hs
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-08 06:27:15 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-08 06:43:01 +0100
commit0d20548e3846cb80acca07fad2a1dc3cfe024528 (patch)
tree6605eb393af99914b4cce483f56e1becbcbe073d /app/Comment.hs
parent5842e730152a2ae11fc8772a505baa3ba81b1e9c (diff)
chore: drop tree-grepper
Regresses in that we only support Haskell for now, as Elm, Nix or Bash are not available as tree-sitter-* Haskell packages.
Diffstat (limited to 'app/Comment.hs')
-rw-r--r--app/Comment.hs182
1 files changed, 182 insertions, 0 deletions
diff --git a/app/Comment.hs b/app/Comment.hs
new file mode 100644
index 0000000..558778b
--- /dev/null
+++ b/app/Comment.hs
@@ -0,0 +1,182 @@
+module Comment
+ ( Comment (..),
+ Point (..),
+ getComments,
+ extractComments,
+ CommentStyle (..),
+ uncomment,
+ comment,
+ )
+where
+
+import Comment.Language
+import Control.Applicative (liftA2)
+import Control.Exception (catch)
+import Control.Monad
+import Data.Binary (Binary)
+import Data.ByteString qualified as B
+import Data.List (find, sortBy)
+import Data.List.NonEmpty qualified as N
+import Data.Maybe (fromMaybe)
+import Data.Ord (comparing)
+import Data.Text qualified as T
+import Data.Text.Encoding qualified as T
+import Data.Text.Lazy qualified as LT
+import Exception qualified as E
+import Foreign.C.String
+import Foreign.Marshal.Alloc (malloc)
+import Foreign.Marshal.Array (mallocArray, peekArray)
+import Foreign.Ptr (nullPtr)
+import Foreign.Storable
+import GHC.Generics (Generic)
+import Git qualified
+import System.FilePath (takeExtension)
+import TreeSitter.Node qualified as S
+import TreeSitter.Parser qualified as S
+import TreeSitter.Tree qualified as S
+
+data Comment = Comment
+ { text :: T.Text,
+ language :: Language,
+ start :: Int,
+ end :: Int,
+ startPoint :: Point,
+ endPoint :: Point,
+ filePath :: FilePath
+ }
+ deriving (Eq, Show)
+
+data Point = Point
+ { row :: Int,
+ column :: Int
+ }
+ deriving (Eq, Show, Generic, Binary)
+
+getComments :: Git.CommitHash -> FilePath -> IO [Comment]
+getComments commitHash filePath =
+ fmap mergeLineComments
+ . extractComments filePath language
+ . (T.encodeUtf8 . LT.toStrict)
+ =<< catch
+ (Git.readTextFileOf commitHash filePath)
+ (\(_ :: E.ProcessException) -> pure "")
+ where
+ language = fromExtension (takeExtension filePath)
+
+ mergeLineComments :: [Comment] -> [Comment]
+ mergeLineComments =
+ map mergeGroup
+ . chainsBy (\a b -> a.endPoint.row + 1 == b.startPoint.row)
+ . sortBy (comparing (liftA2 (,) (.start) (.end)))
+
+ mergeGroup :: N.NonEmpty Comment -> Comment
+ mergeGroup css@(c N.:| cs) =
+ c
+ { text = T.unlines (map (.text) (c : cs)),
+ start = first.start,
+ end = last.end,
+ startPoint = first.startPoint,
+ endPoint = last.endPoint
+ }
+ where
+ first = N.head css
+ last = N.last css
+
+ {- A version of `Data.List.groupBy` that uses the last added group-member for comparison with new candidates for the group. `Data.List.groupBy` uses the initial member for all subsequent comparisons. -}
+ chainsBy :: (a -> a -> Bool) -> [a] -> [N.NonEmpty a]
+ chainsBy p = reverse . map N.reverse . go []
+ where
+ go rs [] = rs
+ go [] (x : xs) = go [N.singleton x] xs
+ go (ass@((a N.:| as) : rs)) (x : xs)
+ | p a x = go ((x N.:| a : as) : rs) xs
+ | otherwise = go (N.singleton x : ass) xs
+
+extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
+extractComments filePath language str' = do
+ S.withParser (parser language) $ \parser -> do
+ B.useAsCStringLen str' $ \(str, len) -> do
+ tree <- S.ts_parser_parse_string parser nullPtr str len
+ S.withRootNode tree $ \node -> do
+ map
+ ( \n' ->
+ let start = fromIntegral $ S.nodeStartByte n'
+ end = fromIntegral $ S.nodeEndByte n'
+ text = T.decodeUtf8 . B.take (end - start) . B.drop start $ str'
+ startPoint = fromTSPoint (S.nodeStartPoint n')
+ endPoint = fromTSPoint (S.nodeEndPoint n')
+
+ fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1)
+ in Comment {..}
+ )
+ <$> (commentsFromNodeRec language =<< peek node)
+
+commentsFromNodeRec :: Language -> S.Node -> IO [S.Node]
+commentsFromNodeRec language =
+ (filterM (isCommentNode language) =<<)
+ . childNodesFromNodeRec
+
+isCommentNode :: Language -> S.Node -> IO Bool
+isCommentNode language n =
+ (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n)
+
+childNodesFromNodeRec :: S.Node -> IO [S.Node]
+childNodesFromNodeRec n = do
+ ns' <- childNodesFromNode n
+ ns <- concat <$> mapM childNodesFromNodeRec ns'
+ pure $ n : ns
+
+childNodesFromNode :: S.Node -> IO [S.Node]
+childNodesFromNode n = do
+ let numChildren = fromIntegral (S.nodeChildCount n)
+ ns <- mallocArray numChildren
+ tsNode <- malloc
+ poke tsNode (S.nodeTSNode n)
+ S.ts_node_copy_child_nodes tsNode ns
+ peekArray numChildren ns
+
+data CommentStyle
+ = LineStyle T.Text
+ | BlockStyle T.Text T.Text
+ deriving (Eq, Show, Generic, Binary)
+
+comment :: CommentStyle -> T.Text -> T.Text
+comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines
+comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd)
+
+uncomment :: Language -> T.Text -> (CommentStyle, T.Text)
+uncomment language rawText =
+ maybe
+ ( ( LineStyle (lineStart language),
+ stripLineComments (lineStart language) text
+ )
+ )
+ ( \(blockStart, blockEnd) ->
+ ( BlockStyle blockStart blockEnd,
+ stripBlockComment blockStart blockEnd text
+ )
+ )
+ $ do
+ (blockStarts, blockEnd) <- block language
+ (,blockEnd) <$> find (`T.isPrefixOf` text) blockStarts
+ where
+ text = stripLines rawText
+ stripLines = T.intercalate "\n" . map T.strip . T.lines
+
+stripLineComments :: T.Text -> T.Text -> T.Text
+stripLineComments lineStart text =
+ onLines
+ ( \line ->
+ fromMaybe line . fmap T.stripStart $
+ T.stripPrefix lineStart line
+ )
+ text
+ where
+ onLines f = T.intercalate "\n" . map f . T.lines
+
+stripBlockComment :: T.Text -> T.Text -> T.Text -> T.Text
+stripBlockComment blockStart blockEnd text =
+ T.strip
+ . (fromMaybe text . T.stripSuffix blockEnd)
+ . (fromMaybe text . T.stripPrefix blockStart)
+ $ text