From 0d96613d9aa41f93ebb440bb1aa383456b49f28f Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Fri, 15 Dec 2023 07:40:33 +0100 Subject: feat: drop haskell-tree-sitter Drop haskell-tree-sitter in favor of custom bindings to tree-sitter. haskell-tree-sitter is outdated and seems unmaintained. The implementation add low-level bindings to tree-sitter and traverses the AST in Haskell. We suspect that many FFI calls are more expensive than performing just a single API call to a C function that does the traversal. This will be addressed in upcoming commits. @prerequisite-for add-languages-elm-shell-nix --- app/Comment.hs | 80 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 31 deletions(-) (limited to 'app/Comment.hs') diff --git a/app/Comment.hs b/app/Comment.hs index 7bd4ad5..63f610a 100644 --- a/app/Comment.hs +++ b/app/Comment.hs @@ -25,16 +25,13 @@ import Data.Text.Encoding qualified as T import Exception qualified as E import Foreign.C.String import Foreign.Marshal.Alloc (free, malloc) -import Foreign.Marshal.Array (mallocArray, peekArray) -import Foreign.Ptr (nullPtr) +import Foreign.Ptr (Ptr, nullPtr) import Foreign.Storable import GHC.Generics (Generic) import GHC.Int (Int64) import Git qualified import System.FilePath (takeExtension) -import TreeSitter.Node qualified as S -import TreeSitter.Parser qualified as S -import TreeSitter.Tree qualified as S +import TreeSitter qualified as S data Comment = Comment { text :: T.Text, @@ -96,51 +93,72 @@ extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment] extractComments filePath language str' = do S.withParser (parser language) $ \parser -> do B.useAsCStringLen str' $ \(str, len) -> do - tree <- S.ts_parser_parse_string parser nullPtr str len - S.withRootNode tree $ \node -> do - map - ( \n' -> - let startByte = fromIntegral $ S.nodeStartByte n' - endByte = fromIntegral $ S.nodeEndByte n' - text = + tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len) + node <- malloc + S.ts_tree_root_node tree node + x <- + mapM + ( \n' -> do + startByte <- fromIntegral <$> S.ts_node_start_byte n' + endByte <- fromIntegral <$> S.ts_node_end_byte n' + let text = T.decodeUtf8 . B.take (fromIntegral endByte - fromIntegral startByte) . B.drop (fromIntegral startByte) $ str' - startPoint = fromTSPoint (S.nodeStartPoint n') - endPoint = fromTSPoint (S.nodeEndPoint n') - fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1) - in Comment {..} + startPoint <- do + point <- malloc + S.ts_node_start_point n' point + S.Point {..} <- peek point + free point + pure + Point + { row = fromIntegral row, + column = fromIntegral column + } + endPoint <- do + point <- malloc + S.ts_node_end_point n' point + S.Point {..} <- peek point + free point + pure + Point + { row = fromIntegral row, + column = fromIntegral column + } + + pure Comment {..} ) - <$> (commentsFromNodeRec language =<< peek node) + =<< (commentsFromNodeRec language node) + free node + pure x -commentsFromNodeRec :: Language -> S.Node -> IO [S.Node] +commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node] commentsFromNodeRec language = (filterM (isCommentNode language) =<<) . childNodesFromNodeRec -isCommentNode :: Language -> S.Node -> IO Bool +isCommentNode :: Language -> Ptr S.Node -> IO Bool isCommentNode language n = - (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n) + fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n -childNodesFromNodeRec :: S.Node -> IO [S.Node] +childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node] childNodesFromNodeRec n = do ns' <- childNodesFromNode n ns <- concat <$> mapM childNodesFromNodeRec ns' pure $ n : ns -childNodesFromNode :: S.Node -> IO [S.Node] +childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node] childNodesFromNode n = do - let numChildren = fromIntegral (S.nodeChildCount n) - ns <- mallocArray numChildren - tsNode <- malloc - poke tsNode (S.nodeTSNode n) - S.ts_node_copy_child_nodes tsNode ns - free tsNode - ns' <- peekArray numChildren ns - free ns - pure ns' + numChildren <- fromIntegral <$> S.ts_node_named_child_count n + mapM + ( \k -> do + node <- malloc + S.ts_node_named_child n k node + pure node + ) + [0 .. numChildren - 1] data CommentStyle = LineStyle T.Text -- cgit v1.2.3