From 10c764c022b1e46c84a3b4d3743a58bd1072b5a5 Mon Sep 17 00:00:00 2001
From: Alexander Foremny
Date: Mon, 18 Dec 2023 02:41:56 +0100
Subject: feat: limit the number of FFI calls for extracting comments

This replaces the tree-sitter bindings with a call to a single C
function that traverses the AST. We expect the query API to be slower
than manually traversing the tree for this particular use case. This
will be addressed in an upcoming commit.

@prerequisite-for add-languages-elm-shell-nix
---
 app/TreeSitter.hs | 87 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 41 insertions(+), 46 deletions(-)

(limited to 'app/TreeSitter.hs')

diff --git a/app/TreeSitter.hs b/app/TreeSitter.hs
index e911d1b..230fefc 100644
--- a/app/TreeSitter.hs
+++ b/app/TreeSitter.hs
@@ -1,65 +1,60 @@
-module TreeSitter where
-
--- | References: [tree-sitter/api.h](https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h)
+module TreeSitter
+  ( Language,
+    Node (..),
+    Point (..),
+    extract_comments,
+    tree_sitter_haskell,
+  )
+where
 
 import Foreign.C.String (CString)
 import Foreign.C.Types (CInt (..))
 import Foreign.Ptr (Ptr)
-import Foreign.Storable (Storable (..), peek)
-
-data Parser
+import Foreign.Storable (Storable (..))
 
 data Language
 
-data Tree = Tree
-
 data Node = Node
+  { startPoint :: Point,
+    endPoint :: Point,
+    startByte :: CInt,
+    endByte :: CInt
+  }
+  deriving (Show, Eq)
 
 instance Storable Node where
-  sizeOf _ = 32
+  alignment _ = 8
+  sizeOf _ = 24
+  peek ptr =
+    Node
+      <$> peekByteOff ptr 0
+      <*> peekByteOff ptr 8
+      <*> peekByteOff ptr 16
+      <*> peekByteOff ptr 20
+  poke ptr (Node {..}) = do
+    pokeByteOff ptr 0 startPoint
+    pokeByteOff ptr 8 endPoint
+    pokeByteOff ptr 16 startByte
+    pokeByteOff ptr 20 endByte
 
 data Point = Point
   { row :: CInt,
     column :: CInt
-  } deriving (Show)
+  }
+  deriving (Show, Eq)
 
 instance Storable Point where
+  alignment _ = 4
   sizeOf _ = 8
-  alignment _ = 8
-  peek p = Point <$> peekByteOff p 0 <*> peekByteOff p 4
-
-withParser :: Ptr Language -> (Ptr Parser -> IO a) -> IO a
-withParser l f = do
-  p <- ts_parser_new
-  ts_parser_set_language p l
-  x <- f p
-  ts_parser_delete p
-  pure x
-
-foreign import ccall unsafe "ts_node_start_point_p" ts_node_start_point :: Ptr Node -> Ptr Point -> IO ()
-
-foreign import ccall unsafe "ts_node_end_point_p" ts_node_end_point :: Ptr Node -> Ptr Point -> IO ()
-
-foreign import ccall unsafe "ts_node_start_byte_p" ts_node_start_byte :: Ptr Node -> IO CInt
-
-foreign import ccall unsafe "ts_node_end_byte_p" ts_node_end_byte :: Ptr Node -> IO CInt
-
-foreign import ccall unsafe "ts_node_type_p" ts_node_type :: Ptr Node -> IO CString
-
-foreign import ccall unsafe "ts_node_named_child_p" ts_node_named_child :: Ptr Node -> CInt -> Ptr Node -> IO ()
-
-foreign import ccall unsafe "ts_node_named_child_count_p" ts_node_named_child_count :: Ptr Node -> IO CInt
-
-foreign import ccall unsafe "ts_tree_root_node_p" ts_tree_root_node :: Ptr Tree -> Ptr Node -> IO ()
-
-foreign import ccall unsafe "ts_tree_delete" ts_tree_delete :: Ptr Tree -> IO ()
-
-foreign import ccall unsafe "ts_parser_parse_string" ts_parser_parse_string :: Ptr Parser -> Ptr Tree -> CString -> CInt -> IO (Ptr Tree)
-
-foreign import ccall unsafe "ts_parser_new" ts_parser_new :: IO (Ptr Parser)
-
-foreign import ccall unsafe "ts_parser_delete" ts_parser_delete :: Ptr Parser -> IO ()
-
-foreign import ccall unsafe "ts_parser_set_language" ts_parser_set_language :: Ptr Parser -> Ptr Language -> IO ()
+  peek ptr = Point <$> peekByteOff ptr 0 <*> peekByteOff ptr 4
+  poke ptr (Point {..}) = pokeByteOff ptr 0 row >> pokeByteOff ptr 4 column
+
+foreign import ccall unsafe "extract_comments"
+  extract_comments ::
+    Ptr Language ->
+    CString ->
+    Ptr (Ptr Node) ->
+    Ptr CInt ->
+    IO ()
 
 foreign import ccall unsafe "tree_sitter_haskell" tree_sitter_haskell :: Ptr Language
-- 
cgit v1.2.3