aboutsummaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-15 07:40:33 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 05:26:24 +0100
commit0d96613d9aa41f93ebb440bb1aa383456b49f28f (patch)
tree8338371bcdeb58957f3b312517cebc9763b380ba /app
parent4013b920f51790a88b5afce5be72c52b8cb2adc6 (diff)
feat: drop haskell-tree-sitter
Drop haskell-tree-sitter in favor of custom bindings to tree-sitter. haskell-tree-sitter is outdated and seems unmaintained. The implementation add low-level bindings to tree-sitter and traverses the AST in Haskell. We suspect that many FFI calls are more expensive than performing just a single API call to a C function that does the traversal. This will be addressed in upcoming commits. @prerequisite-for add-languages-elm-shell-nix
Diffstat (limited to 'app')
-rw-r--r--app/Comment.hs80
-rw-r--r--app/Comment/Language.hs3
-rw-r--r--app/TreeSitter.hs65
-rw-r--r--app/TreeSitter/bridge.c34
4 files changed, 149 insertions, 33 deletions
diff --git a/app/Comment.hs b/app/Comment.hs
index 7bd4ad5..63f610a 100644
--- a/app/Comment.hs
+++ b/app/Comment.hs
@@ -25,16 +25,13 @@ import Data.Text.Encoding qualified as T
import Exception qualified as E
import Foreign.C.String
import Foreign.Marshal.Alloc (free, malloc)
-import Foreign.Marshal.Array (mallocArray, peekArray)
-import Foreign.Ptr (nullPtr)
+import Foreign.Ptr (Ptr, nullPtr)
import Foreign.Storable
import GHC.Generics (Generic)
import GHC.Int (Int64)
import Git qualified
import System.FilePath (takeExtension)
-import TreeSitter.Node qualified as S
-import TreeSitter.Parser qualified as S
-import TreeSitter.Tree qualified as S
+import TreeSitter qualified as S
data Comment = Comment
{ text :: T.Text,
@@ -96,51 +93,72 @@ extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
extractComments filePath language str' = do
S.withParser (parser language) $ \parser -> do
B.useAsCStringLen str' $ \(str, len) -> do
- tree <- S.ts_parser_parse_string parser nullPtr str len
- S.withRootNode tree $ \node -> do
- map
- ( \n' ->
- let startByte = fromIntegral $ S.nodeStartByte n'
- endByte = fromIntegral $ S.nodeEndByte n'
- text =
+ tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len)
+ node <- malloc
+ S.ts_tree_root_node tree node
+ x <-
+ mapM
+ ( \n' -> do
+ startByte <- fromIntegral <$> S.ts_node_start_byte n'
+ endByte <- fromIntegral <$> S.ts_node_end_byte n'
+ let text =
T.decodeUtf8
. B.take (fromIntegral endByte - fromIntegral startByte)
. B.drop (fromIntegral startByte)
$ str'
- startPoint = fromTSPoint (S.nodeStartPoint n')
- endPoint = fromTSPoint (S.nodeEndPoint n')
- fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1)
- in Comment {..}
+ startPoint <- do
+ point <- malloc
+ S.ts_node_start_point n' point
+ S.Point {..} <- peek point
+ free point
+ pure
+ Point
+ { row = fromIntegral row,
+ column = fromIntegral column
+ }
+ endPoint <- do
+ point <- malloc
+ S.ts_node_end_point n' point
+ S.Point {..} <- peek point
+ free point
+ pure
+ Point
+ { row = fromIntegral row,
+ column = fromIntegral column
+ }
+
+ pure Comment {..}
)
- <$> (commentsFromNodeRec language =<< peek node)
+ =<< (commentsFromNodeRec language node)
+ free node
+ pure x
-commentsFromNodeRec :: Language -> S.Node -> IO [S.Node]
+commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node]
commentsFromNodeRec language =
(filterM (isCommentNode language) =<<)
. childNodesFromNodeRec
-isCommentNode :: Language -> S.Node -> IO Bool
+isCommentNode :: Language -> Ptr S.Node -> IO Bool
isCommentNode language n =
- (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n)
+ fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n
-childNodesFromNodeRec :: S.Node -> IO [S.Node]
+childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNodeRec n = do
ns' <- childNodesFromNode n
ns <- concat <$> mapM childNodesFromNodeRec ns'
pure $ n : ns
-childNodesFromNode :: S.Node -> IO [S.Node]
+childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNode n = do
- let numChildren = fromIntegral (S.nodeChildCount n)
- ns <- mallocArray numChildren
- tsNode <- malloc
- poke tsNode (S.nodeTSNode n)
- S.ts_node_copy_child_nodes tsNode ns
- free tsNode
- ns' <- peekArray numChildren ns
- free ns
- pure ns'
+ numChildren <- fromIntegral <$> S.ts_node_named_child_count n
+ mapM
+ ( \k -> do
+ node <- malloc
+ S.ts_node_named_child n k node
+ pure node
+ )
+ [0 .. numChildren - 1]
data CommentStyle
= LineStyle T.Text
diff --git a/app/Comment/Language.hs b/app/Comment/Language.hs
index 67c3413..925cf56 100644
--- a/app/Comment/Language.hs
+++ b/app/Comment/Language.hs
@@ -14,8 +14,7 @@ import Data.Text qualified as T
import Exception qualified as E
import Foreign.Ptr (Ptr)
import GHC.Generics (Generic)
-import TreeSitter.Haskell qualified as S
-import TreeSitter.Language qualified as S
+import TreeSitter qualified as S
data Language
= Haskell
diff --git a/app/TreeSitter.hs b/app/TreeSitter.hs
new file mode 100644
index 0000000..e911d1b
--- /dev/null
+++ b/app/TreeSitter.hs
@@ -0,0 +1,65 @@
+module TreeSitter where
+
+-- | References: [tree-sitter/api.h](https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h)
+
+import Foreign.C.String (CString)
+import Foreign.C.Types (CInt (..))
+import Foreign.Ptr (Ptr)
+import Foreign.Storable (Storable (..), peek)
+
+data Parser
+
+data Language
+
+data Tree = Tree
+
+data Node = Node
+
+instance Storable Node where
+ sizeOf _ = 32
+
+data Point = Point
+ { row :: CInt,
+ column :: CInt
+ } deriving (Show)
+
+instance Storable Point where
+ sizeOf _ = 8
+ alignment _ = 8
+ peek p = Point <$> peekByteOff p 0 <*> peekByteOff p 4
+
+withParser :: Ptr Language -> (Ptr Parser -> IO a) -> IO a
+withParser l f = do
+ p <- ts_parser_new
+ ts_parser_set_language p l
+ x <- f p
+ ts_parser_delete p
+ pure x
+
+foreign import ccall unsafe "ts_node_start_point_p" ts_node_start_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_end_point_p" ts_node_end_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_start_byte_p" ts_node_start_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_end_byte_p" ts_node_end_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_type_p" ts_node_type :: Ptr Node -> IO CString
+
+foreign import ccall unsafe "ts_node_named_child_p" ts_node_named_child :: Ptr Node -> CInt -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_node_named_child_count_p" ts_node_named_child_count :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_tree_root_node_p" ts_tree_root_node :: Ptr Tree -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_tree_delete" ts_tree_delete :: Ptr Tree -> IO ()
+
+foreign import ccall unsafe "ts_parser_parse_string" ts_parser_parse_string :: Ptr Parser -> Ptr Tree -> CString -> CInt -> IO (Ptr Tree)
+
+foreign import ccall unsafe "ts_parser_new" ts_parser_new :: IO (Ptr Parser)
+
+foreign import ccall unsafe "ts_parser_delete" ts_parser_delete :: Ptr Parser -> IO ()
+
+foreign import ccall unsafe "ts_parser_set_language" ts_parser_set_language :: Ptr Parser -> Ptr Language -> IO ()
+
+foreign import ccall unsafe "tree_sitter_haskell" tree_sitter_haskell :: Ptr Language
diff --git a/app/TreeSitter/bridge.c b/app/TreeSitter/bridge.c
new file mode 100644
index 0000000..904c88e
--- /dev/null
+++ b/app/TreeSitter/bridge.c
@@ -0,0 +1,34 @@
+#include "tree_sitter/api.h"
+#include "string.h"
+
+void ts_tree_root_node_p(TSTree *tree, TSNode *node) {
+ (*node) = ts_tree_root_node(tree);
+}
+
+uint32_t ts_node_named_child_count_p(TSNode *node) {
+ return ts_node_named_child_count(*node);
+}
+
+uint32_t ts_node_start_byte_p(TSNode *node) {
+ return ts_node_start_byte(*node);
+}
+
+uint32_t ts_node_end_byte_p(TSNode *node) {
+ return ts_node_end_byte(*node);
+}
+
+uint32_t ts_node_start_point_p(TSNode *node, TSPoint *point) {
+ (*point) = ts_node_start_point(*node);
+}
+
+uint32_t ts_node_end_point_p(TSNode *node, TSPoint *point) {
+ (*point) = ts_node_end_point(*node);
+}
+
+const char* ts_node_type_p(TSNode *node) {
+ return ts_node_type(*node);
+}
+
+void ts_node_named_child_p(TSNode* self, uint32_t child_index, TSNode* node) {
+ (*node) = ts_node_named_child(*self, child_index);
+}