From 0d96613d9aa41f93ebb440bb1aa383456b49f28f Mon Sep 17 00:00:00 2001
From: Alexander Foremny <aforemny@posteo.de>
Date: Fri, 15 Dec 2023 07:40:33 +0100
Subject: feat: drop haskell-tree-sitter

Drop haskell-tree-sitter in favor of custom bindings to tree-sitter.
haskell-tree-sitter is outdated and seems unmaintained.

The implementation add low-level bindings to tree-sitter and traverses
the AST in Haskell.

We suspect that many FFI calls are more expensive than performing just a
single API call to a C function that does the traversal.

This will be addressed in upcoming commits.

@prerequisite-for add-languages-elm-shell-nix
---
 app/Comment.hs          | 80 ++++++++++++++++++++++++++++++-------------------
 app/Comment/Language.hs |  3 +-
 app/TreeSitter.hs       | 65 ++++++++++++++++++++++++++++++++++++++++
 app/TreeSitter/bridge.c | 34 +++++++++++++++++++++
 4 files changed, 149 insertions(+), 33 deletions(-)
 create mode 100644 app/TreeSitter.hs
 create mode 100644 app/TreeSitter/bridge.c

(limited to 'app')

diff --git a/app/Comment.hs b/app/Comment.hs
index 7bd4ad5..63f610a 100644
--- a/app/Comment.hs
+++ b/app/Comment.hs
@@ -25,16 +25,13 @@ import Data.Text.Encoding qualified as T
 import Exception qualified as E
 import Foreign.C.String
 import Foreign.Marshal.Alloc (free, malloc)
-import Foreign.Marshal.Array (mallocArray, peekArray)
-import Foreign.Ptr (nullPtr)
+import Foreign.Ptr (Ptr, nullPtr)
 import Foreign.Storable
 import GHC.Generics (Generic)
 import GHC.Int (Int64)
 import Git qualified
 import System.FilePath (takeExtension)
-import TreeSitter.Node qualified as S
-import TreeSitter.Parser qualified as S
-import TreeSitter.Tree qualified as S
+import TreeSitter qualified as S
 
 data Comment = Comment
   { text :: T.Text,
@@ -96,51 +93,72 @@ extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
 extractComments filePath language str' = do
   S.withParser (parser language) $ \parser -> do
     B.useAsCStringLen str' $ \(str, len) -> do
-      tree <- S.ts_parser_parse_string parser nullPtr str len
-      S.withRootNode tree $ \node -> do
-        map
-          ( \n' ->
-              let startByte = fromIntegral $ S.nodeStartByte n'
-                  endByte = fromIntegral $ S.nodeEndByte n'
-                  text =
+      tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len)
+      node <- malloc
+      S.ts_tree_root_node tree node
+      x <-
+        mapM
+          ( \n' -> do
+              startByte <- fromIntegral <$> S.ts_node_start_byte n'
+              endByte <- fromIntegral <$> S.ts_node_end_byte n'
+              let text =
                     T.decodeUtf8
                       . B.take (fromIntegral endByte - fromIntegral startByte)
                       . B.drop (fromIntegral startByte)
                       $ str'
-                  startPoint = fromTSPoint (S.nodeStartPoint n')
-                  endPoint = fromTSPoint (S.nodeEndPoint n')
 
-                  fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1)
-               in Comment {..}
+              startPoint <- do
+                point <- malloc
+                S.ts_node_start_point n' point
+                S.Point {..} <- peek point
+                free point
+                pure
+                  Point
+                    { row = fromIntegral row,
+                      column = fromIntegral column
+                    }
+              endPoint <- do
+                point <- malloc
+                S.ts_node_end_point n' point
+                S.Point {..} <- peek point
+                free point
+                pure
+                  Point
+                    { row = fromIntegral row,
+                      column = fromIntegral column
+                    }
+
+              pure Comment {..}
           )
-          <$> (commentsFromNodeRec language =<< peek node)
+          =<< (commentsFromNodeRec language node)
+      free node
+      pure x
 
-commentsFromNodeRec :: Language -> S.Node -> IO [S.Node]
+commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node]
 commentsFromNodeRec language =
   (filterM (isCommentNode language) =<<)
     . childNodesFromNodeRec
 
-isCommentNode :: Language -> S.Node -> IO Bool
+isCommentNode :: Language -> Ptr S.Node -> IO Bool
 isCommentNode language n =
-  (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n)
+  fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n
 
-childNodesFromNodeRec :: S.Node -> IO [S.Node]
+childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node]
 childNodesFromNodeRec n = do
   ns' <- childNodesFromNode n
   ns <- concat <$> mapM childNodesFromNodeRec ns'
   pure $ n : ns
 
-childNodesFromNode :: S.Node -> IO [S.Node]
+childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node]
 childNodesFromNode n = do
-  let numChildren = fromIntegral (S.nodeChildCount n)
-  ns <- mallocArray numChildren
-  tsNode <- malloc
-  poke tsNode (S.nodeTSNode n)
-  S.ts_node_copy_child_nodes tsNode ns
-  free tsNode
-  ns' <- peekArray numChildren ns
-  free ns
-  pure ns'
+  numChildren <- fromIntegral <$> S.ts_node_named_child_count n
+  mapM
+    ( \k -> do
+        node <- malloc
+        S.ts_node_named_child n k node
+        pure node
+    )
+    [0 .. numChildren - 1]
 
 data CommentStyle
   = LineStyle T.Text
diff --git a/app/Comment/Language.hs b/app/Comment/Language.hs
index 67c3413..925cf56 100644
--- a/app/Comment/Language.hs
+++ b/app/Comment/Language.hs
@@ -14,8 +14,7 @@ import Data.Text qualified as T
 import Exception qualified as E
 import Foreign.Ptr (Ptr)
 import GHC.Generics (Generic)
-import TreeSitter.Haskell qualified as S
-import TreeSitter.Language qualified as S
+import TreeSitter qualified as S
 
 data Language
   = Haskell
diff --git a/app/TreeSitter.hs b/app/TreeSitter.hs
new file mode 100644
index 0000000..e911d1b
--- /dev/null
+++ b/app/TreeSitter.hs
@@ -0,0 +1,65 @@
+module TreeSitter where
+
+-- | References: [tree-sitter/api.h](https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h)
+
+import Foreign.C.String (CString)
+import Foreign.C.Types (CInt (..))
+import Foreign.Ptr (Ptr)
+import Foreign.Storable (Storable (..), peek)
+
+data Parser
+
+data Language
+
+data Tree = Tree
+
+data Node = Node
+
+instance Storable Node where
+  sizeOf _ = 32
+
+data Point = Point
+  { row :: CInt,
+    column :: CInt
+  } deriving (Show)
+
+instance Storable Point where
+  sizeOf _ = 8
+  alignment _ = 8
+  peek p = Point <$> peekByteOff p 0 <*> peekByteOff p 4
+
+withParser :: Ptr Language -> (Ptr Parser -> IO a) -> IO a
+withParser l f = do
+  p <- ts_parser_new
+  ts_parser_set_language p l
+  x <- f p
+  ts_parser_delete p
+  pure x
+
+foreign import ccall unsafe "ts_node_start_point_p" ts_node_start_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_end_point_p" ts_node_end_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_start_byte_p" ts_node_start_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_end_byte_p" ts_node_end_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_type_p" ts_node_type :: Ptr Node -> IO CString
+
+foreign import ccall unsafe "ts_node_named_child_p" ts_node_named_child :: Ptr Node -> CInt -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_node_named_child_count_p" ts_node_named_child_count :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_tree_root_node_p" ts_tree_root_node :: Ptr Tree -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_tree_delete" ts_tree_delete :: Ptr Tree -> IO ()
+
+foreign import ccall unsafe "ts_parser_parse_string" ts_parser_parse_string :: Ptr Parser -> Ptr Tree -> CString -> CInt -> IO (Ptr Tree)
+
+foreign import ccall unsafe "ts_parser_new" ts_parser_new :: IO (Ptr Parser)
+
+foreign import ccall unsafe "ts_parser_delete" ts_parser_delete :: Ptr Parser -> IO ()
+
+foreign import ccall unsafe "ts_parser_set_language" ts_parser_set_language :: Ptr Parser -> Ptr Language -> IO ()
+
+foreign import ccall unsafe "tree_sitter_haskell" tree_sitter_haskell :: Ptr Language
diff --git a/app/TreeSitter/bridge.c b/app/TreeSitter/bridge.c
new file mode 100644
index 0000000..904c88e
--- /dev/null
+++ b/app/TreeSitter/bridge.c
@@ -0,0 +1,34 @@
+#include "tree_sitter/api.h"
+#include "string.h"
+
+void ts_tree_root_node_p(TSTree *tree, TSNode *node) {
+  (*node) = ts_tree_root_node(tree);
+}
+
+uint32_t ts_node_named_child_count_p(TSNode *node) {
+  return ts_node_named_child_count(*node);
+}
+
+uint32_t ts_node_start_byte_p(TSNode *node) {
+  return ts_node_start_byte(*node);
+}
+
+uint32_t ts_node_end_byte_p(TSNode *node) {
+  return ts_node_end_byte(*node);
+}
+
+uint32_t ts_node_start_point_p(TSNode *node, TSPoint *point) {
+  (*point) = ts_node_start_point(*node);
+}
+
+uint32_t ts_node_end_point_p(TSNode *node, TSPoint *point) {
+  (*point) = ts_node_end_point(*node);
+}
+
+const char* ts_node_type_p(TSNode *node) {
+  return ts_node_type(*node);
+}
+
+void ts_node_named_child_p(TSNode* self, uint32_t child_index, TSNode* node) {
+  (*node) = ts_node_named_child(*self, child_index);
+}
-- 
cgit v1.2.3