aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-15 07:40:33 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 05:26:24 +0100
commit0d96613d9aa41f93ebb440bb1aa383456b49f28f (patch)
tree8338371bcdeb58957f3b312517cebc9763b380ba
parent4013b920f51790a88b5afce5be72c52b8cb2adc6 (diff)
feat: drop haskell-tree-sitter
Drop haskell-tree-sitter in favor of custom bindings to tree-sitter. haskell-tree-sitter is outdated and seems unmaintained. The implementation add low-level bindings to tree-sitter and traverses the AST in Haskell. We suspect that many FFI calls are more expensive than performing just a single API call to a C function that does the traversal. This will be addressed in upcoming commits. @prerequisite-for add-languages-elm-shell-nix
-rw-r--r--anissue.cabal7
-rw-r--r--app/Comment.hs80
-rw-r--r--app/Comment/Language.hs3
-rw-r--r--app/TreeSitter.hs65
-rw-r--r--app/TreeSitter/bridge.c34
-rw-r--r--default.nix31
6 files changed, 183 insertions, 37 deletions
diff --git a/anissue.cabal b/anissue.cabal
index 74c1478..7043c79 100644
--- a/anissue.cabal
+++ b/anissue.cabal
@@ -95,8 +95,11 @@ executable anissue
Render
Settings
Text.Diff.Extra
+ TreeSitter
Tuple
+ extra-libraries: tree-sitter tree-sitter-haskell
+
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
@@ -124,8 +127,6 @@ executable anissue
terminal-size,
text,
time,
- tree-sitter,
- tree-sitter-haskell,
typed-process,
xdg-basedir,
yaml
@@ -152,3 +153,5 @@ executable anissue
RecordWildCards
TypeFamilies
ViewPatterns
+
+ c-sources: app/TreeSitter/bridge.c
diff --git a/app/Comment.hs b/app/Comment.hs
index 7bd4ad5..63f610a 100644
--- a/app/Comment.hs
+++ b/app/Comment.hs
@@ -25,16 +25,13 @@ import Data.Text.Encoding qualified as T
import Exception qualified as E
import Foreign.C.String
import Foreign.Marshal.Alloc (free, malloc)
-import Foreign.Marshal.Array (mallocArray, peekArray)
-import Foreign.Ptr (nullPtr)
+import Foreign.Ptr (Ptr, nullPtr)
import Foreign.Storable
import GHC.Generics (Generic)
import GHC.Int (Int64)
import Git qualified
import System.FilePath (takeExtension)
-import TreeSitter.Node qualified as S
-import TreeSitter.Parser qualified as S
-import TreeSitter.Tree qualified as S
+import TreeSitter qualified as S
data Comment = Comment
{ text :: T.Text,
@@ -96,51 +93,72 @@ extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
extractComments filePath language str' = do
S.withParser (parser language) $ \parser -> do
B.useAsCStringLen str' $ \(str, len) -> do
- tree <- S.ts_parser_parse_string parser nullPtr str len
- S.withRootNode tree $ \node -> do
- map
- ( \n' ->
- let startByte = fromIntegral $ S.nodeStartByte n'
- endByte = fromIntegral $ S.nodeEndByte n'
- text =
+ tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len)
+ node <- malloc
+ S.ts_tree_root_node tree node
+ x <-
+ mapM
+ ( \n' -> do
+ startByte <- fromIntegral <$> S.ts_node_start_byte n'
+ endByte <- fromIntegral <$> S.ts_node_end_byte n'
+ let text =
T.decodeUtf8
. B.take (fromIntegral endByte - fromIntegral startByte)
. B.drop (fromIntegral startByte)
$ str'
- startPoint = fromTSPoint (S.nodeStartPoint n')
- endPoint = fromTSPoint (S.nodeEndPoint n')
- fromTSPoint (S.TSPoint {..}) = Point (fromIntegral pointRow + 1) (fromIntegral pointColumn + 1)
- in Comment {..}
+ startPoint <- do
+ point <- malloc
+ S.ts_node_start_point n' point
+ S.Point {..} <- peek point
+ free point
+ pure
+ Point
+ { row = fromIntegral row,
+ column = fromIntegral column
+ }
+ endPoint <- do
+ point <- malloc
+ S.ts_node_end_point n' point
+ S.Point {..} <- peek point
+ free point
+ pure
+ Point
+ { row = fromIntegral row,
+ column = fromIntegral column
+ }
+
+ pure Comment {..}
)
- <$> (commentsFromNodeRec language =<< peek node)
+ =<< (commentsFromNodeRec language node)
+ free node
+ pure x
-commentsFromNodeRec :: Language -> S.Node -> IO [S.Node]
+commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node]
commentsFromNodeRec language =
(filterM (isCommentNode language) =<<)
. childNodesFromNodeRec
-isCommentNode :: Language -> S.Node -> IO Bool
+isCommentNode :: Language -> Ptr S.Node -> IO Bool
isCommentNode language n =
- (`elem` (nodeTypes language)) <$> peekCString (S.nodeType n)
+ fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n
-childNodesFromNodeRec :: S.Node -> IO [S.Node]
+childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNodeRec n = do
ns' <- childNodesFromNode n
ns <- concat <$> mapM childNodesFromNodeRec ns'
pure $ n : ns
-childNodesFromNode :: S.Node -> IO [S.Node]
+childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNode n = do
- let numChildren = fromIntegral (S.nodeChildCount n)
- ns <- mallocArray numChildren
- tsNode <- malloc
- poke tsNode (S.nodeTSNode n)
- S.ts_node_copy_child_nodes tsNode ns
- free tsNode
- ns' <- peekArray numChildren ns
- free ns
- pure ns'
+ numChildren <- fromIntegral <$> S.ts_node_named_child_count n
+ mapM
+ ( \k -> do
+ node <- malloc
+ S.ts_node_named_child n k node
+ pure node
+ )
+ [0 .. numChildren - 1]
data CommentStyle
= LineStyle T.Text
diff --git a/app/Comment/Language.hs b/app/Comment/Language.hs
index 67c3413..925cf56 100644
--- a/app/Comment/Language.hs
+++ b/app/Comment/Language.hs
@@ -14,8 +14,7 @@ import Data.Text qualified as T
import Exception qualified as E
import Foreign.Ptr (Ptr)
import GHC.Generics (Generic)
-import TreeSitter.Haskell qualified as S
-import TreeSitter.Language qualified as S
+import TreeSitter qualified as S
data Language
= Haskell
diff --git a/app/TreeSitter.hs b/app/TreeSitter.hs
new file mode 100644
index 0000000..e911d1b
--- /dev/null
+++ b/app/TreeSitter.hs
@@ -0,0 +1,65 @@
+module TreeSitter where
+
+-- | References: [tree-sitter/api.h](https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h)
+
+import Foreign.C.String (CString)
+import Foreign.C.Types (CInt (..))
+import Foreign.Ptr (Ptr)
+import Foreign.Storable (Storable (..), peek)
+
+data Parser
+
+data Language
+
+data Tree = Tree
+
+data Node = Node
+
+instance Storable Node where
+ sizeOf _ = 32
+
+data Point = Point
+ { row :: CInt,
+ column :: CInt
+ } deriving (Show)
+
+instance Storable Point where
+ sizeOf _ = 8
+ alignment _ = 8
+ peek p = Point <$> peekByteOff p 0 <*> peekByteOff p 4
+
+withParser :: Ptr Language -> (Ptr Parser -> IO a) -> IO a
+withParser l f = do
+ p <- ts_parser_new
+ ts_parser_set_language p l
+ x <- f p
+ ts_parser_delete p
+ pure x
+
+foreign import ccall unsafe "ts_node_start_point_p" ts_node_start_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_end_point_p" ts_node_end_point :: Ptr Node -> Ptr Point -> IO ()
+
+foreign import ccall unsafe "ts_node_start_byte_p" ts_node_start_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_end_byte_p" ts_node_end_byte :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_node_type_p" ts_node_type :: Ptr Node -> IO CString
+
+foreign import ccall unsafe "ts_node_named_child_p" ts_node_named_child :: Ptr Node -> CInt -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_node_named_child_count_p" ts_node_named_child_count :: Ptr Node -> IO CInt
+
+foreign import ccall unsafe "ts_tree_root_node_p" ts_tree_root_node :: Ptr Tree -> Ptr Node -> IO ()
+
+foreign import ccall unsafe "ts_tree_delete" ts_tree_delete :: Ptr Tree -> IO ()
+
+foreign import ccall unsafe "ts_parser_parse_string" ts_parser_parse_string :: Ptr Parser -> Ptr Tree -> CString -> CInt -> IO (Ptr Tree)
+
+foreign import ccall unsafe "ts_parser_new" ts_parser_new :: IO (Ptr Parser)
+
+foreign import ccall unsafe "ts_parser_delete" ts_parser_delete :: Ptr Parser -> IO ()
+
+foreign import ccall unsafe "ts_parser_set_language" ts_parser_set_language :: Ptr Parser -> Ptr Language -> IO ()
+
+foreign import ccall unsafe "tree_sitter_haskell" tree_sitter_haskell :: Ptr Language
diff --git a/app/TreeSitter/bridge.c b/app/TreeSitter/bridge.c
new file mode 100644
index 0000000..904c88e
--- /dev/null
+++ b/app/TreeSitter/bridge.c
@@ -0,0 +1,34 @@
+#include "tree_sitter/api.h"
+#include "string.h"
+
+void ts_tree_root_node_p(TSTree *tree, TSNode *node) {
+ (*node) = ts_tree_root_node(tree);
+}
+
+uint32_t ts_node_named_child_count_p(TSNode *node) {
+ return ts_node_named_child_count(*node);
+}
+
+uint32_t ts_node_start_byte_p(TSNode *node) {
+ return ts_node_start_byte(*node);
+}
+
+uint32_t ts_node_end_byte_p(TSNode *node) {
+ return ts_node_end_byte(*node);
+}
+
+uint32_t ts_node_start_point_p(TSNode *node, TSPoint *point) {
+ (*point) = ts_node_start_point(*node);
+}
+
+uint32_t ts_node_end_point_p(TSNode *node, TSPoint *point) {
+ (*point) = ts_node_end_point(*node);
+}
+
+const char* ts_node_type_p(TSNode *node) {
+ return ts_node_type(*node);
+}
+
+void ts_node_named_child_p(TSNode* self, uint32_t child_index, TSNode* node) {
+ (*node) = ts_node_named_child(*self, child_index);
+}
diff --git a/default.nix b/default.nix
index 59d560e..9667ef6 100644
--- a/default.nix
+++ b/default.nix
@@ -12,6 +12,25 @@
});
})
(self: super: {
+ tree-sitter-grammars = pkgs.lib.mapAttrs
+ (name: grammar:
+ if self.lib.isDerivation grammar then
+ pkgs.stdenv.mkDerivation
+ {
+ inherit (grammar) pname version;
+ phases = [ "installPhase" ];
+ installPhase = ''
+ mkdir -p $out/lib
+ cp ${grammar}/parser $out/lib/lib${name}.so
+ '';
+ }
+ else
+ grammar
+
+ )
+ super.tree-sitter-grammars;
+ })
+ (self: super: {
anissue = pkgs.writers.writeDashBin "anissue" ''
set -efu
exec cabal run anissue -- "$@"
@@ -25,7 +44,10 @@ let
haskellPackages = pkgs.haskellPackages.override {
overrides = self: super: {
- anissue = (super.callCabal2nix "anissue" ./. { }).overrideAttrs (oldAttrs: rec {
+ anissue = (super.callCabal2nix "anissue" ./. {
+ inherit (pkgs) tree-sitter;
+ inherit (pkgs.tree-sitter-grammars) tree-sitter-haskell;
+ }).overrideAttrs (oldAttrs: rec {
nativeBuildInputs = [ pkgs.installShellFiles ];
buildInputs = oldAttrs.buildInputs or [ ] ++ [ pkgs.makeWrapper ];
passthru = oldAttrs.passthru // {
@@ -63,7 +85,12 @@ rec {
pkgs.ghcid
pkgs.haskell-language-server
pkgs.tree-sitter
- ] ++ anissue.passthru.dependencies;
+ ]
+ ++ (
+ pkgs.lib.filter pkgs.lib.isDerivation
+ (pkgs.lib.attrValues pkgs.tree-sitter-grammars)
+ )
+ ++ anissue.passthru.dependencies;
withHoogle = true;
withHaddock = true;
shellHook = ''