aboutsummaryrefslogtreecommitdiffstats
path: root/app/Comment.hs
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 02:41:56 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 05:27:40 +0100
commit10c764c022b1e46c84a3b4d3743a58bd1072b5a5 (patch)
tree9e37cf690bbeb8e430ddf4340b08f55c6fa78954 /app/Comment.hs
parent0d96613d9aa41f93ebb440bb1aa383456b49f28f (diff)
feat: limit the number of FFI calls for extracting comments
This replaces the tree-sitter bindings with a call to a single C function that traverses the AST. We expect the query API to be slower than manually traversing the tree for this particular use case. This will be addressed in an upcoming commit. @prerequisite-for add-languages-elm-shell-nix
Diffstat (limited to 'app/Comment.hs')
-rw-r--r--app/Comment.hs114
1 files changed, 40 insertions, 74 deletions
diff --git a/app/Comment.hs b/app/Comment.hs
index 63f610a..2769c83 100644
--- a/app/Comment.hs
+++ b/app/Comment.hs
@@ -12,7 +12,6 @@ where
import Comment.Language
import Control.Applicative (liftA2)
import Control.Exception (catch)
-import Control.Monad
import Data.Binary (Binary)
import Data.ByteString qualified as B
import Data.ByteString.Lazy qualified as LB
@@ -23,12 +22,10 @@ import Data.Ord (comparing)
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Exception qualified as E
-import Foreign.C.String
-import Foreign.Marshal.Alloc (free, malloc)
-import Foreign.Ptr (Ptr, nullPtr)
+import Foreign.Marshal.Alloc (alloca, free)
+import Foreign.Marshal.Array (peekArray)
import Foreign.Storable
import GHC.Generics (Generic)
-import GHC.Int (Int64)
import Git qualified
import System.FilePath (takeExtension)
import TreeSitter qualified as S
@@ -36,8 +33,8 @@ import TreeSitter qualified as S
data Comment = Comment
{ text :: T.Text,
language :: Language,
- startByte :: Int64,
- endByte :: Int64,
+ startByte :: Int,
+ endByte :: Int,
startPoint :: Point,
endPoint :: Point,
filePath :: FilePath
@@ -90,75 +87,44 @@ getComments commitHash filePath =
| otherwise = go (N.singleton x : ass) xs
extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
-extractComments filePath language str' = do
- S.withParser (parser language) $ \parser -> do
- B.useAsCStringLen str' $ \(str, len) -> do
- tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len)
- node <- malloc
- S.ts_tree_root_node tree node
- x <-
- mapM
- ( \n' -> do
- startByte <- fromIntegral <$> S.ts_node_start_byte n'
- endByte <- fromIntegral <$> S.ts_node_end_byte n'
- let text =
- T.decodeUtf8
- . B.take (fromIntegral endByte - fromIntegral startByte)
- . B.drop (fromIntegral startByte)
- $ str'
-
- startPoint <- do
- point <- malloc
- S.ts_node_start_point n' point
- S.Point {..} <- peek point
- free point
- pure
- Point
- { row = fromIntegral row,
- column = fromIntegral column
- }
- endPoint <- do
- point <- malloc
- S.ts_node_end_point n' point
- S.Point {..} <- peek point
- free point
- pure
- Point
- { row = fromIntegral row,
- column = fromIntegral column
+extractComments filePath language str' =
+ alloca $ \nodesPtrPtr -> do
+ alloca $ \numNodesPtr -> do
+ B.useAsCString str' $ \str ->
+ S.extract_comments
+ (parser language)
+ str
+ nodesPtrPtr
+ numNodesPtr
+ numNodes <- peek numNodesPtr
+ nodesPtr <- peek nodesPtrPtr
+ nodes <- peekArray (fromIntegral numNodes) nodesPtr
+ free nodesPtr
+ pure $
+ map
+ ( \node ->
+ let startByte = fromIntegral node.startByte
+ endByte = fromIntegral node.endByte
+ in Comment
+ { startPoint =
+ Point
+ { row = fromIntegral node.startPoint.row + 1,
+ column = fromIntegral node.startPoint.column + 1
+ },
+ endPoint =
+ Point
+ { row = fromIntegral node.endPoint.row + 1,
+ column = fromIntegral node.endPoint.column + 1
+ },
+ text =
+ T.decodeUtf8
+ . B.take (endByte - startByte)
+ . B.drop startByte
+ $ str',
+ ..
}
-
- pure Comment {..}
)
- =<< (commentsFromNodeRec language node)
- free node
- pure x
-
-commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node]
-commentsFromNodeRec language =
- (filterM (isCommentNode language) =<<)
- . childNodesFromNodeRec
-
-isCommentNode :: Language -> Ptr S.Node -> IO Bool
-isCommentNode language n =
- fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n
-
-childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node]
-childNodesFromNodeRec n = do
- ns' <- childNodesFromNode n
- ns <- concat <$> mapM childNodesFromNodeRec ns'
- pure $ n : ns
-
-childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node]
-childNodesFromNode n = do
- numChildren <- fromIntegral <$> S.ts_node_named_child_count n
- mapM
- ( \k -> do
- node <- malloc
- S.ts_node_named_child n k node
- pure node
- )
- [0 .. numChildren - 1]
+ nodes
data CommentStyle
= LineStyle T.Text