aboutsummaryrefslogtreecommitdiffstats
path: root/app/tree_sitter.c
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 02:41:56 +0100
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-12-18 05:27:40 +0100
commit10c764c022b1e46c84a3b4d3743a58bd1072b5a5 (patch)
tree9e37cf690bbeb8e430ddf4340b08f55c6fa78954 /app/tree_sitter.c
parent0d96613d9aa41f93ebb440bb1aa383456b49f28f (diff)
feat: limit the number of FFI calls for extracting comments
This replaces the tree-sitter bindings with a call to a single C function that traverses the AST. We expect the query API to be slower than manually traversing the tree for this particular use case. This will be addressed in an upcoming commit. @prerequisite-for add-languages-elm-shell-nix
Diffstat (limited to 'app/tree_sitter.c')
-rw-r--r--app/tree_sitter.c85
1 files changed, 85 insertions, 0 deletions
diff --git a/app/tree_sitter.c b/app/tree_sitter.c
new file mode 100644
index 0000000..d0f9fa8
--- /dev/null
+++ b/app/tree_sitter.c
@@ -0,0 +1,85 @@
+#include <stdlib.h>
+#include "string.h"
+#include "tree_sitter/api.h"
+
+typedef struct Node { /* Source span of one captured comment node, as filled in by extract_comments(). */
+ TSPoint start_point; /* value of ts_node_start_point() for the captured node */
+ TSPoint end_point; /* value of ts_node_end_point() for the captured node */
+ uint32_t start_byte; /* value of ts_node_start_byte() for the captured node */
+ uint32_t end_byte; /* value of ts_node_end_byte() for the captured node */
+} Node;
+
+/*
+ * Parse `input` with `language` and record the span of every node captured
+ * by the query "(comment) @comment".
+ *
+ * language: grammar used both for parsing and for compiling the query.
+ * input:    NUL-terminated source text; its length is taken with strlen().
+ * out:      receives a malloc'd array of Node entries, in the order the
+ *           query cursor yields its matches. This function never frees the
+ *           array it hands back, so the caller is responsible for free().
+ * out_len:  receives the number of entries stored in `*out`.
+ *
+ * On any failure -- NULL input, the parser rejecting the language, the parse
+ * returning no tree, the query failing to compile for this grammar, or an
+ * allocation failure -- `*out` is set to NULL and `*out_len` to 0.
+ */
+void extract_comments(
+  TSLanguage* language,
+  char* input,
+  Node** out,
+  uint32_t* out_len
+) {
+  static const char pattern[] = "(comment) @comment";
+
+  TSParser* parser = NULL;
+  TSTree* tree = NULL;
+  TSQuery* query = NULL;
+  TSQueryCursor* query_cursor = NULL;
+  TSQueryMatch query_match;
+  uint32_t error_offset = 0;
+  TSQueryError error_type = TSQueryErrorNone;
+  Node* nodes = NULL;
+  uint32_t n = 0;
+  uint32_t n_max = 1024;
+
+  /* Give the outputs a defined value before anything can fail. */
+  *out = NULL;
+  *out_len = 0;
+  if (input == NULL) {
+    return;
+  }
+
+  parser = ts_parser_new();
+  if (parser == NULL || !ts_parser_set_language(parser, language)) {
+    goto cleanup;
+  }
+
+  tree = ts_parser_parse_string(parser, NULL, input, (uint32_t)strlen(input));
+  if (tree == NULL) {
+    goto cleanup;
+  }
+
+  /* The query does not compile for grammars without a `comment` node type;
+   * in that case ts_query_new() returns NULL and there is nothing to run. */
+  query = ts_query_new(language, pattern, (uint32_t)(sizeof pattern - 1), &error_offset, &error_type);
+  if (query == NULL) {
+    goto cleanup;
+  }
+
+  query_cursor = ts_query_cursor_new();
+  if (query_cursor == NULL) {
+    goto cleanup;
+  }
+  ts_query_cursor_exec(query_cursor, query, ts_tree_root_node(tree));
+
+  nodes = malloc(sizeof *nodes * n_max);
+  if (nodes == NULL) {
+    goto cleanup;
+  }
+
+  while (ts_query_cursor_next_match(query_cursor, &query_match)) {
+    if (query_match.capture_count == 0) {
+      continue;
+    }
+    if (n >= n_max) {
+      /* Double the capacity through a temporary so the old buffer is not
+       * lost when realloc() fails, and refuse sizes that would wrap. */
+      Node* grown = NULL;
+      if (n_max <= UINT32_MAX / 2 && (size_t)n_max * 2 <= SIZE_MAX / sizeof *nodes) {
+        grown = realloc(nodes, sizeof *nodes * ((size_t)n_max * 2));
+      }
+      if (grown == NULL) {
+        free(nodes);
+        nodes = NULL;
+        n = 0;
+        goto cleanup;
+      }
+      nodes = grown;
+      n_max *= 2;
+    }
+    TSNode captured_node = query_match.captures[0].node;
+    nodes[n].start_byte = ts_node_start_byte(captured_node);
+    nodes[n].end_byte = ts_node_end_byte(captured_node);
+    nodes[n].start_point = ts_node_start_point(captured_node);
+    nodes[n].end_point = ts_node_end_point(captured_node);
+    n++;
+  }
+
+cleanup:
+  /* On every failure path `nodes` is NULL and `n` is 0 at this point. */
+  *out = nodes;
+  *out_len = n;
+
+  if (query_cursor != NULL) {
+    ts_query_cursor_delete(query_cursor);
+  }
+  if (query != NULL) {
+    ts_query_delete(query);
+  }
+  if (tree != NULL) {
+    ts_tree_delete(tree);
+  }
+  if (parser != NULL) {
+    ts_parser_delete(parser);
+  }
+}
+
+/* Out-parameter form of ts_tree_root_node(): stores the root node of `tree`
+ * in `*node` instead of returning it by value. */
+void ts_tree_root_node_p(TSTree *tree, TSNode *node) {
+  TSNode root = ts_tree_root_node(tree);
+  *node = root;
+}
+
+/* Pointer-taking form of ts_node_named_child_count(): dereferences `node`
+ * and returns the count reported for it. */
+uint32_t ts_node_named_child_count_p(TSNode *node) {
+  TSNode self = *node;
+  return ts_node_named_child_count(self);
+}
+
+/* Pointer-taking form of ts_node_start_byte(): dereferences `node` and
+ * returns its start byte. */
+uint32_t ts_node_start_byte_p(TSNode *node) {
+  TSNode self = *node;
+  return ts_node_start_byte(self);
+}
+
+/* Pointer-taking form of ts_node_end_byte(): dereferences `node` and
+ * returns its end byte. */
+uint32_t ts_node_end_byte_p(TSNode *node) {
+  TSNode self = *node;
+  return ts_node_end_byte(self);
+}
+
+/*
+ * Out-parameter form of ts_node_start_point(): stores the start point of
+ * `*node` in `*point`.
+ *
+ * Always returns 0. The function is declared to return uint32_t, so it must
+ * return a value: falling off the end and then using the result is undefined
+ * behaviour. The returned value carries no information.
+ */
+uint32_t ts_node_start_point_p(TSNode *node, TSPoint *point) {
+  (*point) = ts_node_start_point(*node);
+  return 0;
+}
+
+/*
+ * Out-parameter form of ts_node_end_point(): stores the end point of
+ * `*node` in `*point`.
+ *
+ * Always returns 0. The function is declared to return uint32_t, so it must
+ * return a value: falling off the end and then using the result is undefined
+ * behaviour. The returned value carries no information.
+ */
+uint32_t ts_node_end_point_p(TSNode *node, TSPoint *point) {
+  (*point) = ts_node_end_point(*node);
+  return 0;
+}
+
+/* Pointer-taking form of ts_node_type(): dereferences `node` and returns
+ * the string ts_node_type() yields for it. */
+const char* ts_node_type_p(TSNode *node) {
+  TSNode self = *node;
+  const char* type_name = ts_node_type(self);
+  return type_name;
+}
+
+/* Out-parameter form of ts_node_named_child(): looks up the named child of
+ * `*self` at `child_index` and stores it in `*node`. */
+void ts_node_named_child_p(TSNode* self, uint32_t child_index, TSNode* node) {
+  TSNode parent = *self;
+  TSNode child = ts_node_named_child(parent, child_index);
+  *node = child;
+}