# note: merge sibling comments into matched comment blocks, as tree-sitter does
# not allow for matching sibling nodes. Issues written as consecutive line
# comments (vs. one block comment) therefore arrive as several matches, one per
# line, and have to be stitched back together after running tree-grepper.
#
#######################################
# Merge matches that sit on directly consecutive rows into a single match.
#
# Two neighbouring matches are merged when the later one starts on the row
# right after the row the earlier one ends on. The merged match keeps every
# field of the first match of the run, except:
#   .end   is taken from the last match of the run
#   .text  is the texts of the run joined with a newline
#
# Globals:   none (the input is taken from stdin only)
# Arguments: none
# Inputs:    stdin - one JSON object carrying a `.matches` array whose
#            elements have `.start.row`, `.end.row` and `.text`
# Outputs:   stdout - the same object, compact (-c), with `.matches` replaced
#            by the merged list; all other keys are passed through untouched
# Returns:   exit status of jq (non-zero on malformed input)
#######################################
merge_matches() {
  # A single jq pass replaces the per-match shell loop. The whole document is
  # read from stdin, so nothing from the caller's scope is needed, and no
  # shell variable is assigned, so nothing leaks out.
  jq -c '
    .matches |= (
      # A missing or null .matches is treated as "no matches".
      (. // [])
      | reduce .[] as $cur (
          [];
          # The accumulator holds the merged matches so far; its last element
          # always ends where the previously seen match ended.
          if length > 0 and (.[-1].end.row + 1) == $cur.start.row then
            .[:-1] + [
              .[-1] + {
                "end": $cur.end,
                "text": (.[-1].text + "\n" + $cur.text)
              }
            ]
          else
            . + [$cur]
          end
        )
    )
  '
}