diff options
Diffstat (limited to 'src/extract-generic.sh')
-rwxr-xr-x | src/extract-generic.sh | 55 |
1 files changed, 50 insertions, 5 deletions
diff --git a/src/extract-generic.sh b/src/extract-generic.sh index cadbb30..f4aef22 100755 --- a/src/extract-generic.sh +++ b/src/extract-generic.sh @@ -4,12 +4,57 @@ set -efu input_file=${1-/dev/stdin} +# note: merge sibling comments into matched comment blocks, as tree-sitter does +# not allow for matching sibling nodes +function merge_matches() { + matches=$(jq -c .matches[]) + new_matches= + last_match= + new_match= + while read -r cur_match; do + if test -z "$new_match"; then + new_match=$cur_match + fi + if ! test -z "$last_match"; then + last_end_row=$(echo "$last_match" | jq -r .end.row) + cur_start_row=$(echo "$cur_match" | jq -r .start.row) + if test "$(($last_end_row + 1))" = "$cur_start_row"; then + new_text=$(echo "$new_match" | jq -r .text) + cur_text=$(echo "$cur_match" | jq -r .text) + text=$(printf "%s\n%s" "$new_text" "$cur_text") + end=$(echo "$cur_match" | jq -c .end) + new_match=$( + echo "$new_match" | + # XXX "$end" seems to be forbidden as variable name + jq -c \ + --argjson end_ "$end" \ + --arg text "$text" \ + '. + { + "end": $end_, + "text": $text + }' + ) + else + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + new_match=$cur_match + fi + fi + last_match=$cur_match + done <<< "$matches" + if ! test -z "$new_match"; then + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + fi + new_matches=$(echo "$new_matches" | jq -cs .) + echo "$line" | jq -c --argjson matches "$new_matches" '. + { "matches": $matches }' +} + tree-grepper \ --query "$TREE_GREPPER_LANGUAGE" "$TREE_GREPPER_QUERY" \ --format json $input_file | - jq 'sort_by(.file)' | - jq '.[]' --indent 0 | + jq -c '.[]' | while read -r line; do + echo "$line" | merge_matches + done | while read -r line; do file=$(echo "$line" | jq -r .file) file_type=$(echo "$line" | jq -r .file_type) items=$(echo "$line" | @@ -32,19 +77,19 @@ tree-grepper \ text=$(echo "$item" | jq .match.text -r | - sed 's/^'"$LINE_COMMENT_START"' *TODO *//' | + sed 's/^'"$LINE_COMMENT_START"'\( *TODO *\| \?\)//g' | { if test -z "${BLOCK_COMMENT_START-}"; then cat else - sed 's/^'"$BLOCK_COMMENT_START"' *TODO *//' + sed 's/^'"$BLOCK_COMMENT_START"'\( *TODO *\| \?\)//' fi } | { if test -z "${BLOCK_COMMENT_END-}"; then cat else - sed 's/ *'"$BLOCK_COMMENT_END"'$//' + sed 's/ *'"$BLOCK_COMMENT_END"' *$//' fi } ) |