From 24700382f2e1f9b1dac1de3e8170294cf0c818d4 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Fri, 29 Sep 2023 11:33:15 +0200 Subject: fix extracting body for shell issues The underlying problem is that tree-sitter does not allow for matching sibling nodes. So any issue composed of line comments (vs. block comments) suffered from this problem, independent of the language (not only shell). Thus, we manually merge such sibling matches after running tree-grepper. --- src/extract-elm.sh | 2 +- src/extract-generic.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++----- src/extract-nix.sh | 2 +- src/extract-sh.sh | 2 +- src/extract.sh | 13 ++---------- 5 files changed, 55 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/extract-elm.sh b/src/extract-elm.sh index 9fe1d4c..7ab3e17 100755 --- a/src/extract-elm.sh +++ b/src/extract-elm.sh @@ -3,7 +3,7 @@ set -efu TREE_GREPPER_LANGUAGE=elm \ -TREE_GREPPER_QUERY='([(line_comment) (block_comment)]+)' \ +TREE_GREPPER_QUERY='([(line_comment) (block_comment)])' \ LINE_COMMENT_START='--' \ BLOCK_COMMENT_START='{-|\?' \ BLOCK_COMMENT_END='-}' \ diff --git a/src/extract-generic.sh b/src/extract-generic.sh index cadbb30..f4aef22 100755 --- a/src/extract-generic.sh +++ b/src/extract-generic.sh @@ -4,12 +4,57 @@ set -efu input_file=${1-/dev/stdin} +# note: merge sibling comments into matched comment blocks, as tree-sitter does +# not allow for matching sibling nodes +function merge_matches() { + matches=$(jq -c .matches[]) + new_matches= + last_match= + new_match= + while read -r cur_match; do + if test -z "$new_match"; then + new_match=$cur_match + fi + if ! 
test -z "$last_match"; then + last_end_row=$(echo "$last_match" | jq -r .end.row) + cur_start_row=$(echo "$cur_match" | jq -r .start.row) + if test "$(($last_end_row + 1))" = "$cur_start_row"; then + new_text=$(echo "$new_match" | jq -r .text) + cur_text=$(echo "$cur_match" | jq -r .text) + text=$(printf "%s\n%s" "$new_text" "$cur_text") + end=$(echo "$cur_match" | jq -c .end) + new_match=$( + echo "$new_match" | + # XXX "$end" seems to be forbidden as variable name + jq -c \ + --argjson end_ "$end" \ + --arg text "$text" \ + '. + { + "end": $end_, + "text": $text + }' + ) + else + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + new_match=$cur_match + fi + fi + last_match=$cur_match + done <<< "$matches" + if ! test -z "$new_match"; then + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + fi + new_matches=$(echo "$new_matches" | jq -cs .) + echo "$line" | jq -c --argjson matches "$new_matches" '. + { "matches": $matches }' +} + tree-grepper \ --query "$TREE_GREPPER_LANGUAGE" "$TREE_GREPPER_QUERY" \ --format json $input_file | - jq 'sort_by(.file)' | - jq '.[]' --indent 0 | + jq -c '.[]' | while read -r line; do + echo "$line" | merge_matches + done | while read -r line; do file=$(echo "$line" | jq -r .file) file_type=$(echo "$line" | jq -r .file_type) items=$(echo "$line" | @@ -32,19 +77,19 @@ tree-grepper \ text=$(echo "$item" | jq .match.text -r | - sed 's/^'"$LINE_COMMENT_START"' *TODO *//' | + sed 's/^'"$LINE_COMMENT_START"'\( *TODO *\| \?\)//g' | { if test -z "${BLOCK_COMMENT_START-}"; then cat else - sed 's/^'"$BLOCK_COMMENT_START"' *TODO *//' + sed 's/^'"$BLOCK_COMMENT_START"'\( *TODO *\| \?\)//' fi } | { if test -z "${BLOCK_COMMENT_END-}"; then cat else - sed 's/ *'"$BLOCK_COMMENT_END"'$//' + sed 's/ *'"$BLOCK_COMMENT_END"' *$//' fi } ) diff --git a/src/extract-nix.sh b/src/extract-nix.sh index 335cbba..575131d 100755 --- a/src/extract-nix.sh +++ b/src/extract-nix.sh @@ -3,7 +3,7 @@ set -efu TREE_GREPPER_LANGUAGE=nix \ 
-TREE_GREPPER_QUERY='((comment)+)' \ +TREE_GREPPER_QUERY='(comment)' \ LINE_COMMENT_START='#' \ BLOCK_COMMENT_START='\/\*' \ BLOCK_COMMENT_END='\*\/' \ diff --git a/src/extract-sh.sh b/src/extract-sh.sh index c31df74..a8bb1ff 100755 --- a/src/extract-sh.sh +++ b/src/extract-sh.sh @@ -3,6 +3,6 @@ set -efu TREE_GREPPER_LANGUAGE=sh \ -TREE_GREPPER_QUERY='((comment)+)' \ +TREE_GREPPER_QUERY='(comment)' \ LINE_COMMENT_START='#' \ "$(dirname "$0")"/extract-generic.sh "$@" diff --git a/src/extract.sh b/src/extract.sh index 6422694..07346fc 100755 --- a/src/extract.sh +++ b/src/extract.sh @@ -80,17 +80,8 @@ git ls-files --cached --exclude-standard --other | head -n 1) created_at=$(git show $first_commit --no-patch --format=%ad) - heading=$(echo "$text" | sed '/^$/Q' | sed 's/.*TODO //') - # TODO body is not extracted in shell scripts - # - # @assigned aforemny - # @assigned kirchner@posteo.de - # @scheduled 2023-10-03 - body=$(echo "$text" | tail -n +$(($(echo "$heading" | wc -l) + 2)) | \ - awk -F '[^ ]' ' - NR == 1 {n = length($1)} - {sub("^ {1,"n"}", ""); print}' - ) + heading=$(echo "$text" | sed -n '0,/^$/p') + body=$(echo "$text" | tail -n +$(($(echo "$heading" | wc -l) + 2))) echo "$item" | jq -c \ --arg body "$body" \ -- cgit v1.2.3