diff options
author | Alexander Foremny <aforemny@posteo.de> | 2023-09-29 11:33:15 +0200 |
---|---|---|
committer | Alexander Foremny <aforemny@posteo.de> | 2023-09-29 11:37:01 +0200 |
commit | 24700382f2e1f9b1dac1de3e8170294cf0c818d4 (patch) | |
tree | 6eb4a8c469912307c7b767f1894ce5504845266f | |
parent | 3592e5b055ad0fcd70d1ae9e4804a4b5c6a77c5d (diff) |
fix extracting body for shell issues
The underlying problem is that tree-sitter does not allow for matching
sibling nodes. So any issue composed of consecutive line comments (vs. a
single block comment) lost its body, regardless of the language (the problem was only reported for shell).
Thus, we manually merge such sibling matches after running tree-grepper.
-rwxr-xr-x | src/extract-elm.sh | 2 | ||||
-rwxr-xr-x | src/extract-generic.sh | 55 | ||||
-rwxr-xr-x | src/extract-nix.sh | 2 | ||||
-rwxr-xr-x | src/extract-sh.sh | 2 | ||||
-rwxr-xr-x | src/extract.sh | 13 |
5 files changed, 55 insertions, 19 deletions
diff --git a/src/extract-elm.sh b/src/extract-elm.sh index 9fe1d4c..7ab3e17 100755 --- a/src/extract-elm.sh +++ b/src/extract-elm.sh @@ -3,7 +3,7 @@ set -efu TREE_GREPPER_LANGUAGE=elm \ -TREE_GREPPER_QUERY='([(line_comment) (block_comment)]+)' \ +TREE_GREPPER_QUERY='([(line_comment) (block_comment)])' \ LINE_COMMENT_START='--' \ BLOCK_COMMENT_START='{-|\?' \ BLOCK_COMMENT_END='-}' \ diff --git a/src/extract-generic.sh b/src/extract-generic.sh index cadbb30..f4aef22 100755 --- a/src/extract-generic.sh +++ b/src/extract-generic.sh @@ -4,12 +4,57 @@ set -efu input_file=${1-/dev/stdin} +# note: merge sibling comments into matched comment blocks, as tree-sitter does +# not allow for matching sibling nodes +function merge_matches() { + matches=$(jq -c .matches[]) + new_matches= + last_match= + new_match= + while read -r cur_match; do + if test -z "$new_match"; then + new_match=$cur_match + fi + if ! test -z "$last_match"; then + last_end_row=$(echo "$last_match" | jq -r .end.row) + cur_start_row=$(echo "$cur_match" | jq -r .start.row) + if test "$(($last_end_row + 1))" = "$cur_start_row"; then + new_text=$(echo "$new_match" | jq -r .text) + cur_text=$(echo "$cur_match" | jq -r .text) + text=$(printf "%s\n%s" "$new_text" "$cur_text") + end=$(echo "$cur_match" | jq -c .end) + new_match=$( + echo "$new_match" | + # XXX "$end" seems to be forbidden as variable name + jq -c \ + --argjson end_ "$end" \ + --arg text "$text" \ + '. + { + "end": $end_, + "text": $text + }' + ) + else + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + new_match=$cur_match + fi + fi + last_match=$cur_match + done <<< "$matches" + if ! test -z "$new_match"; then + new_matches=$(printf "%s\n%s" "$new_matches" "$new_match") + fi + new_matches=$(echo "$new_matches" | jq -cs .) + echo "$line" | jq -c --argjson matches "$new_matches" '. 
+ { "matches": $matches }' +} + tree-grepper \ --query "$TREE_GREPPER_LANGUAGE" "$TREE_GREPPER_QUERY" \ --format json $input_file | - jq 'sort_by(.file)' | - jq '.[]' --indent 0 | + jq -c '.[]' | while read -r line; do + echo "$line" | merge_matches + done | while read -r line; do file=$(echo "$line" | jq -r .file) file_type=$(echo "$line" | jq -r .file_type) items=$(echo "$line" | @@ -32,19 +77,19 @@ tree-grepper \ text=$(echo "$item" | jq .match.text -r | - sed 's/^'"$LINE_COMMENT_START"' *TODO *//' | + sed 's/^'"$LINE_COMMENT_START"'\( *TODO *\| \?\)//g' | { if test -z "${BLOCK_COMMENT_START-}"; then cat else - sed 's/^'"$BLOCK_COMMENT_START"' *TODO *//' + sed 's/^'"$BLOCK_COMMENT_START"'\( *TODO *\| \?\)//' fi } | { if test -z "${BLOCK_COMMENT_END-}"; then cat else - sed 's/ *'"$BLOCK_COMMENT_END"'$//' + sed 's/ *'"$BLOCK_COMMENT_END"' *$//' fi } ) diff --git a/src/extract-nix.sh b/src/extract-nix.sh index 335cbba..575131d 100755 --- a/src/extract-nix.sh +++ b/src/extract-nix.sh @@ -3,7 +3,7 @@ set -efu TREE_GREPPER_LANGUAGE=nix \ -TREE_GREPPER_QUERY='((comment)+)' \ +TREE_GREPPER_QUERY='(comment)' \ LINE_COMMENT_START='#' \ BLOCK_COMMENT_START='\/\*' \ BLOCK_COMMENT_END='\*\/' \ diff --git a/src/extract-sh.sh b/src/extract-sh.sh index c31df74..a8bb1ff 100755 --- a/src/extract-sh.sh +++ b/src/extract-sh.sh @@ -3,6 +3,6 @@ set -efu TREE_GREPPER_LANGUAGE=sh \ -TREE_GREPPER_QUERY='((comment)+)' \ +TREE_GREPPER_QUERY='(comment)' \ LINE_COMMENT_START='#' \ "$(dirname "$0")"/extract-generic.sh "$@" diff --git a/src/extract.sh b/src/extract.sh index 6422694..07346fc 100755 --- a/src/extract.sh +++ b/src/extract.sh @@ -80,17 +80,8 @@ git ls-files --cached --exclude-standard --other | head -n 1) created_at=$(git show $first_commit --no-patch --format=%ad) - heading=$(echo "$text" | sed '/^$/Q' | sed 's/.*TODO //') - # TODO body is not extracted in shell scripts - # - # @assigned aforemny - # @assigned kirchner@posteo.de - # @scheduled 2023-10-03 - body=$(echo "$text" | 
tail -n +$(($(echo "$heading" | wc -l) + 2)) | \ - awk -F '[^ ]' ' - NR == 1 {n = length($1)} - {sub("^ {1,"n"}", ""); print}' - ) + heading=$(echo "$text" | sed -n '0,/^$/p') + body=$(echo "$text" | tail -n +$(($(echo "$heading" | wc -l) + 2))) echo "$item" | jq -c \ --arg body "$body" \ |