aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Alexander Foremny <aforemny@posteo.de>2023-09-29 11:33:15 +0200
committerLibravatar Alexander Foremny <aforemny@posteo.de>2023-09-29 11:37:01 +0200
commit24700382f2e1f9b1dac1de3e8170294cf0c818d4 (patch)
tree6eb4a8c469912307c7b767f1894ce5504845266f /src
parent3592e5b055ad0fcd70d1ae9e4804a4b5c6a77c5d (diff)
fix extracting body for shell issues
The underlying problem is that tree-sitter does not allow matching sibling nodes. As a result, any issue composed of consecutive line comments (as opposed to a single block comment) had its body extracted incorrectly, regardless of the language and not only in shell scripts. We therefore merge such sibling matches manually after running tree-grepper.
Diffstat (limited to 'src')
-rwxr-xr-xsrc/extract-elm.sh2
-rwxr-xr-xsrc/extract-generic.sh55
-rwxr-xr-xsrc/extract-nix.sh2
-rwxr-xr-xsrc/extract-sh.sh2
-rwxr-xr-xsrc/extract.sh13
5 files changed, 55 insertions, 19 deletions
diff --git a/src/extract-elm.sh b/src/extract-elm.sh
index 9fe1d4c..7ab3e17 100755
--- a/src/extract-elm.sh
+++ b/src/extract-elm.sh
@@ -3,7 +3,7 @@
set -efu
TREE_GREPPER_LANGUAGE=elm \
-TREE_GREPPER_QUERY='([(line_comment) (block_comment)]+)' \
+TREE_GREPPER_QUERY='([(line_comment) (block_comment)])' \
LINE_COMMENT_START='--' \
BLOCK_COMMENT_START='{-|\?' \
BLOCK_COMMENT_END='-}' \
diff --git a/src/extract-generic.sh b/src/extract-generic.sh
index cadbb30..f4aef22 100755
--- a/src/extract-generic.sh
+++ b/src/extract-generic.sh
@@ -4,12 +4,57 @@ set -efu
input_file=${1-/dev/stdin}
+# note: merge sibling comments into matched comment blocks, as tree-sitter does
+# not allow for matching sibling nodes
+function merge_matches() {
+ matches=$(jq -c .matches[])
+ new_matches=
+ last_match=
+ new_match=
+ while read -r cur_match; do
+ if test -z "$new_match"; then
+ new_match=$cur_match
+ fi
+ if ! test -z "$last_match"; then
+ last_end_row=$(echo "$last_match" | jq -r .end.row)
+ cur_start_row=$(echo "$cur_match" | jq -r .start.row)
+ if test "$(($last_end_row + 1))" = "$cur_start_row"; then
+ new_text=$(echo "$new_match" | jq -r .text)
+ cur_text=$(echo "$cur_match" | jq -r .text)
+ text=$(printf "%s\n%s" "$new_text" "$cur_text")
+ end=$(echo "$cur_match" | jq -c .end)
+ new_match=$(
+ echo "$new_match" |
+ # XXX "$end" seems to be forbidden as variable name
+ jq -c \
+ --argjson end_ "$end" \
+ --arg text "$text" \
+ '. + {
+ "end": $end_,
+ "text": $text
+ }'
+ )
+ else
+ new_matches=$(printf "%s\n%s" "$new_matches" "$new_match")
+ new_match=$cur_match
+ fi
+ fi
+ last_match=$cur_match
+ done <<< "$matches"
+ if ! test -z "$new_match"; then
+ new_matches=$(printf "%s\n%s" "$new_matches" "$new_match")
+ fi
+ new_matches=$(echo "$new_matches" | jq -cs .)
+ echo "$line" | jq -c --argjson matches "$new_matches" '. + { "matches": $matches }'
+}
+
tree-grepper \
--query "$TREE_GREPPER_LANGUAGE" "$TREE_GREPPER_QUERY" \
--format json $input_file |
- jq 'sort_by(.file)' |
- jq '.[]' --indent 0 |
+ jq -c '.[]' |
while read -r line; do
+ echo "$line" | merge_matches
+ done | while read -r line; do
file=$(echo "$line" | jq -r .file)
file_type=$(echo "$line" | jq -r .file_type)
items=$(echo "$line" |
@@ -32,19 +77,19 @@ tree-grepper \
text=$(echo "$item" |
jq .match.text -r |
- sed 's/^'"$LINE_COMMENT_START"' *TODO *//' |
+ sed 's/^'"$LINE_COMMENT_START"'\( *TODO *\| \?\)//g' |
{
if test -z "${BLOCK_COMMENT_START-}"; then
cat
else
- sed 's/^'"$BLOCK_COMMENT_START"' *TODO *//'
+ sed 's/^'"$BLOCK_COMMENT_START"'\( *TODO *\| \?\)//'
fi
} |
{
if test -z "${BLOCK_COMMENT_END-}"; then
cat
else
- sed 's/ *'"$BLOCK_COMMENT_END"'$//'
+ sed 's/ *'"$BLOCK_COMMENT_END"' *$//'
fi
}
)
diff --git a/src/extract-nix.sh b/src/extract-nix.sh
index 335cbba..575131d 100755
--- a/src/extract-nix.sh
+++ b/src/extract-nix.sh
@@ -3,7 +3,7 @@
set -efu
TREE_GREPPER_LANGUAGE=nix \
-TREE_GREPPER_QUERY='((comment)+)' \
+TREE_GREPPER_QUERY='(comment)' \
LINE_COMMENT_START='#' \
BLOCK_COMMENT_START='\/\*' \
BLOCK_COMMENT_END='\*\/' \
diff --git a/src/extract-sh.sh b/src/extract-sh.sh
index c31df74..a8bb1ff 100755
--- a/src/extract-sh.sh
+++ b/src/extract-sh.sh
@@ -3,6 +3,6 @@
set -efu
TREE_GREPPER_LANGUAGE=sh \
-TREE_GREPPER_QUERY='((comment)+)' \
+TREE_GREPPER_QUERY='(comment)' \
LINE_COMMENT_START='#' \
"$(dirname "$0")"/extract-generic.sh "$@"
diff --git a/src/extract.sh b/src/extract.sh
index 6422694..07346fc 100755
--- a/src/extract.sh
+++ b/src/extract.sh
@@ -80,17 +80,8 @@ git ls-files --cached --exclude-standard --other |
head -n 1)
created_at=$(git show $first_commit --no-patch --format=%ad)
- heading=$(echo "$text" | sed '/^$/Q' | sed 's/.*TODO //')
- # TODO body is not extracted in shell scripts
- #
- # @assigned aforemny
- # @assigned kirchner@posteo.de
- # @scheduled 2023-10-03
- body=$(echo "$text" | tail -n +$(($(echo "$heading" | wc -l) + 2)) | \
- awk -F '[^ ]' '
- NR == 1 {n = length($1)}
- {sub("^ {1,"n"}", ""); print}'
- )
+ heading=$(echo "$text" | sed -n '0,/^$/p')
+ body=$(echo "$text" | tail -n +$(($(echo "$heading" | wc -l) + 2)))
echo "$item" | jq -c \
--arg body "$body" \