aboutsummaryrefslogtreecommitdiff
path: root/dev/tools
diff options
context:
space:
mode:
authorJason Gross2020-05-13 13:40:51 -0400
committerJason Gross2020-05-20 15:18:09 -0400
commit9cc5075f6cff9ddcb12160301b6880aca595e2b5 (patch)
treecb5fb99e15579a4352a43a04ffc7d435e5740105 /dev/tools
parent5bf16099faa02b07c2f0e26927a56e26959b6128 (diff)
Use pagination in fetching the number of reviews
Fixes #12300

Note that I currently only paginate the API call for the number of reviews, not the main API call, because (a) the main API call doesn't seem subject to pagination (it returns a dict, not an array), and (b) because fetching the total number of pages incurs an extra API call for each one that we want to paginate, even if there is only one page. We could work around (b) with a significantly more complicated `curl_paginate` function which heuristically recognizes the end of the header/beginning of the body, such as

```bash
curl_paginate() {
    # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
    url="$1?per_page=100"
    # We need to process the header to get the pagination. We have two
    # options:
    #
    # 1. We can make an extra API call at the beginning to get the total
    #    number of pages, search for a rel="last" link, and then loop
    #    over all the pages.
    #
    # 2. We can ask for the header info with every single curl request,
    #    search for a rel="next" link to follow to the next page, and
    #    then parse out the body from the header.
    #
    # Although (1) is simpler, we choose to do (2) to save an extra API
    # call per invocation of curl.
    while [ ! -z "${url}" ]; do
        response="$(curl -si "${url}")"
        # we search for something like 'link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="last"' and take the first 'next' url
        url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')"
        echo "Response: ${response}" >&2
        echo "${response}" | {
            is_header="yes"
            while read line; do
                if [ "${is_header}" == "yes" ]; then
                    if echo "${line}" | grep -q '^\s*[\[{]'; then
                        # we treat lines beginning with [ or { as the beginning of the response body
                        is_header="no"
                        echo "${line}"
                    fi
                else
                    echo "${line}"
                fi
            done
        }
    done
}
```
Diffstat (limited to 'dev/tools')
-rwxr-xr-xdev/tools/merge-pr.sh21
1 files changed, 20 insertions, 1 deletions
diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh
index ce64aebdc7..7c65f984ee 100755
--- a/dev/tools/merge-pr.sh
+++ b/dev/tools/merge-pr.sh
@@ -49,10 +49,29 @@ ask_confirmation() {
fi
}
+curl_paginate() {
+    # Print every page of the GitHub API listing at URL $1 (which must not already carry a query string) to stdout, one JSON document per page.
+    # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
+    local url="$1?per_page=100" url_info page_count page # keep the helper's variables out of the global scope
+    # we take the rel="last" link from the response headers and read its page=N parameter to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received
+    # (isolating the link first and anchoring on [?&] keeps per_page=100 from being mistaken for the page count)
+    url_info="$(curl -sI "${url}")"
+    page_count="$(printf '%s\n' "${url_info}" | grep -o '<[^>]*>; rel="last"' | grep -o '[?&]page=[0-9][0-9]*' | grep -o '[0-9][0-9]*$')"
+    # with no rel="last" link found, fall back to fetching a single page
+    for page in $(seq 1 "${page_count:-1}"); do
+        curl -s "${url}&page=${page}"
+    done
+}
+
+curl_paginate_array() {
+    curl_paginate "$@" | jq -s '[.[][]]' # slurp the per-page arrays and concatenate them into one array (without -s, jq would emit a separate array for every page)
+}
+
check_util jq
check_util curl
check_util git
check_util gpg
+check_util grep
# command line parsing
@@ -203,7 +222,7 @@ fi
# Generate commit message
info "Fetching review data"
-reviews=$(curl -s "$API/pulls/$PR/reviews")
+reviews=$(curl_paginate_array "$API/pulls/$PR/reviews")
msg="Merge PR #$PR: $TITLE"
has_state() {