diff options
| author | Jason Gross | 2020-05-13 13:40:51 -0400 |
|---|---|---|
| committer | Jason Gross | 2020-05-20 15:18:09 -0400 |
| commit | 9cc5075f6cff9ddcb12160301b6880aca595e2b5 (patch) | |
| tree | cb5fb99e15579a4352a43a04ffc7d435e5740105 /dev/tools | |
| parent | 5bf16099faa02b07c2f0e26927a56e26959b6128 (diff) | |
Use pagination in fetching the number of reviews
Fixes #12300
Note that I currently only paginate the API call for the number of
reviews, not the main API call, because (a) the main API call doesn't
seem subject to pagination (it returns a dict, not an array), and (b)
because fetching the total number of pages incurs an extra API call for
each one that we want to paginate, even if there is only one page. We
could work around (b) with a significantly more complicated
`curl_paginate` function which heuristically recognizes the end of the
header/beginning of the body, such as
```bash
curl_paginate() {
    # Fetch every page of a paginated GitHub API endpoint and emit the
    # concatenated response bodies (headers stripped) on stdout.
    #
    # Arguments:
    #   $1 - the API URL, without an existing query string
    # Outputs:
    #   the JSON body of each page, in order, on stdout
    local url response line is_header
    # As per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received,
    # GitHub will never give us more than 100 items per page.
    url="$1?per_page=100"
    # We need the response headers to discover pagination.  We have two
    # options:
    #
    # 1. Make an extra API call at the beginning to get the total number
    #    of pages, search for a rel="last" link, and loop over all pages.
    #
    # 2. Ask for the header info with every single curl request (-i),
    #    search for a rel="next" link to follow, and parse the body out
    #    of the combined header+body output ourselves.
    #
    # Although (1) is simpler, we choose (2) to save an extra API call
    # per invocation of curl.
    while [ -n "${url}" ]; do
        response="$(curl -si "${url}")"
        # We search for something like
        #   link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <...>; rel="last"
        # and take the first rel="next" URL.  When no such header exists we
        # are on the last page, url becomes empty, and the loop terminates.
        url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | tr -d '<>')"
        # Heuristically split the header from the body: the JSON body is
        # the first line beginning with '[' or '{'; everything before it
        # is HTTP headers and is discarded.
        echo "${response}" |
            {
                is_header="yes"
                # IFS= and -r keep leading whitespace and backslashes in
                # body lines intact.
                while IFS= read -r line; do
                    if [ "${is_header}" = "yes" ]; then
                        if echo "${line}" | grep -q '^[[:space:]]*[[{]'; then
                            is_header="no"
                            echo "${line}"
                        fi
                    else
                        echo "${line}"
                    fi
                done
            }
    done
}
```
Diffstat (limited to 'dev/tools')
| -rwxr-xr-x | dev/tools/merge-pr.sh | 21 |
1 files changed, 20 insertions, 1 deletions
diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh index ce64aebdc7..7c65f984ee 100755 --- a/dev/tools/merge-pr.sh +++ b/dev/tools/merge-pr.sh @@ -49,10 +49,29 @@ ask_confirmation() { fi } +curl_paginate() { + # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 + url="$1?per_page=100" + # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received + url_info="$(curl -sI "${url}")" + page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')" + if [ -z "${page_count}" ]; then + page_count=1 + fi + for page in $(seq 1 ${page_count}); do + curl -s "${url}&page=${page}" + done +} + +curl_paginate_array() { + curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays +} + check_util jq check_util curl check_util git check_util gpg +check_util grep # command line parsing @@ -203,7 +222,7 @@ fi # Generate commit message info "Fetching review data" -reviews=$(curl -s "$API/pulls/$PR/reviews") +reviews=$(curl_paginate_array "$API/pulls/$PR/reviews") msg="Merge PR #$PR: $TITLE" has_state() { |
