From 9cc5075f6cff9ddcb12160301b6880aca595e2b5 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Wed, 13 May 2020 13:40:51 -0400 Subject: Use pagination in fetching the number of reviews Fixes #12300 Note that I currently only paginate the API call for the number of reviews, not the main API call, because (a) the main API call doesn't seem subject to pagination (it returns a dict, not an array), and (b) because fetching the total number of pages incurs an extra API call for each one that we want to paginate, even if there is only one page. We could work around (b) with a significantly more complicated `curl_paginate` function which heuristically recognizes the end of the header/beginning of the body, such as ```bash curl_paginate() { # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 url="$1?per_page=100" # We need to process the header to get the pagination. We have two # options: # # 1. We can make an extra API call at the beginning to get the total # number of pages, search for a rel="last" link, and then loop # over all the pages. # # 2. We can ask for the header info with every single curl request, # search for a rel="next" link to follow to the next page, and # then parse out the body from the header. # # Although (1) is simpler, we choose to do (2) to save an extra API # call per invocation of curl. while [ ! -z "${url}" ]; do response="$(curl -si "${url}")" # we search for something like 'link: ; rel="next", ; rel="last"' and take the first 'next' url url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')" echo "Response: ${response}" >&2 echo "${response}" | { is_header="yes" while read line; do if [ "${is_header}" == "yes" ]; then if echo "${line}" | grep -q '^\s*[\[{]'; then # we treat lines beginning with [ or { as the beginning of the response body is_header="no" echo "${line}" fi else echo "${line}" fi done } done } ``` --- dev/tools/merge-pr.sh | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'dev') diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh index ce64aebdc7..7c65f984ee 100755 --- a/dev/tools/merge-pr.sh +++ b/dev/tools/merge-pr.sh @@ -49,10 +49,29 @@ ask_confirmation() { fi } +curl_paginate() { + # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 + url="$1?per_page=100" + # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received + url_info="$(curl -sI "${url}")" + page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')" + if [ -z "${page_count}" ]; then + page_count=1 + fi + for page in $(seq 1 ${page_count}); do + curl -s "${url}&page=${page}" + done +} + +curl_paginate_array() { + curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays +} + check_util jq check_util curl check_util git check_util gpg +check_util grep # command line parsing @@ -203,7 +222,7 @@ fi # Generate commit message info "Fetching review data" -reviews=$(curl -s "$API/pulls/$PR/reviews") +reviews=$(curl_paginate_array "$API/pulls/$PR/reviews") msg="Merge PR #$PR: $TITLE" has_state() { -- cgit v1.2.3 From 19a231af66b275e83989e947b373cd44a889a319 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Mon, 18 May 2020 12:03:49 -0400 Subject: [merge-pr.sh] Follow next links instead As per PR review request --- dev/tools/merge-pr.sh | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'dev') diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh index 7c65f984ee..ac4aafce62 100755 --- a/dev/tools/merge-pr.sh +++ b/dev/tools/merge-pr.sh @@ -52,14 +52,27 @@ ask_confirmation() { curl_paginate() { # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 url="$1?per_page=100" - # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received - url_info="$(curl -sI "${url}")" - page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')" - if [ -z "${page_count}" ]; then - page_count=1 - fi - for page in $(seq 1 ${page_count}); do - curl -s "${url}&page=${page}" + # We need to process the header to get the pagination. We have two + # options: + # + # 1. We can make an extra API call at the beginning to get the total + # number of pages, search for a rel="last" link, and then loop + # over all the pages. + # + # 2. We can ask for the header info with every single curl request, + # search for a rel="next" link to follow to the next page, and + # then parse out the body from the header. + # + # Although (1) is a bit simpler, we choose to do (2) to save an + # extra API call per invocation of curl. + while [ ! -z "${url}" ]; do + response="$(curl -si "${url}")" + # we search for something like 'link: ; rel="next", ; rel="last"' and take the first 'next' url + url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')" + # we extract the body by looking for the first line that starts + # with [ or {, using the perl-regexp mode for grep as in + # https://stackoverflow.com/a/7167115/377022 + echo "${response}" | grep -Pzao '(?s)\n\s*[\[{].*' | tr '\0' '\n' done } @@ -72,6 +85,7 @@ check_util curl check_util git check_util gpg check_util grep +check_util tr # command line parsing @@ -89,6 +103,8 @@ fi # Fetching PR metadata +# The main API call returns a dict/object, not an array, so we don't +# bother paginating PRDATA=$(curl -s "$API/pulls/$PR") TITLE=$(echo "$PRDATA" | jq -r '.title') -- cgit v1.2.3 From 8263c13919ca5a9ea59bc7cb930117215ddec3a9 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 19 May 2020 15:31:50 -0400 Subject: [merge-pr] Use a simpler method to get all pages h/t SkySkimmer at https://github.com/coq/coq/pull/12316#issuecomment-630952659 --- dev/tools/merge-pr.sh | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'dev') diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh index ac4aafce62..82e4bd1e1e 100755 --- a/dev/tools/merge-pr.sh +++ b/dev/tools/merge-pr.sh @@ -49,35 +49,19 @@ ask_confirmation() { fi } -curl_paginate() { +curl_paginate_array() { # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 url="$1?per_page=100" - # We need to process the header to get the pagination. We have two - # options: - # - # 1. We can make an extra API call at the beginning to get the total - # number of pages, search for a rel="last" link, and then loop - # over all the pages. - # - # 2. We can ask for the header info with every single curl request, - # search for a rel="next" link to follow to the next page, and - # then parse out the body from the header. - # - # Although (1) is a bit simpler, we choose to do (2) to save an - # extra API call per invocation of curl. - while [ ! -z "${url}" ]; do - response="$(curl -si "${url}")" - # we search for something like 'link: ; rel="next", ; rel="last"' and take the first 'next' url - url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')" - # we extract the body by looking for the first line that starts - # with [ or {, using the perl-regexp mode for grep as in - # https://stackoverflow.com/a/7167115/377022 - echo "${response}" | grep -Pzao '(?s)\n\s*[\[{].*' | tr '\0' '\n' - done -} - -curl_paginate_array() { - curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays + # we keep fetching pages until the response is below the per-page limit (possibly 0 elements) + page=1 + while true; do + response="$(curl -s "${url}&page=${page}")" + echo "${response}" + if [ "$(jq 'length' <<< "$response")" -lt 100 ]; then # done + break + fi + page=$(($page + 1)) + done | jq '[.[]]' # we concatenate the arrays } check_util jq @@ -85,7 +69,6 @@ check_util curl check_util git check_util gpg check_util grep -check_util tr # command line parsing -- cgit v1.2.3