diff options
| author | Jason Gross | 2020-05-13 13:40:51 -0400 |
|---|---|---|
| committer | Jason Gross | 2020-05-20 15:18:09 -0400 |
| commit | 9cc5075f6cff9ddcb12160301b6880aca595e2b5 (patch) | |
| tree | cb5fb99e15579a4352a43a04ffc7d435e5740105 /dev/tools | |
| parent | 5bf16099faa02b07c2f0e26927a56e26959b6128 (diff) | |
Use pagination in fetching the number of reviews
Fixes #12300
Note that I currently only paginate the API call for the number of
reviews, not the main API call, because (a) the main API call doesn't
seem subject to pagination (it returns a dict, not an array), and (b)
because fetching the total number of pages incurs an extra API call for
each one that we want to paginate, even if there is only one page. We
could work around (b) with a significantly more complicated
`curl_paginate` function which heuristically recognizes the end of the
header/beginning of the body, such as
```bash
curl_paginate() {
    # Fetch every page of a paginated GitHub API endpoint and emit the
    # concatenated response bodies (headers stripped) on stdout.
    #
    # Arguments:
    #   $1 - the API URL, without an existing query string
    # Outputs:
    #   the JSON body of each page, in order, on stdout
    local url response line is_header
    # As per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received,
    # GitHub will never give us more than 100 items per page.
    url="$1?per_page=100"
    # We need the response headers to discover pagination.  We have two
    # options:
    #
    # 1. Make an extra API call at the beginning to get the total number
    #    of pages, search for a rel="last" link, and loop over all pages.
    #
    # 2. Ask for the header info with every single curl request (-i),
    #    search for a rel="next" link to follow, and parse the body out
    #    of the combined header+body output ourselves.
    #
    # Although (1) is simpler, we choose (2) to save an extra API call
    # per invocation of curl.
    while [ -n "${url}" ]; do
        response="$(curl -si "${url}")"
        # We search for something like
        #   link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <...>; rel="last"
        # and take the first rel="next" URL.  When no such header exists we
        # are on the last page, url becomes empty, and the loop terminates.
        url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | tr -d '<>')"
        # Heuristically split the header from the body: the JSON body is
        # the first line beginning with '[' or '{'; everything before it
        # is HTTP headers and is discarded.
        echo "${response}" |
            {
                is_header="yes"
                # IFS= and -r keep leading whitespace and backslashes in
                # body lines intact.
                while IFS= read -r line; do
                    if [ "${is_header}" = "yes" ]; then
                        if echo "${line}" | grep -q '^[[:space:]]*[[{]'; then
                            is_header="no"
                            echo "${line}"
                        fi
                    else
                        echo "${line}"
                    fi
                done
            }
    done
}
```
Diffstat (limited to 'dev/tools')
| -rwxr-xr-x | dev/tools/merge-pr.sh | 21 |
1 files changed, 20 insertions, 1 deletions
diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh index ce64aebdc7..7c65f984ee 100755 --- a/dev/tools/merge-pr.sh +++ b/dev/tools/merge-pr.sh @@ -49,10 +49,29 @@ ask_confirmation() { fi } +curl_paginate() { + # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100 + url="$1?per_page=100" + # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received + url_info="$(curl -sI "${url}")" + page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')" + if [ -z "${page_count}" ]; then + page_count=1 + fi + for page in $(seq 1 ${page_count}); do + curl -s "${url}&page=${page}" + done +} + +curl_paginate_array() { + curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays +} + check_util jq check_util curl check_util git check_util gpg +check_util grep # command line parsing @@ -203,7 +222,7 @@ fi # Generate commit message info "Fetching review data" -reviews=$(curl -s "$API/pulls/$PR/reviews") +reviews=$(curl_paginate_array "$API/pulls/$PR/reviews") msg="Merge PR #$PR: $TITLE" has_state() { |
