From 9cc5075f6cff9ddcb12160301b6880aca595e2b5 Mon Sep 17 00:00:00 2001
From: Jason Gross
Date: Wed, 13 May 2020 13:40:51 -0400
Subject: Use pagination in fetching the number of reviews

Fixes #12300

Note that I currently only paginate the API call for the number of
reviews, not the main API call, because (a) the main API call doesn't
seem subject to pagination (it returns a dict, not an array), and (b)
because fetching the total number of pages incurs an extra API call for
each one that we want to paginate, even if there is only one page.  We
could work around (b) with a significantly more complicated
`curl_paginate` function which heuristically recognizes the end of the
header/beginning of the body, such as
```bash
curl_paginate() {
  # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
  url="$1?per_page=100"
  # We need to process the header to get the pagination.  We have two
  # options:
  #
  # 1. We can make an extra API call at the beginning to get the total
  #    number of pages, search for a rel="last" link, and then loop
  #    over all the pages.
  #
  # 2. We can ask for the header info with every single curl request,
  #    search for a rel="next" link to follow to the next page, and
  #    then parse out the body from the header.
  #
  # Although (1) is simpler, we choose to do (2) to save an extra API
  # call per invocation of curl.
  while [ ! -z "${url}" ]; do
    response="$(curl -si "${url}")"
    # we search for something like 'link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="last"' and take the first 'next' url
    url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')"
    echo "Response: ${response}" >&2
    echo "${response}" |
      {
        is_header="yes"
        while read line; do
          if [ "${is_header}" == "yes" ]; then
            if echo "${line}" | grep -q '^\s*[\[{]'; then # we treat lines beginning with [ or { as the beginning of the response body
              is_header="no"
              echo "${line}"
            fi
          else
            echo "${line}"
          fi
        done
      }
  done
}
```
---
 dev/tools/merge-pr.sh | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'dev')

diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh
index ce64aebdc7..7c65f984ee 100755
--- a/dev/tools/merge-pr.sh
+++ b/dev/tools/merge-pr.sh
@@ -49,10 +49,29 @@ ask_confirmation() {
   fi
 }
 
+curl_paginate() {
+  # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
+  url="$1?per_page=100"
+  # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received
+  url_info="$(curl -sI "${url}")"
+  page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')"
+  if [ -z "${page_count}" ]; then
+    page_count=1
+  fi
+  for page in $(seq 1 ${page_count}); do
+    curl -s "${url}&page=${page}"
+  done
+}
+
+curl_paginate_array() {
+  curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays
+}
+
 check_util jq
 check_util curl
 check_util git
 check_util gpg
+check_util grep
 
 # command line parsing
 
@@ -203,7 +222,7 @@ fi
 # Generate commit message
 
 info "Fetching review data"
-reviews=$(curl -s "$API/pulls/$PR/reviews")
+reviews=$(curl_paginate_array "$API/pulls/$PR/reviews")
 msg="Merge PR #$PR: $TITLE"
 
 has_state() {
-- 
cgit v1.2.3


From 19a231af66b275e83989e947b373cd44a889a319 Mon Sep 17 00:00:00 2001
From: Jason Gross
Date: Mon, 18 May 2020 12:03:49 -0400
Subject: [merge-pr.sh] Follow next links instead

As per PR review request
---
 dev/tools/merge-pr.sh | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'dev')

diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh
index 7c65f984ee..ac4aafce62 100755
--- a/dev/tools/merge-pr.sh
+++ b/dev/tools/merge-pr.sh
@@ -52,14 +52,27 @@ ask_confirmation() {
 curl_paginate() {
   # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
   url="$1?per_page=100"
-  # we search for something like 'page=34>; rel="last"' to get the number of pages, as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received
-  url_info="$(curl -sI "${url}")"
-  page_count="$(echo "${url_info}" | grep -o 'page=\([0-9]*\)>; rel="last"' | grep -o '[0-9]*')"
-  if [ -z "${page_count}" ]; then
-    page_count=1
-  fi
-  for page in $(seq 1 ${page_count}); do
-    curl -s "${url}&page=${page}"
+  # We need to process the header to get the pagination.  We have two
+  # options:
+  #
+  # 1. We can make an extra API call at the beginning to get the total
+  #    number of pages, search for a rel="last" link, and then loop
+  #    over all the pages.
+  #
+  # 2. We can ask for the header info with every single curl request,
+  #    search for a rel="next" link to follow to the next page, and
+  #    then parse out the body from the header.
+  #
+  # Although (1) is a bit simpler, we choose to do (2) to save an
+  # extra API call per invocation of curl.
+  while [ ! -z "${url}" ]; do
+    response="$(curl -si "${url}")"
+    # we search for something like 'link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="last"' and take the first 'next' url
+    url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')"
+    # we extract the body by looking for the first line that starts
+    # with [ or {, using the perl-regexp mode for grep as in
+    # https://stackoverflow.com/a/7167115/377022
+    echo "${response}" | grep -Pzao '(?s)\n\s*[\[{].*' | tr '\0' '\n'
   done
 }
 
@@ -72,6 +85,7 @@ check_util curl
 check_util git
 check_util gpg
 check_util grep
+check_util tr
 
 # command line parsing
 
@@ -89,6 +103,8 @@ fi
 
 # Fetching PR metadata
 
+# The main API call returns a dict/object, not an array, so we don't
+# bother paginating
 PRDATA=$(curl -s "$API/pulls/$PR")
 
 TITLE=$(echo "$PRDATA" | jq -r '.title')
-- 
cgit v1.2.3


From 8263c13919ca5a9ea59bc7cb930117215ddec3a9 Mon Sep 17 00:00:00 2001
From: Jason Gross
Date: Tue, 19 May 2020 15:31:50 -0400
Subject: [merge-pr] Use a simpler method to get all pages

h/t SkySkimmer at
https://github.com/coq/coq/pull/12316#issuecomment-630952659
---
 dev/tools/merge-pr.sh | 39 +++++++++++----------------------------
 1 file changed, 11 insertions(+), 28 deletions(-)

(limited to 'dev')

diff --git a/dev/tools/merge-pr.sh b/dev/tools/merge-pr.sh
index ac4aafce62..82e4bd1e1e 100755
--- a/dev/tools/merge-pr.sh
+++ b/dev/tools/merge-pr.sh
@@ -49,35 +49,19 @@ ask_confirmation() {
   fi
 }
 
-curl_paginate() {
+curl_paginate_array() {
   # as per https://developer.github.com/v3/guides/traversing-with-pagination/#changing-the-number-of-items-received, GitHub will never give us more than 100
   url="$1?per_page=100"
-  # We need to process the header to get the pagination.  We have two
-  # options:
-  #
-  # 1. We can make an extra API call at the beginning to get the total
-  #    number of pages, search for a rel="last" link, and then loop
-  #    over all the pages.
-  #
-  # 2. We can ask for the header info with every single curl request,
-  #    search for a rel="next" link to follow to the next page, and
-  #    then parse out the body from the header.
-  #
-  # Although (1) is a bit simpler, we choose to do (2) to save an
-  # extra API call per invocation of curl.
-  while [ ! -z "${url}" ]; do
-    response="$(curl -si "${url}")"
-    # we search for something like 'link: <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="next", <https://api.github.com/repositories/1377159/pulls/12129/reviews?page=2>; rel="last"' and take the first 'next' url
-    url="$(echo "${response}" | grep -m 1 -io '^link: .*>; rel="next"' | grep -o '<[^>]*>; rel="next"' | grep -o '<[^>]*>' | sed s'/[<>]//g')"
-    # we extract the body by looking for the first line that starts
-    # with [ or {, using the perl-regexp mode for grep as in
-    # https://stackoverflow.com/a/7167115/377022
-    echo "${response}" | grep -Pzao '(?s)\n\s*[\[{].*' | tr '\0' '\n'
-  done
-}
-
-curl_paginate_array() {
-  curl_paginate "$@" | jq '[.[]]' # we concatenate the arrays
+  # we keep fetching pages until the response is below the per-page limit (possibly 0 elements)
+  page=1
+  while true; do
+    response="$(curl -s "${url}&page=${page}")"
+    echo "${response}"
+    if [ "$(jq 'length' <<< "$response")" -lt 100 ]; then # done
+      break
+    fi
+    page=$(($page + 1))
+  done | jq '[.[]]' # we concatenate the arrays
 }
 
 check_util jq
@@ -85,7 +69,6 @@ check_util curl
 check_util git
 check_util gpg
 check_util grep
-check_util tr
 
 # command line parsing
 
-- 
cgit v1.2.3