diff options
| author | Jack Koenig | 2022-12-06 09:44:42 -0800 |
|---|---|---|
| committer | GitHub | 2022-12-06 09:44:42 -0800 |
| commit | 64ac2d2cb866f5c3e30544bb1087dd374b57fdcb (patch) | |
| tree | 3681eaacddafe9575d28a92bf3ea95c2f5ad8c8a /benchmark/scripts | |
| parent | 842a7b1bbebe586c019bba296caba13c15380dda (diff) | |
Enhance find heap bound (#2579)
* Enhance find_heap_bound so that it does not go smaller than min_step
Previously, it could accidentally get a step size smaller than the
min_step and just continue looping forever. Now the while loop has a new
termination condition to catch this case.
* Add context to find_heap_bound
This new option allows checking additional heap sizes in increments
of the min_step above the found minimum heap size.
Diffstat (limited to 'benchmark/scripts')
| -rwxr-xr-x | benchmark/scripts/find_heap_bound.py | 53 |
1 file changed, 51 insertions, 2 deletions
diff --git a/benchmark/scripts/find_heap_bound.py b/benchmark/scripts/find_heap_bound.py index e89cfa5d..764fdf00 100755 --- a/benchmark/scripts/find_heap_bound.py +++ b/benchmark/scripts/find_heap_bound.py @@ -6,10 +6,13 @@ import argparse from typing import NamedTuple from subprocess import TimeoutExpired import logging +from functools import total_ordering from monitor_job import monitor_job, JobFailedError BaseHeapSize = NamedTuple('JavaHeapSize', [('value', int), ('suffix', str)]) + +@total_ordering class HeapSize(BaseHeapSize): K_FACTOR = 1024 M_FACTOR = 1024*1024 @@ -51,6 +54,16 @@ class HeapSize(BaseHeapSize): def __sub__(self, rhs): return HeapSize.from_bytes(self.toBytes() - rhs.toBytes()) + + def __eq__(self, rhs): + return self.toBytes() == rhs.toBytes() + + # Defining __eq__ for total_ordering forces us to explicitly inherit __hash__ + __hash__ = BaseHeapSize.__hash__ + + def __ge__(self, rhs): + return self.toBytes() >= rhs.toBytes() + @classmethod def from_str(cls, s: str): regex = '(\d+)([kKmMgG])?' 
@@ -97,6 +110,8 @@ def parseargs(): parser.add_argument("--timeout-factor", type=float, default=4.0, help="Multiple of wallclock time of first successful run " "that counts as a timeout, runs over this time count as a fail") + parser.add_argument("--context", type=int, default=0, + help="Number of extra steps above the minimum bound to run") return parser.parse_args() @@ -137,16 +152,23 @@ def main(): seen = set() timeout = None # Set by first successful run cur = HeapSize.from_str(args.start_size) - while cur not in seen: + last_success = cur + + # Do binary search + while cur not in seen and (step is None or step >= min_step): seen.add(cur) try: cmd = mk_cmd(args.java, cur, args.args) - logger.info("Running {}".format(" ".join(cmd))) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Running {}".format(" ".join(cmd))) + else: + logger.info("Running {}".format(cur)) stats = monitor_job(cmd, timeout=timeout) logger.debug(stats) if timeout is None: timeout = stats.wall_clock_time * args.timeout_factor logger.debug("Timeout set to {} s".format(timeout)) + last_success = cur results.append((cur, stats)) if step is None: step = (cur / 2).round_to(min_step) @@ -166,6 +188,33 @@ def main(): cur = (cur + step).round_to(min_step) logger.debug("Next = {}, step = {}".format(cur, step)) + # Run extra steps for some context above the minimum size + extra_steps = [] + if args.context > 0: + for i in range(1, args.context): + diff = min_step * i + heap_size = last_success + diff + if heap_size not in seen: + extra_steps.append(heap_size) + log_steps = ", ".join([str(e) for e in extra_steps]) # Pretty print + logger.info("Because context is {}, running extra heap sizes: {}".format(args.context, log_steps)) + + for cur in extra_steps: + logger.debug("Next = {}".format(cur)) + seen.add(cur) + try: + cmd = mk_cmd(args.java, cur, args.args) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Running {}".format(" ".join(cmd))) + else: + logger.info("Running {}".format(cur)) 
+ stats = monitor_job(cmd, timeout=timeout) + logger.debug(stats) + results.append((cur, stats)) + except (JobFailedError, TimeoutExpired) as e: + logger.debug(job_failed_msg(e)) + results.append((cur, None)) + sorted_results = sorted(results, key=lambda tup: tup[0].toBytes(), reverse=True) table = [["Xmx", "Max RSS (MiB)", "Wall Clock (s)", "User Time (s)", "System Time (s)"]] |
