From c21f31d09f2511497cea5cb03bd6ddba440c55fe Mon Sep 17 00:00:00 2001 From: mergify[bot] Date: Tue, 29 Nov 2022 17:24:38 +0000 Subject: Implement compressed Namespace (backport #2856) (#2860) * Implement compressed Namespace (#2856) The namespace disambiguates requests for the same name with _. Rather than storing every disambiguated name in the underlying HashMap, it now only stores the base along with the "next available" index. This makes the logic for checking if a name is already contained in the namespace slightly more sophisticated because users can name things in a way that will collide with disambiguated names from a common substring. For example, in naming the sequence "foo", "foo", "foo_1", the 2nd "foo" takes the name "foo_1" so the following "foo_1" gets disambiguated to "foo_1_1". But since we compressed that original "foo_1" into the same HashMap entry as just "foo", we have to do a form of "prefix checking" whenever naming something that ends in "_". In practice, the saved memory allocations more than make up for the more complicated logic to disambiguate names because the common case is still fast. (cherry picked from commit 1654d87a02ca799bf12805a611a91e7524d49843) # Conflicts: # core/src/main/scala/chisel3/internal/Builder.scala * Resolve backport conflicts Co-authored-by: Jack Koenig --- core/src/main/scala/chisel3/internal/Builder.scala | 63 ++++++++++++++--- .../scala/chisel3/internal/NamespaceSpec.scala | 81 ++++++++++++++++++++++ 2 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 src/test/scala/chisel3/internal/NamespaceSpec.scala diff --git a/core/src/main/scala/chisel3/internal/Builder.scala b/core/src/main/scala/chisel3/internal/Builder.scala index e3dfff09..d06b7992 100644 --- a/core/src/main/scala/chisel3/internal/Builder.scala +++ b/core/src/main/scala/chisel3/internal/Builder.scala @@ -17,18 +17,29 @@ import chisel3.internal.Builder.Prefix import logger.LazyLogging import scala.collection.mutable +import scala.annotation.tailrec private[chisel3] class Namespace(keywords: Set[String]) { + // This HashMap is compressed, not every name in the namespace is present here. + // If the same name is requested multiple times, it only takes 1 entry in the HashMap and the + // value is incremented for each time the name is requested. + // Names can be requested that collide with compressed sets of names, thus the algorithm for + // checking if a name is present in the Namespace is more complex than just checking the HashMap, + // see getIndex below. private val names = collection.mutable.HashMap[String, Long]() def copyTo(other: Namespace): Unit = names.foreach { case (s: String, l: Long) => other.names(s) = l } for (keyword <- keywords) names(keyword) = 1 - private def rename(n: String): String = { - val index = names(n) + @tailrec + private def rename(n: String, index: Long): String = { val tryName = s"${n}_${index}" - names(n) = index + 1 - if (this contains tryName) rename(n) else tryName + if (names.contains(tryName)) { + rename(n, index + 1) + } else { + names(n) = index + 1 + tryName + } } private def sanitize(s: String, leadingDigitOk: Boolean = false): String = { @@ -40,14 +51,50 @@ private[chisel3] class Namespace(keywords: Set[String]) { if (headOk) res else s"_$res" } - def contains(elem: String): Boolean = names.contains(elem) + /** Checks if `n` ends in `_\d+` and returns the substring before `_` if so, null otherwise */ + // TODO can and should this be folded in to sanitize? Same iteration as the forall? + private def prefix(n: String): Int = { + // This is micro-optimized because it runs on every single name + var i = n.size - 1 + while (i > 0 && n(i).isDigit) { + i -= 1 + } + // Will get i == 0 for all digits or _\d+ with empty prefix, those have no prefix so returning 0 is correct + if (i == n.size) 0 // no digits + else if (n(i) != '_') 0 // no _ + else i + } + + // Gets the current index for this name, None means it is not contained in the Namespace + private def getIndex(elem: String): Option[Long] = + names.get(elem).orElse { + // This exact name isn't contained, but if we end in _, we need to check our prefix + val maybePrefix = prefix(elem) + if (maybePrefix == 0) None + else { + // If we get a prefix collision and our index is taken, we start disambiguating with __1 + names + .get(elem.take(maybePrefix)) + .filter { prefixIdx => + val ourIdx = elem.drop(maybePrefix + 1).toInt + // The namespace starts disambiguating at _1 so _0 is a false collision case + ourIdx != 0 && prefixIdx > ourIdx + } + .map(_ => 1) + } + } + + def contains(elem: String): Boolean = getIndex(elem).isDefined // leadingDigitOk is for use in fields of Records def name(elem: String, leadingDigitOk: Boolean = false): String = { val sanitized = sanitize(elem, leadingDigitOk) - val result = if (this.contains(sanitized)) rename(sanitized) else sanitized - names(result) = 1 - result + getIndex(sanitized) match { + case Some(idx) => rename(sanitized, idx) + case None => + names(sanitized) = 1 + sanitized + } } } diff --git a/src/test/scala/chisel3/internal/NamespaceSpec.scala b/src/test/scala/chisel3/internal/NamespaceSpec.scala new file mode 100644 index 00000000..fd808ff0 --- /dev/null +++ b/src/test/scala/chisel3/internal/NamespaceSpec.scala @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 + +package chisel3.internal + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers._ + +class NamespaceSpec extends AnyFlatSpec { + behavior.of("Namespace") + + they should "support basic disambiguation" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x") should be("x_2") + } + + they should "support explicit _# names before names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x_1") should be("x_1") + name("x_2") should be("x_2") + name("x") should be("x") + name("x") should be("x_3") + } + + they should "support explicit _# names in the middle of names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_1") should be("x_1_1") + name("x_2") should be("x_2") + name("x") should be("x_3") + } + + // For some reason, multi-character names tickled a different failure mode than single character + they should "support explicit _# names in the middle of longer names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("foo") should be("foo") + name("foo") should be("foo_1") + name("foo_1") should be("foo_1_1") + name("foo_2") should be("foo_2") + name("foo") should be("foo_3") + } + + they should "support collisions in recursively growing names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_1") should be("x_1_1") + name("x_1") should be("x_1_2") + name("x_1_1") should be("x_1_1_1") + name("x_1_1") should be("x_1_1_2") + } + + they should "support collisions in recursively shrinking names" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x_1_1") should be("x_1_1") + name("x_1_1") should be("x_1_1_1") + name("x_1") should be("x_1") + name("x_1") should be("x_1_2") + name("x") should be("x") + name("x") should be("x_2") + } + + // The namespace never generates names with _0 so it's actually a false collision case + they should "properly handle false collisions with signals ending in _0" in { + val namespace = Namespace.empty + val name = namespace.name(_, false) + name("x") should be("x") + name("x") should be("x_1") + name("x_0") should be("x_0") + name("x") should be("x_2") + name("x_0") should be("x_0_1") + } +} -- cgit v1.2.3