Implement MFC-style source locator compression (#2212)

* Implement MFC-style source locator compression * Fix formatting issues * Fix emitting empty FileInfo if the firrtl doesn't have one * Remove '.scala' requirement in FileInfo parsing regex * Handle parsing of FileInfos with no line/col nums * Split FileInfos only if they match This should fix any issues with FileInfos that do not use the "file line:col" format, and allow any valid firrtl using these info comments to compile. * Add unit tests for locator compression * Move InfoTests to InfoSpec class * Fix existing unit tests with fileinfo comments * Add unit tests to ignore the algorithm's own output
author: Jared Barocsi 2021-05-13 14:30:58 -0700
committer: GitHub 2021-05-13 14:30:58 -0700
commit: 6aa86b13abe25271278498836217c89c00d7b151 (patch)
tree: fcaf85d1d271148ea46edbbb0ef010300a4a6105 /src/main
parent: 5f3e196b83b576b190197807ead0e769f0d5e70b (diff)
1 files changed, 107 insertions, 1 deletions
diff --git a/src/main/scala/firrtl/ir/IR.scala b/src/main/scala/firrtl/ir/IR.scala
index 13ba3d46..8ba29d8e 100644
--- a/src/main/scala/firrtl/ir/IR.scala
+++ b/src/main/scala/firrtl/ir/IR.scala
@@ -35,6 +35,7 @@ case class FileInfo(escaped: String) extends Info {
   override def toString: String = " @[" + escaped + "]"
   def ++(that: Info): Info = if (that == NoInfo) this else MultiInfo(Seq(this, that))
   def unescaped: String = FileInfo.unescape(escaped)
+  def split:     (String, String, String) = FileInfo.split(escaped)
   @deprecated("Use FileInfo.unescaped instead. FileInfo.info will be removed in FIRRTL 1.5.", "FIRRTL 1.4")
   def info: StringLit = StringLit(this.unescaped)
 }
@@ -88,6 +89,17 @@ object FileInfo {
     JavaUnicodeEscaper.outsideOf(32, 0x7f)
   )
 
+  // Splits the FileInfo into its corresponding file, line, and column strings
+  private def split(s: String): (String, String, String) = {
+    s match {
+      // Yield the three
+      case FileInfoRegex(file, line, col) => (file, line, col)
+      // Otherwise, just return the string itself and null for the other values
+      case _ => (s, null, null)
+    }
+  }
+
+  private val FileInfoRegex = """(?:([^\s]+)(?: (\d+)\:(\d+)))""".r
 }
 
 case class MultiInfo(infos: Seq[Info]) extends Info {
@@ -102,7 +114,7 @@ case class MultiInfo(infos: Seq[Info]) extends Info {
     else ""
   }
   def ++(that: Info): Info = if (that == NoInfo) this else MultiInfo(infos :+ that)
-  def flatten: Seq[FileInfo] = MultiInfo.flattenInfo(infos)
+  def flatten: Seq[FileInfo] = MultiInfo.compressInfo(MultiInfo.flattenInfo(infos))
 }
 object MultiInfo {
   def apply(infos: Info*) = {
@@ -126,6 +138,100 @@ object MultiInfo {
     case f: FileInfo => Seq(f)
     case MultiInfo(infos) => flattenInfo(infos)
   }
+
+  private def compressInfo(infos: Seq[FileInfo]): Seq[FileInfo] = {
+    // Sort infos by file name, then line number, then column number
+    val sorted = infos.sortWith((A, B) => {
+      val (fileA, lineA, colA) = A.split
+      val (fileB, lineB, colB) = B.split
+
+      // A FileInfo with no line nor column number should be sorted to the beginning of
+      // the sequence of FileInfos that share its fileName. This way, they can be immediately
+      // skipped by the algorithm
+      if (lineA == null || lineB == null)
+        fileA <= fileB && lineA == null
+      else
+        // Default comparison
+        fileA <= fileB &&
+        lineA <= lineB &&
+        colA <= colB
+    })
+
+    // Holds the current file/line being parsed.
+    var currentFile = ""
+    var currentLine = ""
+
+    // Holds any compressed line/column numbers to be appended after the file name
+    var locators = new StringBuilder
+    // Holds all encountered columns that exist within the current line
+    var columns: Seq[String] = Seq()
+
+    var out: Seq[FileInfo] = Seq()
+
+    // Helper function to append the contents of the columns Seq to the locators buffer.
+    def serializeColumns: Unit = {
+      if (columns.size == 0)
+        return
+
+      var columnsList = columns.mkString(",")
+      // Wrap the columns in curly braces if it contains more than one entry
+      if (columns.size > 1)
+        columnsList = '{' + columnsList + '}'
+
+      // If there already exists line/column numbers in the buffer, delimit the new
+      // info with a space
+      if (locators.nonEmpty)
+        locators ++= " "
+
+      locators ++= s"$currentLine:$columnsList"
+    }
+
+    for (info <- sorted) {
+      val (file, line, col) = info.split
+
+      // Only process file infos that match the pattern *fully*, so that all 3 capture groups were
+      // matched
+      // TODO: Enforce a specific format for FileInfos (file.name line1:{col1,col2} line2:{col3,col4}...),
+      // so that this code can run with the assumption that all FileInfos obey that format.
+      if (line != null && col != null) {
+        // If we encounter a new file, yield the current compressed info
+        if (file != currentFile) {
+          if (currentFile.nonEmpty) {
+            serializeColumns
+            out :+= FileInfo.fromEscaped(s"$currentFile $locators")
+          }
+
+          // Reset all tracking variables to now track the new file
+          currentFile = file
+          currentLine = ""
+          locators.clear()
+          columns = Seq()
+        }
+
+        // If we encounter a new line, append the current columns to the line buffer.
+        if (line != currentLine) {
+          if (currentLine.nonEmpty)
+            serializeColumns
+          // Track the new current line
+          currentLine = line
+          columns = Seq()
+        }
+
+        columns :+= col
+      } else
+        // Add in the uncompressed FileInfo
+        out :+= info
+    }
+
+    // Serialize any remaining column info that was parsed by the loop
+    serializeColumns
+
+    // Append the remaining FileInfo if one was parsed
+    if (currentFile.nonEmpty)
+      out :+= FileInfo.fromEscaped(s"$currentFile $locators")
+
+    out
+  }
 }
 
 trait HasName {
author	Jared Barocsi	2021-05-13 14:30:58 -0700
committer	GitHub	2021-05-13 14:30:58 -0700
commit	6aa86b13abe25271278498836217c89c00d7b151 (patch)
tree	fcaf85d1d271148ea46edbbb0ef010300a4a6105 /src/main
parent	5f3e196b83b576b190197807ead0e769f0d5e70b (diff)