aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack Koenig2017-09-22 15:36:35 -0700
committerGitHub2017-09-22 15:36:35 -0700
commit34e9944aaf3c1fc76fcaaacc02509f217c0c0d63 (patch)
tree387d82ae7d07861609d9f0e4cf7ac249d379f764
parentf04a18efdf4ca88fe1ac77acab30e21290957919 (diff)
Fix string lit (#663)
Refactor StringLit to use String instead of Array[Byte]
-rw-r--r--src/main/scala/firrtl/Emitter.scala9
-rw-r--r--src/main/scala/firrtl/StringLit.scala95
-rw-r--r--src/main/scala/firrtl/Visitor.scala8
-rw-r--r--src/main/scala/firrtl/ir/IR.scala44
-rw-r--r--src/main/scala/firrtl/passes/Checks.scala2
-rw-r--r--src/main/scala/firrtl/passes/Passes.scala2
-rw-r--r--src/test/scala/firrtlTests/StringSpec.scala83
7 files changed, 95 insertions, 148 deletions
diff --git a/src/main/scala/firrtl/Emitter.scala b/src/main/scala/firrtl/Emitter.scala
index 8831e030..6386c33a 100644
--- a/src/main/scala/firrtl/Emitter.scala
+++ b/src/main/scala/firrtl/Emitter.scala
@@ -197,9 +197,7 @@ class VerilogEmitter extends SeqTransform with Emitter {
def stringify(param: Param): String = param match {
case IntParam(name, value) => s".$name($value)"
case DoubleParam(name, value) => s".$name($value)"
- case StringParam(name, value) =>
- val strx = "\"" + VerilogStringLitHandler.escape(value) + "\""
- s".${name}($strx)"
+ case StringParam(name, value) => s".${name}(${value.verilogEscape})"
case RawStringParam(name, value) => s".$name($value)"
}
def stringify(tpe: GroundType): String = tpe match {
@@ -516,9 +514,7 @@ class VerilogEmitter extends SeqTransform with Emitter {
def stop(ret: Int): Seq[Any] = Seq(if (ret == 0) "$finish;" else "$fatal;")
def printf(str: StringLit, args: Seq[Expression]): Seq[Any] = {
- val q = '"'.toString
- val strx = s"""$q${VerilogStringLitHandler escape str}$q""" +:
- (args flatMap (Seq("," , _)))
+ val strx = str.verilogEscape +: args.flatMap(Seq(",",_))
Seq("$fwrite(32'h80000002,", strx, ");")
}
@@ -575,7 +571,6 @@ class VerilogEmitter extends SeqTransform with Emitter {
assign(WRef(sx.name, sx.value.tpe, NodeKind, MALE), sx.value)
sx
case sx: Stop =>
- val errorString = StringLit(s"${sx.ret}\n".getBytes)
simulate(sx.clk, sx.en, stop(sx.ret), Some("STOP_COND"))
sx
case sx: Print =>
diff --git a/src/main/scala/firrtl/StringLit.scala b/src/main/scala/firrtl/StringLit.scala
deleted file mode 100644
index 206fe6a4..00000000
--- a/src/main/scala/firrtl/StringLit.scala
+++ /dev/null
@@ -1,95 +0,0 @@
-// See LICENSE for license details.
-
-package firrtl
-
-import firrtl.ir._
-
-import java.nio.charset.StandardCharsets.UTF_8
-import scala.annotation.tailrec
-
-// Default implementations are for valid FIRRTL Strings
-trait StringLitHandler {
-
- // Apply correct escaping for strings in a given language
- def escape(lit: StringLit): String = {
- @tailrec
- def escape(in: Array[Byte], str: String): String = {
- if (in.isEmpty) str
- else {
- val s = in.head match {
- case 0x5c => "\\\\"
- case 0x0a => "\\n"
- case 0x09 => "\\t"
- case 0x22 => "\\\""
- case c =>
- if (c >= 0x20 && c <= 0x7e) new String(Array(c), UTF_8)
- else throw new FIRRTLException(s"Unsupported byte in StringLit: $c")
- }
- escape(in.tail, str + s)
- }
- }
- escape(lit.array, "")
- }
-
- // Turn raw parsed strings into String Literals
- def unescape(raw: String): StringLit = {
- @tailrec
- def unescape(in: Array[Byte], out: Array[Byte]): Array[Byte] = {
- if (in.isEmpty) out
- else {
- val c = in.head
- if ((c < 0x20) || (c > 0x7e)) { // Range from ' ' to '~'
- val msg = "Unsupported unescaped UTF-8 Byte 0x%02x! ".format(c) +
- "Valid range [0x20-0x7e]"
- throw new InvalidStringLitException(msg)
- }
- // Handle escaped characters
- if (c == 0x5c) { // backslash
- val (n: Int, bytes: Array[Byte]) = in(1) match {
- case 0x5c => (2, Array[Byte](0x5c)) // backslash
- case 0x6e => (2, Array[Byte](0x0a)) // newline
- case 0x74 => (2, Array[Byte](0x09)) // tab
- case 0x27 => (2, Array[Byte](0x27)) // single quote
- case 0x22 => (2, Array[Byte](0x22)) // double quote
- // TODO Finalize supported escapes, implement hex2Bytes
- //case 0x78 => (4, hex2Bytes(in.slice(2, 3)))) // hex escape
- //case 0x75 => (6, hex2Bytes(in.slice(2, 5))) // unicode excape
- case e => // error
- val msg = s"Invalid escape character ${e.toChar}! " +
- "Valid characters [nt'\"\\]"
- throw new InvalidEscapeCharException(msg)
- }
- unescape(in.drop(n), out ++ bytes) // consume n
- } else {
- unescape(in.tail, out :+ in.head)
- }
- }
- }
-
- val byteArray = raw.getBytes(java.nio.charset.StandardCharsets.UTF_8)
- StringLit(unescape(byteArray, Array()))
- }
-
- // Translate from FIRRTL formatting to the specific language's formatting
- def format(lit: StringLit): StringLit = ???
- // Translate from the specific language's formatting to FIRRTL formatting
- def unformat(lit: StringLit): StringLit = ???
-}
-
-object FIRRTLStringLitHandler extends StringLitHandler
-
-object VerilogStringLitHandler extends StringLitHandler {
- // Change FIRRTL format string to Verilog format
- override def format(lit: StringLit): StringLit = {
- @tailrec
- def format(in: Array[Byte], out: Array[Byte], percent: Boolean): Array[Byte] = {
- if (in.isEmpty) out
- else {
- if (percent && in.head == 'x') format(in.tail, out :+ 'h'.toByte, percent = false)
- else format(in.tail, out :+ in.head, in.head == '%' && !percent)
- }
- }
- StringLit(format(lit.array, Array(), percent = false))
- }
-}
-
diff --git a/src/main/scala/firrtl/Visitor.scala b/src/main/scala/firrtl/Visitor.scala
index 29b8a57b..c5a21b23 100644
--- a/src/main/scala/firrtl/Visitor.scala
+++ b/src/main/scala/firrtl/Visitor.scala
@@ -53,12 +53,12 @@ class Visitor(infoMode: InfoMode) extends FIRRTLBaseVisitor[FirrtlNode] {
infoMode match {
case UseInfo =>
if (useInfo.length == 0) NoInfo
- else ir.FileInfo(FIRRTLStringLitHandler.unescape(useInfo))
+ else ir.FileInfo(ir.StringLit.unescape(useInfo))
case AppendInfo(filename) =>
val newInfo = useInfo + ":" + genInfo(filename)
- ir.FileInfo(FIRRTLStringLitHandler.unescape(newInfo))
+ ir.FileInfo(ir.StringLit.unescape(newInfo))
case GenInfo(filename) =>
- ir.FileInfo(FIRRTLStringLitHandler.unescape(genInfo(filename)))
+ ir.FileInfo(ir.StringLit.unescape(genInfo(filename)))
case IgnoreInfo => NoInfo
}
}
@@ -208,7 +208,7 @@ class Visitor(infoMode: InfoMode) extends FIRRTLBaseVisitor[FirrtlNode] {
// visitStringLit
private def visitStringLit[FirrtlNode](node: TerminalNode): StringLit = {
val raw = node.getText.tail.init // Remove surrounding double quotes
- FIRRTLStringLitHandler.unescape(raw)
+ ir.StringLit.unescape(raw)
}
private def visitWhen[FirrtlNode](ctx: WhenContext): Conditionally = {
diff --git a/src/main/scala/firrtl/ir/IR.scala b/src/main/scala/firrtl/ir/IR.scala
index aafa17ca..2f37b05a 100644
--- a/src/main/scala/firrtl/ir/IR.scala
+++ b/src/main/scala/firrtl/ir/IR.scala
@@ -42,8 +42,46 @@ trait HasInfo {
}
trait IsDeclaration extends HasName with HasInfo
-case class StringLit(array: Array[Byte]) extends FirrtlNode {
- def serialize: String = FIRRTLStringLitHandler.escape(this)
+case class StringLit(string: String) extends FirrtlNode {
+ /** Returns an escaped and quoted String */
+ def escape: String = {
+ import scala.reflect.runtime.universe._
+ Literal(Constant(string)).toString
+ }
+ def serialize: String = {
+ val str = escape
+ str.slice(1, str.size - 1)
+ }
+ /** Format the string for Verilog */
+ def verilogFormat: StringLit = {
+ StringLit(string.replaceAll("%x", "%h"))
+ }
+ /** Returns an escaped and quoted String */
+ def verilogEscape: String = {
+ // normalize to turn things like รถ into o
+ import java.text.Normalizer
+ val normalized = Normalizer.normalize(string, Normalizer.Form.NFD)
+ val ascii = normalized flatMap StringLit.toASCII
+ ascii.mkString("\"", "", "\"")
+ }
+}
+object StringLit {
+ /** Maps characters to ASCII for Verilog emission */
+ private def toASCII(char: Char): List[Char] = char match {
+ case nonASCII if !nonASCII.isValidByte => List('?')
+ case letter if letter.isLetter => List(letter)
+ case '\n' => List('\\', 'n')
+ case '\\' => List('\\', '\\')
+ case '\t' => List('\\', 't')
+ case '"' => List('\\', '"')
+ case other => List('?')
+ }
+
+ /** Create a StringLit from a raw parsed String */
+ def unescape(raw: String): StringLit = {
+ val str = StringContext.processEscapes(raw)
+ StringLit(str)
+ }
}
/** Primitive Operation
@@ -269,7 +307,7 @@ case class Print(
clk: Expression,
en: Expression) extends Statement with HasInfo {
def serialize: String = {
- val strs = Seq(clk.serialize, en.serialize, "\"" + string.serialize + "\"") ++
+ val strs = Seq(clk.serialize, en.serialize, string.escape) ++
(args map (_.serialize))
"printf(" + (strs mkString ", ") + ")" + info.serialize
}
diff --git a/src/main/scala/firrtl/passes/Checks.scala b/src/main/scala/firrtl/passes/Checks.scala
index 74c532cc..09c818bd 100644
--- a/src/main/scala/firrtl/passes/Checks.scala
+++ b/src/main/scala/firrtl/passes/Checks.scala
@@ -85,7 +85,7 @@ object CheckHighForm extends Pass {
def checkFstring(info: Info, mname: String, s: StringLit, i: Int) {
val validFormats = "bdxc"
- val (percent, npercents) = (s.array foldLeft (false, 0)){
+ val (percent, npercents) = s.string.foldLeft((false, 0)) {
case ((percentx, n), b) if percentx && (validFormats contains b) =>
(false, n + 1)
case ((percentx, n), b) if percentx && b != '%' =>
diff --git a/src/main/scala/firrtl/passes/Passes.scala b/src/main/scala/firrtl/passes/Passes.scala
index 4a90bd08..61afade6 100644
--- a/src/main/scala/firrtl/passes/Passes.scala
+++ b/src/main/scala/firrtl/passes/Passes.scala
@@ -269,7 +269,7 @@ object VerilogWrap extends Pass {
}
def vWrapS(s: Statement): Statement = {
s map vWrapS map vWrapE match {
- case sx: Print => sx copy (string = VerilogStringLitHandler.format(sx.string))
+ case sx: Print => sx.copy(string = sx.string.verilogFormat)
case sx => sx
}
}
diff --git a/src/test/scala/firrtlTests/StringSpec.scala b/src/test/scala/firrtlTests/StringSpec.scala
index 7e7c040e..87ee9191 100644
--- a/src/test/scala/firrtlTests/StringSpec.scala
+++ b/src/test/scala/firrtlTests/StringSpec.scala
@@ -3,14 +3,17 @@
package firrtlTests
import firrtl._
+import firrtl.ir.StringLit
import java.io._
import scala.sys.process._
+import annotation.tailrec
import org.scalatest._
import org.scalatest.prop._
import org.scalatest.Assertions._
import org.scalacheck._
+import org.scalacheck.Arbitrary._
class PrintfSpec extends FirrtlPropSpec {
@@ -55,17 +58,12 @@ class PrintfSpec extends FirrtlPropSpec {
class StringSpec extends FirrtlPropSpec {
// Whitelist is [0x20 - 0x7e]
- val whitelist =
- """ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ""" +
+ val whitelist =
+ """ !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ""" +
"""[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"""
- val whitelistBA: Array[Byte] = Array.range(0x20, 0x7e) map (_.toByte)
property(s"Character whitelist should be supported: [$whitelist] ") {
- val lit = firrtl.FIRRTLStringLitHandler.unescape(whitelist)
- // Check internal
- lit.array zip whitelistBA foreach { case (b, e) =>
- assert(b == e, s"(${b.toChar} did not equal expected ${e.toChar})")
- }
+ val lit = StringLit.unescape(whitelist)
// Check result
assert(lit.serialize == whitelist)
}
@@ -74,40 +72,51 @@ class StringSpec extends FirrtlPropSpec {
val esc = """\\\'\"\t\n"""
val validEsc = Seq('n', 't', '\\', '"', '\'')
property(s"Escape characters [$esc] should parse") {
- val lit = firrtl.FIRRTLStringLitHandler.unescape(esc)
- assert(lit.array(0) == 0x5c)
- assert(lit.array(1) == 0x27)
- assert(lit.array(2) == 0x22)
- assert(lit.array(3) == 0x09)
- assert(lit.array(4) == 0x0a)
- assert(lit.array.length == 5)
+ val lit = StringLit.unescape(esc)
+ assert(lit.string(0).toByte == 0x5c)
+ assert(lit.string(1).toByte == 0x27)
+ assert(lit.string(2).toByte == 0x22)
+ assert(lit.string(3).toByte == 0x09)
+ assert(lit.string(4).toByte == 0x0a)
+ assert(lit.string.length == 5)
}
- // Generators for random testing
- val validChar = Gen.oneOf((0x20.toChar to 0x5b.toChar).toSeq ++
- (0x5e.toChar to 0x7e.toChar).toSeq) // exclude '\\'
- val validCharSeq = Gen.containerOf[Seq, Char](validChar)
- val invalidChar = Gen.oneOf(Gen.choose(0x00.toChar, 0x1f.toChar),
- Gen.choose(0x7f.toChar, 0xff.toChar))
- val invalidEsc = Gen.oneOf((0x00.toChar to 0xff.toChar).toSeq diff validEsc)
-
- property("Random invalid strings should fail") {
- forAll(validCharSeq, invalidChar, validCharSeq) {
- (head: Seq[Char], bad: Char, tail: Seq[Char]) =>
- val str = ((head :+ bad) ++ tail).mkString
- intercept[InvalidStringLitException] {
- firrtl.FIRRTLStringLitHandler.unescape(str)
+ // From IEEE 1364-2001 2.6
+ def isValidVerilogString(str: String): Boolean = {
+ @tailrec def rec(xs: List[Char]): Boolean = xs match {
+ case Nil => true
+ case '\\' :: esc =>
+ if (Set('n', 't', '\\', '"').contains(esc.head)) rec(esc.tail)
+ else { // Check valid octal escape, otherwise illegal
+ val next3 = esc.take(3)
+ if (next3.size == 3 && next3.forall(('0' to '7').toSet.contains)) rec(esc.drop(3))
+ else false
}
+ case char :: tail => // Check Legal ASCII
+ if (char.toInt < 256 && char.toInt >= 0) rec(tail)
+ else false
}
+ rec(str.toList)
}
-
- property(s"Invalid escape characters should fail") {
- forAll(validCharSeq, invalidEsc, validCharSeq) {
- (head: Seq[Char], badEsc: Char, tail: Seq[Char]) =>
- val str = (head ++ Seq('\\', badEsc) ++ tail).mkString
- intercept[InvalidEscapeCharException] {
- firrtl.FIRRTLStringLitHandler.unescape(str)
- }
+ // From IEEE 1364-2001 17.1.1.2
+ val legalFormats = "HhDdOoBbCcLlVvMmSsTtUuZz%".toSet
+ def isValidVerilogFormat(str: String): Boolean = str.toSeq.sliding(2).forall {
+ case Seq('%', char) if legalFormats contains char => true
+ case _ => true
+ }
+
+ // Generators for legal Firrtl format strings
+ val genFormat = Gen.oneOf("bdxc%").map(List('%', _))
+ val genEsc = Gen.oneOf(esc.toSeq).map(List(_))
+ val genChar = arbitrary[Char].suchThat(c => (c != '%' && c != '\\')).map(List(_))
+ val genFragment = Gen.frequency((10, genChar), (1, genFormat), (1, genEsc)).map(_.mkString)
+ val genString = Gen.listOf[String](genFragment).map(_.mkString)
+
+ property ("Firrtl Format Strings with Unicode chars should emit as legal Verilog Strings") {
+ forAll (genString) { str =>
+ val verilogStr = StringLit(str).verilogFormat.verilogEscape
+ assert(isValidVerilogString(verilogStr))
+ assert(isValidVerilogFormat(verilogStr))
}
}
}