diff options
| author | Jack Koenig | 2017-09-22 15:36:35 -0700 |
|---|---|---|
| committer | GitHub | 2017-09-22 15:36:35 -0700 |
| commit | 34e9944aaf3c1fc76fcaaacc02509f217c0c0d63 (patch) | |
| tree | 387d82ae7d07861609d9f0e4cf7ac249d379f764 /src | |
| parent | f04a18efdf4ca88fe1ac77acab30e21290957919 (diff) | |
Fix string lit (#663)
Refactor StringLit to use String instead of Array[Byte]
Diffstat (limited to 'src')
| -rw-r--r-- | src/main/scala/firrtl/Emitter.scala | 9 | ||||
| -rw-r--r-- | src/main/scala/firrtl/StringLit.scala | 95 | ||||
| -rw-r--r-- | src/main/scala/firrtl/Visitor.scala | 8 | ||||
| -rw-r--r-- | src/main/scala/firrtl/ir/IR.scala | 44 | ||||
| -rw-r--r-- | src/main/scala/firrtl/passes/Checks.scala | 2 | ||||
| -rw-r--r-- | src/main/scala/firrtl/passes/Passes.scala | 2 | ||||
| -rw-r--r-- | src/test/scala/firrtlTests/StringSpec.scala | 83 |
7 files changed, 95 insertions, 148 deletions
diff --git a/src/main/scala/firrtl/Emitter.scala b/src/main/scala/firrtl/Emitter.scala index 8831e030..6386c33a 100644 --- a/src/main/scala/firrtl/Emitter.scala +++ b/src/main/scala/firrtl/Emitter.scala @@ -197,9 +197,7 @@ class VerilogEmitter extends SeqTransform with Emitter { def stringify(param: Param): String = param match { case IntParam(name, value) => s".$name($value)" case DoubleParam(name, value) => s".$name($value)" - case StringParam(name, value) => - val strx = "\"" + VerilogStringLitHandler.escape(value) + "\"" - s".${name}($strx)" + case StringParam(name, value) => s".${name}(${value.verilogEscape})" case RawStringParam(name, value) => s".$name($value)" } def stringify(tpe: GroundType): String = tpe match { @@ -516,9 +514,7 @@ class VerilogEmitter extends SeqTransform with Emitter { def stop(ret: Int): Seq[Any] = Seq(if (ret == 0) "$finish;" else "$fatal;") def printf(str: StringLit, args: Seq[Expression]): Seq[Any] = { - val q = '"'.toString - val strx = s"""$q${VerilogStringLitHandler escape str}$q""" +: - (args flatMap (Seq("," , _))) + val strx = str.verilogEscape +: args.flatMap(Seq(",",_)) Seq("$fwrite(32'h80000002,", strx, ");") } @@ -575,7 +571,6 @@ class VerilogEmitter extends SeqTransform with Emitter { assign(WRef(sx.name, sx.value.tpe, NodeKind, MALE), sx.value) sx case sx: Stop => - val errorString = StringLit(s"${sx.ret}\n".getBytes) simulate(sx.clk, sx.en, stop(sx.ret), Some("STOP_COND")) sx case sx: Print => diff --git a/src/main/scala/firrtl/StringLit.scala b/src/main/scala/firrtl/StringLit.scala deleted file mode 100644 index 206fe6a4..00000000 --- a/src/main/scala/firrtl/StringLit.scala +++ /dev/null @@ -1,95 +0,0 @@ -// See LICENSE for license details. - -package firrtl - -import firrtl.ir._ - -import java.nio.charset.StandardCharsets.UTF_8 -import scala.annotation.tailrec - -// Default implementations are for valid FIRRTL Strings -trait StringLitHandler { - - // Apply correct escaping for strings in a given language - def escape(lit: StringLit): String = { - @tailrec - def escape(in: Array[Byte], str: String): String = { - if (in.isEmpty) str - else { - val s = in.head match { - case 0x5c => "\\\\" - case 0x0a => "\\n" - case 0x09 => "\\t" - case 0x22 => "\\\"" - case c => - if (c >= 0x20 && c <= 0x7e) new String(Array(c), UTF_8) - else throw new FIRRTLException(s"Unsupported byte in StringLit: $c") - } - escape(in.tail, str + s) - } - } - escape(lit.array, "") - } - - // Turn raw parsed strings into String Literals - def unescape(raw: String): StringLit = { - @tailrec - def unescape(in: Array[Byte], out: Array[Byte]): Array[Byte] = { - if (in.isEmpty) out - else { - val c = in.head - if ((c < 0x20) || (c > 0x7e)) { // Range from ' ' to '~' - val msg = "Unsupported unescaped UTF-8 Byte 0x%02x! ".format(c) + - "Valid range [0x20-0x7e]" - throw new InvalidStringLitException(msg) - } - // Handle escaped characters - if (c == 0x5c) { // backslash - val (n: Int, bytes: Array[Byte]) = in(1) match { - case 0x5c => (2, Array[Byte](0x5c)) // backslash - case 0x6e => (2, Array[Byte](0x0a)) // newline - case 0x74 => (2, Array[Byte](0x09)) // tab - case 0x27 => (2, Array[Byte](0x27)) // single quote - case 0x22 => (2, Array[Byte](0x22)) // double quote - // TODO Finalize supported escapes, implement hex2Bytes - //case 0x78 => (4, hex2Bytes(in.slice(2, 3)))) // hex escape - //case 0x75 => (6, hex2Bytes(in.slice(2, 5))) // unicode excape - case e => // error - val msg = s"Invalid escape character ${e.toChar}! " + - "Valid characters [nt'\"\\]" - throw new InvalidEscapeCharException(msg) - } - unescape(in.drop(n), out ++ bytes) // consume n - } else { - unescape(in.tail, out :+ in.head) - } - } - } - - val byteArray = raw.getBytes(java.nio.charset.StandardCharsets.UTF_8) - StringLit(unescape(byteArray, Array())) - } - - // Translate from FIRRTL formatting to the specific language's formatting - def format(lit: StringLit): StringLit = ??? - // Translate from the specific language's formatting to FIRRTL formatting - def unformat(lit: StringLit): StringLit = ??? -} - -object FIRRTLStringLitHandler extends StringLitHandler - -object VerilogStringLitHandler extends StringLitHandler { - // Change FIRRTL format string to Verilog format - override def format(lit: StringLit): StringLit = { - @tailrec - def format(in: Array[Byte], out: Array[Byte], percent: Boolean): Array[Byte] = { - if (in.isEmpty) out - else { - if (percent && in.head == 'x') format(in.tail, out :+ 'h'.toByte, percent = false) - else format(in.tail, out :+ in.head, in.head == '%' && !percent) - } - } - StringLit(format(lit.array, Array(), percent = false)) - } -} - diff --git a/src/main/scala/firrtl/Visitor.scala b/src/main/scala/firrtl/Visitor.scala index 29b8a57b..c5a21b23 100644 --- a/src/main/scala/firrtl/Visitor.scala +++ b/src/main/scala/firrtl/Visitor.scala @@ -53,12 +53,12 @@ class Visitor(infoMode: InfoMode) extends FIRRTLBaseVisitor[FirrtlNode] { infoMode match { case UseInfo => if (useInfo.length == 0) NoInfo - else ir.FileInfo(FIRRTLStringLitHandler.unescape(useInfo)) + else ir.FileInfo(ir.StringLit.unescape(useInfo)) case AppendInfo(filename) => val newInfo = useInfo + ":" + genInfo(filename) - ir.FileInfo(FIRRTLStringLitHandler.unescape(newInfo)) + ir.FileInfo(ir.StringLit.unescape(newInfo)) case GenInfo(filename) => - ir.FileInfo(FIRRTLStringLitHandler.unescape(genInfo(filename))) + ir.FileInfo(ir.StringLit.unescape(genInfo(filename))) case IgnoreInfo => NoInfo } } @@ -208,7 +208,7 @@ class Visitor(infoMode: InfoMode) extends FIRRTLBaseVisitor[FirrtlNode] { // visitStringLit private def visitStringLit[FirrtlNode](node: TerminalNode): StringLit = { val raw = node.getText.tail.init // Remove surrounding double quotes - FIRRTLStringLitHandler.unescape(raw) + ir.StringLit.unescape(raw) } private def visitWhen[FirrtlNode](ctx: WhenContext): Conditionally = { diff --git a/src/main/scala/firrtl/ir/IR.scala b/src/main/scala/firrtl/ir/IR.scala index aafa17ca..2f37b05a 100644 --- a/src/main/scala/firrtl/ir/IR.scala +++ b/src/main/scala/firrtl/ir/IR.scala @@ -42,8 +42,46 @@ trait HasInfo { } trait IsDeclaration extends HasName with HasInfo -case class StringLit(array: Array[Byte]) extends FirrtlNode { - def serialize: String = FIRRTLStringLitHandler.escape(this) +case class StringLit(string: String) extends FirrtlNode { + /** Returns an escaped and quoted String */ + def escape: String = { + import scala.reflect.runtime.universe._ + Literal(Constant(string)).toString + } + def serialize: String = { + val str = escape + str.slice(1, str.size - 1) + } + /** Format the string for Verilog */ + def verilogFormat: StringLit = { + StringLit(string.replaceAll("%x", "%h")) + } + /** Returns an escaped and quoted String */ + def verilogEscape: String = { + // normalize to turn things like รถ into o + import java.text.Normalizer + val normalized = Normalizer.normalize(string, Normalizer.Form.NFD) + val ascii = normalized flatMap StringLit.toASCII + ascii.mkString("\"", "", "\"") + } +} +object StringLit { + /** Maps characters to ASCII for Verilog emission */ + private def toASCII(char: Char): List[Char] = char match { + case nonASCII if !nonASCII.isValidByte => List('?') + case letter if letter.isLetter => List(letter) + case '\n' => List('\\', 'n') + case '\\' => List('\\', '\\') + case '\t' => List('\\', 't') + case '"' => List('\\', '"') + case other => List('?') + } + + /** Create a StringLit from a raw parsed String */ + def unescape(raw: String): StringLit = { + val str = StringContext.processEscapes(raw) + StringLit(str) + } } /** Primitive Operation @@ -269,7 +307,7 @@ case class Print( clk: Expression, en: Expression) extends Statement with HasInfo { def serialize: String = { - val strs = Seq(clk.serialize, en.serialize, "\"" + string.serialize + "\"") ++ + val strs = Seq(clk.serialize, en.serialize, string.escape) ++ (args map (_.serialize)) "printf(" + (strs mkString ", ") + ")" + info.serialize } diff --git a/src/main/scala/firrtl/passes/Checks.scala b/src/main/scala/firrtl/passes/Checks.scala index 74c532cc..09c818bd 100644 --- a/src/main/scala/firrtl/passes/Checks.scala +++ b/src/main/scala/firrtl/passes/Checks.scala @@ -85,7 +85,7 @@ object CheckHighForm extends Pass { def checkFstring(info: Info, mname: String, s: StringLit, i: Int) { val validFormats = "bdxc" - val (percent, npercents) = (s.array foldLeft (false, 0)){ + val (percent, npercents) = s.string.foldLeft((false, 0)) { case ((percentx, n), b) if percentx && (validFormats contains b) => (false, n + 1) case ((percentx, n), b) if percentx && b != '%' => diff --git a/src/main/scala/firrtl/passes/Passes.scala b/src/main/scala/firrtl/passes/Passes.scala index 4a90bd08..61afade6 100644 --- a/src/main/scala/firrtl/passes/Passes.scala +++ b/src/main/scala/firrtl/passes/Passes.scala @@ -269,7 +269,7 @@ object VerilogWrap extends Pass { } def vWrapS(s: Statement): Statement = { s map vWrapS map vWrapE match { - case sx: Print => sx copy (string = VerilogStringLitHandler.format(sx.string)) + case sx: Print => sx.copy(string = sx.string.verilogFormat) case sx => sx } } diff --git a/src/test/scala/firrtlTests/StringSpec.scala b/src/test/scala/firrtlTests/StringSpec.scala index 7e7c040e..87ee9191 100644 --- a/src/test/scala/firrtlTests/StringSpec.scala +++ b/src/test/scala/firrtlTests/StringSpec.scala @@ -3,14 +3,17 @@ package firrtlTests import firrtl._ +import firrtl.ir.StringLit import java.io._ import scala.sys.process._ +import annotation.tailrec import org.scalatest._ import org.scalatest.prop._ import org.scalatest.Assertions._ import org.scalacheck._ +import org.scalacheck.Arbitrary._ class PrintfSpec extends FirrtlPropSpec { @@ -55,17 +58,12 @@ class PrintfSpec extends FirrtlPropSpec { class StringSpec extends FirrtlPropSpec { // Whitelist is [0x20 - 0x7e] - val whitelist = - """ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ""" + + val whitelist = + """ !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ""" + """[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~""" - val whitelistBA: Array[Byte] = Array.range(0x20, 0x7e) map (_.toByte) property(s"Character whitelist should be supported: [$whitelist] ") { - val lit = firrtl.FIRRTLStringLitHandler.unescape(whitelist) - // Check internal - lit.array zip whitelistBA foreach { case (b, e) => - assert(b == e, s"(${b.toChar} did not equal expected ${e.toChar})") - } + val lit = StringLit.unescape(whitelist) // Check result assert(lit.serialize == whitelist) } @@ -74,40 +72,51 @@ class StringSpec extends FirrtlPropSpec { val esc = """\\\'\"\t\n""" val validEsc = Seq('n', 't', '\\', '"', '\'') property(s"Escape characters [$esc] should parse") { - val lit = firrtl.FIRRTLStringLitHandler.unescape(esc) - assert(lit.array(0) == 0x5c) - assert(lit.array(1) == 0x27) - assert(lit.array(2) == 0x22) - assert(lit.array(3) == 0x09) - assert(lit.array(4) == 0x0a) - assert(lit.array.length == 5) + val lit = StringLit.unescape(esc) + assert(lit.string(0).toByte == 0x5c) + assert(lit.string(1).toByte == 0x27) + assert(lit.string(2).toByte == 0x22) + assert(lit.string(3).toByte == 0x09) + assert(lit.string(4).toByte == 0x0a) + assert(lit.string.length == 5) } - // Generators for random testing - val validChar = Gen.oneOf((0x20.toChar to 0x5b.toChar).toSeq ++ - (0x5e.toChar to 0x7e.toChar).toSeq) // exclude '\\' - val validCharSeq = Gen.containerOf[Seq, Char](validChar) - val invalidChar = Gen.oneOf(Gen.choose(0x00.toChar, 0x1f.toChar), - Gen.choose(0x7f.toChar, 0xff.toChar)) - val invalidEsc = Gen.oneOf((0x00.toChar to 0xff.toChar).toSeq diff validEsc) - - property("Random invalid strings should fail") { - forAll(validCharSeq, invalidChar, validCharSeq) { - (head: Seq[Char], bad: Char, tail: Seq[Char]) => - val str = ((head :+ bad) ++ tail).mkString - intercept[InvalidStringLitException] { - firrtl.FIRRTLStringLitHandler.unescape(str) + // From IEEE 1364-2001 2.6 + def isValidVerilogString(str: String): Boolean = { + @tailrec def rec(xs: List[Char]): Boolean = xs match { + case Nil => true + case '\\' :: esc => + if (Set('n', 't', '\\', '"').contains(esc.head)) rec(esc.tail) + else { // Check valid octal escape, otherwise illegal + val next3 = esc.take(3) + if (next3.size == 3 && next3.forall(('0' to '7').toSet.contains)) rec(esc.drop(3)) + else false } + case char :: tail => // Check Legal ASCII + if (char.toInt < 256 && char.toInt >= 0) rec(tail) + else false } + rec(str.toList) } - - property(s"Invalid escape characters should fail") { - forAll(validCharSeq, invalidEsc, validCharSeq) { - (head: Seq[Char], badEsc: Char, tail: Seq[Char]) => - val str = (head ++ Seq('\\', badEsc) ++ tail).mkString - intercept[InvalidEscapeCharException] { - firrtl.FIRRTLStringLitHandler.unescape(str) - } + // From IEEE 1364-2001 17.1.1.2 + val legalFormats = "HhDdOoBbCcLlVvMmSsTtUuZz%".toSet + def isValidVerilogFormat(str: String): Boolean = str.toSeq.sliding(2).forall { + case Seq('%', char) if legalFormats contains char => true + case _ => true + } + + // Generators for legal Firrtl format strings + val genFormat = Gen.oneOf("bdxc%").map(List('%', _)) + val genEsc = Gen.oneOf(esc.toSeq).map(List(_)) + val genChar = arbitrary[Char].suchThat(c => (c != '%' && c != '\\')).map(List(_)) + val genFragment = Gen.frequency((10, genChar), (1, genFormat), (1, genEsc)).map(_.mkString) + val genString = Gen.listOf[String](genFragment).map(_.mkString) + + property ("Firrtl Format Strings with Unicode chars should emit as legal Verilog Strings") { + forAll (genString) { str => + val verilogStr = StringLit(str).verilogFormat.verilogEscape + assert(isValidVerilogString(verilogStr)) + assert(isValidVerilogFormat(verilogStr)) } } } |
