Skip to content

Commit

Permalink
Merge pull request hashicorp#241 from octo/scanner-null
Browse files Browse the repository at this point in the history
printer, scanner: Don't produce unparsable output.
  • Loading branch information
mitchellh authored Mar 20, 2018
2 parents b1738d9 + ec2ba18 commit adef769
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
22 changes: 22 additions & 0 deletions hcl/printer/printer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,25 @@ func lineAt(text []byte, offs int) []byte {
}
return text[offs:i]
}

// TestFormatValidOutput ensures that whenever Format() succeeds, its output
// can be parsed again.
func TestFormatValidOutput(t *testing.T) {
	// Each case is a comment containing an unusual character: a null byte
	// and the code point U+E123.
	cases := []string{
		"#\x00",
		"#\ue123t",
	}

	for _, c := range cases {
		f, err := Format([]byte(c))
		if err != nil {
			// Ignore these failures, not all inputs are valid HCL. Only
			// output that Format() actually produced is checked below.
			t.Logf("Format(%q) = %v", c, err)
			continue
		}

		// Anything Format() returned without error must be accepted by the
		// parser; otherwise Format() produced unparsable output.
		if _, err := parser.Parse(f); err != nil {
			t.Errorf("Format(%q) = %q; Parse(%q) = %v", c, f, f, err)
		}
	}
}
8 changes: 6 additions & 2 deletions hcl/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,16 @@ func (s *Scanner) next() rune {
s.srcPos.Column = 0
}

// A null character is always an error, even when it is the last byte of input
if ch == '\x00' && s.buf.Len() > 0 {
if ch == '\x00' {
s.err("unexpected null character (0x00)")
return eof
}

if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}

// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
Expand Down
3 changes: 3 additions & 0 deletions hcl/scanner/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,12 @@ func TestScan_crlf(t *testing.T) {
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\uE123", "1:1", "unicode code point U+E123 reserved for internal use", token.ILLEGAL)

testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
testError(t, "ab\x00", "1:3", "unexpected null character (0x00)", token.IDENT)
testError(t, "ab\x00\n", "1:3", "unexpected null character (0x00)", token.IDENT)

testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
Expand Down

0 comments on commit adef769

Please sign in to comment.