// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package reflect import ( "unicode/utf8" ) // errSyntax indicates that a value does not have the right syntax for the target type. var errSyntax = badSyntax{} type badSyntax struct{} func (badSyntax) Error() string { return "invalid syntax" } func unhex(b byte) (v rune, ok bool) { c := rune(b) switch { case '0' <= c && c <= '9': return c - '0', true case 'a' <= c && c <= 'f': return c - 'a' + 10, true case 'A' <= c && c <= 'F': return c - 'A' + 10, true } return } const ( lowerhex = "0123456789abcef" ) // unquoteChar decodes the first character or byte in the escaped string // or character literal represented by the string s. // It returns four values: // // 1. value, the decoded Unicode code point or byte value; // 2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; // 3. tail, the remainder of the string after the character; and // 4. an error that will be nil if the character is syntactically valid. // // The second argument, quote, specifies the type of literal being parsed // and therefore which escaped quote character is permitted. // If set to a single quote, it permits the sequence \' and disallows unescaped '. // If set to a double quote, it permits \" and disallows unescaped ". // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { // easy cases if len(s) == 0 { err = errSyntax return } switch c := s[0]; { case c == quote && (quote == '\'' || quote == '"'): err = errSyntax return case c >= utf8.RuneSelf: r, size := utf8.DecodeRuneInString(s) return r, true, s[size:], nil case c != '\\': return rune(s[0]), false, s[1:], nil } // hard case: c is backslash if len(s) <= 1 { err = errSyntax return } c := s[1] s = s[2:] switch c { case 'a': value = '\a' case 'b': value = '\b' case 'f': value = '\f' case 'n': value = '\n' case 'r': value = '\r' case 't': value = '\t' case 'v': value = '\v' case 'x', 'u', 'U': n := 0 switch c { case 'x': n = 2 case 'u': n = 4 case 'U': n = 8 } var v rune if len(s) < n { err = errSyntax return } for j := 0; j < n; j++ { x, ok := unhex(s[j]) if !ok { err = errSyntax return } v = v<<4 | x } s = s[n:] if c == 'x' { // single-byte string, possibly not UTF-8 value = v break } if v > utf8.MaxRune { err = errSyntax return } value = v multibyte = true case '0', '1', '2', '3', '4', '5', '6', '7': v := rune(c) - '0' if len(s) < 2 { err = errSyntax return } for j := 0; j < 2; j++ { // one digit already; two more x := rune(s[j]) - '0' if x < 0 || x > 7 { err = errSyntax return } v = (v << 3) | x } s = s[2:] if v > 255 { err = errSyntax return } value = v case '\\': value = '\\' case '\'', '"': if c != quote { err = errSyntax return } value = rune(c) default: err = errSyntax return } tail = s return } // unquote interprets s as a single-quoted, double-quoted, // or backquoted Go string literal, returning the string value // that s quotes. (If s is single-quoted, it would be a Go // character literal; unquote returns the corresponding // one-character string.) func unquote(s string) (string, error) { n := len(s) if n < 2 { return "", errSyntax } quote := s[0] if quote != s[n-1] { return "", errSyntax } s = s[1 : n-1] if quote == '`' { if contains(s, '`') { return "", errSyntax } if contains(s, '\r') { // -1 because we know there is at least one \r to remove. buf := make([]byte, 0, len(s)-1) for i := 0; i < len(s); i++ { if s[i] != '\r' { buf = append(buf, s[i]) } } return string(buf), nil } return s, nil } if quote != '"' && quote != '\'' { return "", errSyntax } if contains(s, '\n') { return "", errSyntax } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"': if utf8.ValidString(s) { return s, nil } case '\'': r, size := utf8.DecodeRuneInString(s) if size == len(s) && (r != utf8.RuneError || size != 1) { return s, nil } } } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := unquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", errSyntax } } return string(buf), nil } func quote(s string) string { buf := make([]byte, 0, 3*len(s)/2) const quote = '"' buf = append(buf, quote) for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) width = 1 if r >= utf8.RuneSelf { r, width = utf8.DecodeRuneInString(s) } if width == 1 && r == utf8.RuneError { buf = append(buf, `\x`...) buf = append(buf, lowerhex[s[0]>>4]) buf = append(buf, lowerhex[s[0]&0xF]) continue } buf = appendEscapedRune(buf, r) } buf = append(buf, quote) return string(buf) } func appendEscapedRune(buf []byte, r rune) []byte { const quote = '"' var runeTmp [utf8.UTFMax]byte if r == rune(quote) || r == '\\' { // always backslashed buf = append(buf, '\\') buf = append(buf, byte(r)) return buf } if isPrint(r) { n := utf8.EncodeRune(runeTmp[:], r) buf = append(buf, runeTmp[:n]...) return buf } switch r { case '\a': buf = append(buf, `\a`...) case '\b': buf = append(buf, `\b`...) case '\f': buf = append(buf, `\f`...) case '\n': buf = append(buf, `\n`...) case '\r': buf = append(buf, `\r`...) case '\t': buf = append(buf, `\t`...) case '\v': buf = append(buf, `\v`...) default: switch { case r < ' ' || r == 0x7f: buf = append(buf, `\x`...) buf = append(buf, lowerhex[byte(r)>>4]) buf = append(buf, lowerhex[byte(r)&0xF]) case !utf8.ValidRune(r): r = 0xFFFD fallthrough case r < 0x10000: buf = append(buf, `\u`...) for s := 12; s >= 0; s -= 4 { buf = append(buf, lowerhex[r>>uint(s)&0xF]) } default: buf = append(buf, `\U`...) for s := 28; s >= 0; s -= 4 { buf = append(buf, lowerhex[r>>uint(s)&0xF]) } } } return buf } // This is only used for struct tags. Assume func isPrint(r rune) bool { if r <= 0xFF { if 0x20 <= r && r <= 0x7E { // All the ASCII is printable from space through DEL-1. return true } if 0xA1 <= r && r <= 0xFF { // Similarly for ¡ through ÿ... return r != 0xAD // ...except for the bizarre soft hyphen. } return false } // TinyGo: Skip all other unicode processing return false } // contains reports whether the string contains the byte c. func contains(s string, c byte) bool { return indexByteString(s, c) != -1 } // Index finds the index of the first instance of the specified byte in the string. // If the byte is not found, this returns -1. func indexByteString(s string, c byte) int { for i := 0; i < len(s); i++ { if s[i] == c { return i } } return -1 }