diff options
author | Mattias Wadman <[email protected]> | 2016-04-13 00:14:00 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2016-04-14 10:46:27 +0200 |
commit | 5d50c46482d231efa26c95e4705e720fb9bf753c (patch) | |
tree | 413d6bfc1f632d6e0f398471baf0f3a56b392522 /parser | |
parent | d48b986c45f8d5a5fef7c98da17af8ca82867b25 (diff) | |
download | hugo-5d50c46482d231efa26c95e4705e720fb9bf753c.tar.gz hugo-5d50c46482d231efa26c95e4705e720fb9bf753c.zip |
Chomp Unicode BOM if present
Useful when using or sharing files with users whose editors
prepend a Unicode byte order mark (BOM) to the file, like Windows Notepad.
This will still assume files are UTF-8 encoded.
Closes #2075
Diffstat (limited to 'parser')
-rw-r--r-- | parser/page.go | 19 |
1 file changed, 19 insertions, 0 deletions
diff --git a/parser/page.go b/parser/page.go index 949bfdac6..5092caddc 100644 --- a/parser/page.go +++ b/parser/page.go @@ -50,6 +50,8 @@ const ( HTMLCommentStart = "<!--" // HTMLCommentEnd identifies the end of HTML comment. HTMLCommentEnd = "-->" + // BOM Unicode byte order marker + BOM = '\ufeff' ) var ( @@ -101,6 +103,10 @@ func (p *page) Metadata() (meta interface{}, err error) { func ReadFrom(r io.Reader) (p Page, err error) { reader := bufio.NewReader(r) + // chomp BOM and assume UTF-8 + if err = chompBOM(reader); err != nil && err != io.EOF { + return + } if err = chompWhitespace(reader); err != nil && err != io.EOF { return } @@ -135,6 +141,19 @@ func ReadFrom(r io.Reader) (p Page, err error) { return newp, nil } +func chompBOM(r io.RuneScanner) (err error) { + for { + c, _, err := r.ReadRune() + if err != nil { + return err + } + if c != BOM { + r.UnreadRune() + return nil + } + } +} + func chompWhitespace(r io.RuneScanner) (err error) { for { c, _, err := r.ReadRune() |