diff options
author | Bjørn Erik Pedersen <[email protected]> | 2019-08-01 10:19:19 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2019-08-01 10:19:19 +0200 |
commit | 53077b0da54906feee64a03612e5186043e17341 (patch) | |
tree | c99b1456485cfc2ed41f3a1b732e44c4acbc3061 /compare | |
parent | a4f96a9d8c2d2da40796757f7e9a3023157abd2f (diff) | |
download | hugo-53077b0da54906feee64a03612e5186043e17341.tar.gz hugo-53077b0da54906feee64a03612e5186043e17341.zip |
Merge pull request #6149 from bep/sort-caseinsensitive
Implement lexicographically string sorting
Diffstat (limited to 'compare')
-rw-r--r-- | compare/compare.go | 4 | ||||
-rw-r--r-- | compare/compare_strings.go | 113 | ||||
-rw-r--r-- | compare/compare_strings_test.go | 66 |
3 files changed, 181 insertions, 2 deletions
diff --git a/compare/compare.go b/compare/compare.go index 18c0de777..537a66676 100644 --- a/compare/compare.go +++ b/compare/compare.go @@ -1,4 +1,4 @@ -// Copyright 2017-present The Hugo Authors. All rights reserved. +// Copyright 2019 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ type Eqer interface { Eq(other interface{}) bool } -// ProbablyEq is an equal check that may return false positives, but never +// ProbablyEqer is an equal check that may return false positives, but never // a false negative. type ProbablyEqer interface { ProbablyEq(other interface{}) bool diff --git a/compare/compare_strings.go b/compare/compare_strings.go new file mode 100644 index 000000000..1fd954081 --- /dev/null +++ b/compare/compare_strings.go @@ -0,0 +1,113 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compare + +import ( + "strings" + "unicode" + "unicode/utf8" +) + +// Strings returns an integer comparing two strings lexicographically. +func Strings(s, t string) int { + c := compareFold(s, t) + + if c == 0 { + // "B" and "b" would be the same so we need a tiebreaker. + return strings.Compare(s, t) + } + + return c +} + +// This function is derived from strings.EqualFold in Go's stdlib. +// https://github.com/golang/go/blob/ad4a58e31501bce5de2aad90a620eaecdc1eecb8/src/strings/strings.go#L893 +func compareFold(s, t string) int { + for s != "" && t != "" { + var sr, tr rune + if s[0] < utf8.RuneSelf { + sr, s = rune(s[0]), s[1:] + } else { + r, size := utf8.DecodeRuneInString(s) + sr, s = r, s[size:] + } + if t[0] < utf8.RuneSelf { + tr, t = rune(t[0]), t[1:] + } else { + r, size := utf8.DecodeRuneInString(t) + tr, t = r, t[size:] + } + + if tr == sr { + continue + } + + c := 1 + if tr < sr { + tr, sr = sr, tr + c = -c + } + + // ASCII only. + if tr < utf8.RuneSelf { + if sr >= 'A' && sr <= 'Z' { + if tr <= 'Z' { + // Same case. + return -c + } + + diff := tr - (sr + 'a' - 'A') + + if diff == 0 { + continue + } + + if diff < 0 { + return c + } + + if diff > 0 { + return -c + } + } + } + + // Unicode. + r := unicode.SimpleFold(sr) + for r != sr && r < tr { + r = unicode.SimpleFold(r) + } + + if r == tr { + continue + } + + return -c + } + + if s == "" && t == "" { + return 0 + } + + if s == "" { + return -1 + } + + return 1 +} + +// LessStrings returns whether s is less than t lexicographically. +func LessStrings(s, t string) bool { + return Strings(s, t) < 0 +} diff --git a/compare/compare_strings_test.go b/compare/compare_strings_test.go new file mode 100644 index 000000000..82132cf11 --- /dev/null +++ b/compare/compare_strings_test.go @@ -0,0 +1,66 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compare + +import ( + "fmt" + "sort" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCompare(t *testing.T) { + assert := require.New(t) + for i, test := range []struct { + a string + b string + }{ + {"a", "a"}, + {"A", "a"}, + {"Ab", "Ac"}, + {"az", "Za"}, + {"C", "D"}, + {"B", "a"}, + {"C", ""}, + {"", ""}, + {"αβδC", "ΑΒΔD"}, + {"αβδC", "ΑΒΔ"}, + {"αβδ", "ΑΒΔD"}, + {"αβδ", "ΑΒΔ"}, + {"β", "δ"}, + {"好", strings.ToLower("好")}, + } { + + expect := strings.Compare(strings.ToLower(test.a), strings.ToLower(test.b)) + got := compareFold(test.a, test.b) + + assert.Equal(expect, got, fmt.Sprintf("test %d: %d", i, expect)) + + } +} + +func TestLexicographicSort(t *testing.T) { + assert := require.New(t) + + s := []string{"b", "Bz", "ba", "A", "Ba", "ba"} + + sort.Slice(s, func(i, j int) bool { + return LessStrings(s[i], s[j]) + }) + + assert.Equal([]string{"A", "b", "Ba", "ba", "ba", "Bz"}, s) + +} |