-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #946 from go-kivik/collaction
Add collation support
- Loading branch information
Showing
9 changed files
with
460 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
[![Go Reference](https://pkg.go.dev/badge/github.com/go-kivik/kivik/x/sqlite/v4.svg)](https://pkg.go.dev/github.com/go-kivik/kivik/x/sqlite/v4) | ||
|
||
# Kivik SQLite backend | ||
|
||
SQLite-backed driver for [Kivik](https://github.com/go-kivik/kivik). | ||
|
||
## Usage | ||
|
||
This package provides a (partial, experimental) implementation of the | ||
[`github.com/go-kivik/kivik/v4/driver`](https://pkg.go.dev/github.com/go-kivik/kivik/v4/driver) | ||
interface. You must import the driver and can then use the | ||
[`Kivik`](https://pkg.go.dev/github.com/go-kivik/kivik/v4) API. | ||
|
||
```go | ||
package main | ||
|
||
import ( | ||
"context" | ||
|
||
kivik "github.com/go-kivik/kivik/v4" | ||
_ "github.com/go-kivik/kivik/x/sqlite/v4" // The SQLite driver | ||
) | ||
|
||
func main() { | ||
client, err := kivik.New(context.TODO(), "sqlite", "") | ||
// ... | ||
} | ||
``` | ||
|
||
## Why? | ||
|
||
The primary intended purpose of this driver is for testing. The goal is to allow | ||
you to test your CouchDB apps without relying on a full-fledged CouchDB server. | ||
|
||
## Status | ||
|
||
This driver is incomplete, and not yet usable. But watch this space. | ||
|
||
## Incompatibilities | ||
|
||
The SQLite implementation of CouchDB is incompatible with the CouchDB specification in a few subtle ways, which are outlined here: | ||
|
||
- The Collation order supported by Go is slightly different than that described by the [CouchDB documentation](https://docs.couchdb.org/en/stable/ddocs/views/collation.html#collation-specification). In particular: | ||
- The Unicode Collation Algorithm (UCA) supported natively by Go sorts <code>`</code> and <code>^</code> after other symbols, not before. | ||
- Because Go's maps are unordered, this implementation does not honor the order of object key members when collating. That is to say, the object `{b:2,a:1}` is treated as `{a:1,b:2}` for collation purposes. This is tracked in [issue #952](https://github.com/go-kivik/kivik/issues/952). Please leave a comment there if this is affecting you. | ||
|
||
## License | ||
|
||
This software is released under the terms of the Apache 2.0 license. See | ||
LICENCE.md, or read the [full license](https://www.apache.org/licenses/LICENSE-2.0). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
// use this file except in compliance with the License. You may obtain a copy of | ||
// the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
// License for the specific language governing permissions and limitations under | ||
// the License. | ||
|
||
package sqlite | ||
|
||
import ( | ||
"bytes" | ||
"encoding/json" | ||
"slices" | ||
"sort" | ||
"strconv" | ||
|
||
"golang.org/x/text/collate" | ||
"golang.org/x/text/language" | ||
) | ||
|
||
var collator = collate.New(language.Und) | ||
|
||
func couchdbCmpString(a, b string) int { | ||
return couchdbCmpJSON(json.RawMessage(a), json.RawMessage(b)) | ||
} | ||
|
||
// couchdbCmpJSON is a comparison function for CouchDB collation. | ||
// See https://docs.couchdb.org/en/stable/ddocs/views/collation.html | ||
func couchdbCmpJSON(a, b json.RawMessage) int { | ||
if bytes.Equal(a, b) { | ||
return 0 | ||
} | ||
at, bt := jsType(a), jsType(b) | ||
switch { | ||
|
||
// Null | ||
case at == jsTypeNull: | ||
return -1 | ||
case bt == jsTypeNull: | ||
return 1 | ||
|
||
// Booleans | ||
case at == jsTypeBoolean: | ||
if bt != jsTypeBoolean { | ||
return -1 | ||
} | ||
if bytes.Equal(a, []byte("false")) { | ||
return -1 | ||
} | ||
return 1 | ||
case bt == jsTypeBoolean: | ||
return 1 | ||
|
||
// Numbers | ||
case at == jsTypeNumber: | ||
if bt != jsTypeNumber { | ||
return -1 | ||
} | ||
av, _ := strconv.ParseFloat(string(a), 64) | ||
bv, _ := strconv.ParseFloat(string(b), 64) | ||
switch { | ||
case av < bv: | ||
return -1 | ||
case av > bv: | ||
return 1 | ||
default: | ||
return 0 | ||
} | ||
case bt == jsTypeNumber: | ||
return 1 | ||
|
||
// Strings | ||
case at == jsTypeString: | ||
if bt != jsTypeString { | ||
return -1 | ||
} | ||
return collator.CompareString(string(a), string(b)) | ||
case bt == jsTypeString: | ||
return 1 | ||
|
||
// Arrays | ||
case at == jsTypeArray: | ||
if bt != jsTypeArray { | ||
return -1 | ||
} | ||
var av, bv []json.RawMessage | ||
_ = json.Unmarshal(a, &av) | ||
_ = json.Unmarshal(b, &bv) | ||
for i := 0; i < len(av) && i < len(bv); i++ { | ||
if r := couchdbCmpJSON(av[i], bv[i]); r != 0 { | ||
return r | ||
} | ||
} | ||
return len(av) - len(bv) | ||
|
||
case bt == jsTypeArray: | ||
return 1 | ||
|
||
// Objects | ||
case at == jsTypeObject: | ||
if bt != jsTypeObject { | ||
return -1 | ||
} | ||
|
||
var av, bv rawObject | ||
_ = json.Unmarshal(a, &av) | ||
_ = json.Unmarshal(b, &bv) | ||
for i := 0; i < len(av) && i < len(bv); i++ { | ||
// First compare keys | ||
if r := couchdbCmpJSON(av[i][0], bv[i][0]); r != 0 { | ||
return r | ||
} | ||
// Then values | ||
if r := couchdbCmpJSON(av[i][1], bv[i][1]); r != 0 { | ||
return r | ||
} | ||
} | ||
|
||
return len(av) - len(bv) | ||
} | ||
|
||
return 1 | ||
} | ||
|
||
// rawObject represents an ordered JSON object. | ||
type rawObject [][2]json.RawMessage | ||
|
||
func (r *rawObject) UnmarshalJSON(b []byte) error { | ||
var o map[string]json.RawMessage | ||
if err := json.Unmarshal(b, &o); err != nil { | ||
return err | ||
} | ||
*r = make([][2]json.RawMessage, 0, len(o)) | ||
for k, v := range o { | ||
rawKey, _ := json.Marshal(k) | ||
*r = append(*r, [2]json.RawMessage{rawKey, v}) | ||
} | ||
// This sort is a hack, to make sorting stable in light of the limitation | ||
// outlined in #952. Without this, the order is arbitrary, and the collation | ||
// order is unstable. This could be simplified, but I'm leaving it as-is | ||
// for the moment, so that it's easy to revert to CouchDB behavior if #952 | ||
// is ever implemented. If it is, deleting this sort call should be the | ||
// only change needed in the [rawObject] type. | ||
slices.SortFunc(*r, func(a, b [2]json.RawMessage) int { | ||
if r := couchdbCmpJSON(a[0], b[0]); r != 0 { | ||
return r | ||
} | ||
return couchdbCmpJSON(a[1], b[1]) | ||
}) | ||
return nil | ||
} | ||
|
||
// Kinds of JSON value, as reported by jsType. The numeric values are only
// identifiers; the collation order between kinds is decided by
// couchdbCmpJSON, not by these constants.
const (
	jsTypeString = iota
	jsTypeArray
	jsTypeObject
	jsTypeNull
	jsTypeBoolean
	jsTypeNumber
)

// jsType reports the kind of JSON value held in s by inspecting its first
// significant byte. Leading JSON whitespace (space, tab, CR, LF) is skipped.
// Input that is empty or consists only of whitespace is reported as
// jsTypeNull instead of panicking on an out-of-range index. Any leading byte
// that does not introduce a string, array, object, null or boolean is
// reported as jsTypeNumber.
func jsType(s json.RawMessage) int {
	s = bytes.TrimLeft(s, " \t\r\n")
	if len(s) == 0 {
		return jsTypeNull
	}
	switch s[0] {
	case '"':
		return jsTypeString
	case '[':
		return jsTypeArray
	case '{':
		return jsTypeObject
	case 'n':
		return jsTypeNull
	case 't', 'f':
		return jsTypeBoolean
	}
	return jsTypeNumber
}
|
||
type couchdbKeys []json.RawMessage | ||
|
||
var _ sort.Interface = &couchdbKeys{} | ||
|
||
func (c couchdbKeys) Len() int { return len(c) } | ||
func (c couchdbKeys) Less(i, j int) bool { return couchdbCmpJSON(c[i], c[j]) < 0 } | ||
func (c couchdbKeys) Swap(i, j int) { c[i], c[j] = c[j], c[i] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
// use this file except in compliance with the License. You may obtain a copy of | ||
// the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
// License for the specific language governing permissions and limitations under | ||
// the License. | ||
|
||
//go:build !js | ||
// +build !js | ||
|
||
package sqlite | ||
|
||
import ( | ||
"encoding/json" | ||
"math/rand" | ||
"sort" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
) | ||
|
||
func Test_couchdbCmp(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
want []string | ||
}{ | ||
{ | ||
name: "js types", // See https://docs.couchdb.org/en/stable/ddocs/views/collation.html#collation-specification | ||
want: []string{ | ||
`null`, | ||
`false`, | ||
`true`, | ||
|
||
// then numbers | ||
`1`, | ||
`2`, | ||
`3.0`, | ||
`4`, | ||
|
||
// then text, case sensitive | ||
`"a"`, | ||
`"A"`, | ||
`"aa"`, | ||
`"b"`, | ||
`"B"`, | ||
`"ba"`, | ||
`"bb"`, | ||
|
||
// then arrays. compared element by element until different. | ||
// Longer arrays sort after their prefixes | ||
`["a"]`, | ||
`["b"]`, | ||
`["b","c"]`, | ||
`["b","c", "a"]`, | ||
`["b","d"]`, | ||
`["b","d", "e"]`, | ||
|
||
// then object, compares each key value in the list until different. | ||
// larger objects sort after their subset objects. | ||
`{"a":1}`, | ||
`{"a":2}`, | ||
`{"b":1}`, | ||
`{"b":2}`, | ||
// TODO: See #952 | ||
// `{"b":2, "a":1}`, // Member order does matter for collation. CouchDB preserves member order but doesn't require that clients will. this test might fail if used with a js engine that doesn't preserve order. | ||
`{"b":2, "c":2}`, | ||
}, | ||
}, | ||
{ | ||
name: "7-bit ASCII", | ||
want: []string{ | ||
// "\"`\"", `"^"`, // TODO: These don't sort according to CouchDB rules | ||
`"_"`, `"-"`, `","`, `";"`, `":"`, `"!"`, `"?"`, | ||
`"."`, `"'"`, `"""`, `"("`, `")"`, `"["`, `"]"`, `"{"`, `"}"`, | ||
`"@"`, `"*"`, `"/"`, `"\"`, `"&"`, `"#"`, `"%"`, `"+"`, `"<"`, | ||
`"="`, `">"`, `"|"`, `"~"`, `"$"`, `"0"`, `"1"`, `"2"`, `"3"`, | ||
`"4"`, `"5"`, `"6"`, `"7"`, `"8"`, `"9"`, | ||
`"a"`, `"A"`, `"b"`, `"B"`, `"c"`, `"C"`, `"d"`, `"D"`, `"e"`, | ||
`"E"`, `"f"`, `"F"`, `"g"`, `"G"`, `"h"`, `"H"`, `"i"`, `"I"`, | ||
`"j"`, `"J"`, `"k"`, `"K"`, `"l"`, `"L"`, `"m"`, `"M"`, `"n"`, | ||
`"N"`, `"o"`, `"O"`, `"p"`, `"P"`, `"q"`, `"Q"`, `"r"`, `"R"`, | ||
`"s"`, `"S"`, `"t"`, `"T"`, `"u"`, `"U"`, `"v"`, `"V"`, `"w"`, | ||
`"W"`, `"x"`, `"X"`, `"y"`, `"Y"`, `"z"`, `"Z"`, | ||
}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
// Shuffle the input | ||
input := make([]json.RawMessage, len(tt.want)) | ||
for i, v := range tt.want { | ||
input[i] = json.RawMessage(v) | ||
} | ||
rand.Shuffle(len(input), func(i, j int) { input[i], input[j] = input[j], input[i] }) | ||
|
||
sort.Sort(couchdbKeys(input)) | ||
|
||
got := make([]string, len(input)) | ||
for i, v := range input { | ||
got[i] = string(v) | ||
} | ||
if d := cmp.Diff(tt.want, got); d != "" { | ||
t.Errorf("Unexpected result:\n%s", d) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.