Skip to content

Commit

Permalink
Merge pull request #946 from go-kivik/collaction
Browse files Browse the repository at this point in the history
Add collation support
  • Loading branch information
flimzy committed Apr 26, 2024
2 parents 0f1c059 + 6acd05b commit f6eb95f
Show file tree
Hide file tree
Showing 9 changed files with 460 additions and 21 deletions.
50 changes: 50 additions & 0 deletions x/sqlite/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
[![Go Reference](https://pkg.go.dev/badge/github.com/go-kivik/kivik/x/sqlite/v4.svg)](https://pkg.go.dev/github.com/go-kivik/kivik/x/sqlite/v4)

# Kivik SQLite backend

SQLite-backed driver for [Kivik](https://github.com/go-kivik/kivik).

## Usage

This package provides a (partial, experimental) implementation of the
[`github.com/go-kivik/kivik/v4/driver`](https://pkg.go.dev/github.com/go-kivik/kivik/v4/driver)
interface. You must import the driver and can then use the
[`Kivik`](https://pkg.go.dev/github.com/go-kivik/kivik/v4) API.

```go
package main

import (
"context"

kivik "github.com/go-kivik/kivik/v4"
_ "github.com/go-kivik/kivik/x/sqlite/v4" // The SQLite driver
)

func main() {
client, err := kivik.New(context.TODO(), "sqlite", "")
// ...
}
```

## Why?

The primary intended purpose of this driver is for testing. The goal is to allow
you to test your CouchDB apps without relying on a full-fledged CouchDB server.

## Status

This driver is incomplete, and not yet usable. But watch this space.

## Incompatibilities

The SQLite implementation of CouchDB is incompatible with the CouchDB specification in a few subtle ways, which are outlined here:

- The Collation order supported by Go is slightly different than that described by the [CouchDB documentation](https://docs.couchdb.org/en/stable/ddocs/views/collation.html#collation-specification). In particular:
  - The Unicode Collation Algorithm (UCA) supported natively by Go sorts <code>`</code> and <code>^</code> after other symbols, not before.
  - Because Go's maps are unordered, this implementation does not honor the order of object key members when collating. That is to say, the object `{b:2,a:1}` is treated as `{a:1,b:2}` for collation purposes. This is tracked in [issue #952](https://github.com/go-kivik/kivik/issues/952). Please leave a comment there if this is affecting you.

## License

This software is released under the terms of the Apache 2.0 license. See
LICENCE.md, or read the [full license](http://www.apache.org/licenses/LICENSE-2.0).
189 changes: 189 additions & 0 deletions x/sqlite/collation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

package sqlite

import (
"bytes"
"encoding/json"
"slices"
"sort"
"strconv"

"golang.org/x/text/collate"
"golang.org/x/text/language"
)

// collator is the package-wide string collator, created for the undetermined
// locale (language.Und). couchdbCmpJSON uses it to order JSON strings.
//
// NOTE(review): this single Collator is shared by every comparison; confirm
// that collate.Collator is safe for concurrent use, or guard it with a mutex
// if comparisons can run on multiple goroutines — TODO confirm.
var collator = collate.New(language.Und)

// couchdbCmpString is the string-typed counterpart of couchdbCmpJSON: both
// arguments are treated as raw JSON text and compared using the same rules.
// The result is whatever couchdbCmpJSON returns for the converted values.
func couchdbCmpString(a, b string) int {
	left, right := json.RawMessage(a), json.RawMessage(b)
	return couchdbCmpJSON(left, right)
}

// couchdbCmpJSON compares two raw JSON values according to CouchDB's view
// collation rules. It returns a negative number when a sorts before b, zero
// when they are considered equal, and a positive number when a sorts after b.
//
// Values of different types order as
// null < booleans < numbers < strings < arrays < objects. Within one type:
//   - false sorts before true;
//   - numbers compare by their float64 value;
//   - strings compare by their decoded text using the package collator;
//   - arrays compare element by element; when one array is a prefix of the
//     other, the shorter one sorts first;
//   - objects compare member by member (key first, then value) in the order
//     produced by rawObject; when all shared positions are equal, the object
//     with fewer members sorts first.
//
// Both inputs are expected to be valid, non-empty JSON documents. Decoding
// errors are deliberately ignored so that a malformed value still yields some
// ordering instead of aborting a sort.
//
// See https://docs.couchdb.org/en/stable/ddocs/views/collation.html
func couchdbCmpJSON(a, b json.RawMessage) int {
	if bytes.Equal(a, b) {
		return 0
	}
	at, bt := jsType(a), jsType(b)
	switch {

	// Null
	case at == jsTypeNull:
		return -1
	case bt == jsTypeNull:
		return 1

	// Booleans
	case at == jsTypeBoolean:
		if bt != jsTypeBoolean {
			return -1
		}
		// Both are booleans and their bytes differ, so whichever one is
		// "false" sorts first.
		if string(a) == "false" {
			return -1
		}
		return 1
	case bt == jsTypeBoolean:
		return 1

	// Numbers
	case at == jsTypeNumber:
		if bt != jsTypeNumber {
			return -1
		}
		// Parse errors are ignored on purpose; an unparsable number compares
		// using whatever value ParseFloat returns alongside the error.
		av, _ := strconv.ParseFloat(string(a), 64)
		bv, _ := strconv.ParseFloat(string(b), 64)
		switch {
		case av < bv:
			return -1
		case av > bv:
			return 1
		default:
			return 0
		}
	case bt == jsTypeNumber:
		return 1

	// Strings
	case at == jsTypeString:
		if bt != jsTypeString {
			return -1
		}
		// Collate the decoded text rather than the JSON literal. Comparing
		// the literals would let the closing quote and any escape sequences
		// (\", \n, \u003c, ...) take part in the comparison, so that, for
		// example, "a!" would sort before its own prefix "a".
		var aStr, bStr string
		if json.Unmarshal(a, &aStr) != nil || json.Unmarshal(b, &bStr) != nil {
			// At least one side is not a decodable JSON string; fall back to
			// comparing the literals verbatim.
			return collator.CompareString(string(a), string(b))
		}
		return collator.CompareString(aStr, bStr)
	case bt == jsTypeString:
		return 1

	// Arrays
	case at == jsTypeArray:
		if bt != jsTypeArray {
			return -1
		}
		var av, bv []json.RawMessage
		_ = json.Unmarshal(a, &av)
		_ = json.Unmarshal(b, &bv)
		for i := 0; i < len(av) && i < len(bv); i++ {
			if r := couchdbCmpJSON(av[i], bv[i]); r != 0 {
				return r
			}
		}
		// All shared positions are equal: the shorter array sorts first.
		return len(av) - len(bv)

	case bt == jsTypeArray:
		return 1

	// Objects
	case at == jsTypeObject:
		if bt != jsTypeObject {
			return -1
		}

		var av, bv rawObject
		_ = json.Unmarshal(a, &av)
		_ = json.Unmarshal(b, &bv)
		for i := 0; i < len(av) && i < len(bv); i++ {
			// First compare keys
			if r := couchdbCmpJSON(av[i][0], bv[i][0]); r != 0 {
				return r
			}
			// Then values
			if r := couchdbCmpJSON(av[i][1], bv[i][1]); r != 0 {
				return r
			}
		}

		// All shared positions are equal: the smaller object sorts first.
		return len(av) - len(bv)
	}

	// Not reached in practice: jsType always reports one of the six types
	// handled above. The return only satisfies the compiler.
	return 1
}

// rawObject is a JSON object flattened into a list of members. Each element
// holds the member's key (re-encoded as a JSON string) at index 0 and its raw
// value at index 1.
type rawObject [][2]json.RawMessage

// UnmarshalJSON decodes a JSON object into r as a list of key/value pairs
// sorted with couchdbCmpJSON (by key, then by value). It returns the decoding
// error, if any, in which case r is left untouched.
func (r *rawObject) UnmarshalJSON(b []byte) error {
	var members map[string]json.RawMessage
	if err := json.Unmarshal(b, &members); err != nil {
		return err
	}

	pairs := make([][2]json.RawMessage, 0, len(members))
	for name, value := range members {
		// The error is discarded: the key is a plain Go string.
		encodedName, _ := json.Marshal(name)
		pairs = append(pairs, [2]json.RawMessage{encodedName, value})
	}

	// Workaround for the limitation described in #952: decoding through a map
	// loses the document's member order, and map iteration order is
	// arbitrary, so without an explicit sort the collation result would be
	// unstable. The sort is intentionally kept as a separate step so that,
	// should #952 ever be implemented, removing this call is the only change
	// the [rawObject] type needs to get back to CouchDB behavior.
	slices.SortFunc(pairs, func(x, y [2]json.RawMessage) int {
		byKey := couchdbCmpJSON(x[0], y[0])
		if byKey != 0 {
			return byKey
		}
		return couchdbCmpJSON(x[1], y[1])
	})

	*r = pairs
	return nil
}

// JSON value categories reported by jsType. The numeric values carry no
// ordering meaning of their own.
const (
	jsTypeString = iota
	jsTypeArray
	jsTypeObject
	jsTypeNull
	jsTypeBoolean
	jsTypeNumber
)

// jsType classifies a raw JSON value by looking at its first byte only and
// returns one of the jsType* constants. Anything that does not start like a
// string, array, object, null or boolean is reported as a number.
//
// Empty input is reported as jsTypeNull instead of panicking on the
// out-of-range index, so that a missing value sorts alongside null.
func jsType(s json.RawMessage) int {
	if len(s) == 0 {
		return jsTypeNull
	}
	switch s[0] {
	case '"':
		return jsTypeString
	case '[':
		return jsTypeArray
	case '{':
		return jsTypeObject
	case 'n':
		return jsTypeNull
	case 't', 'f':
		return jsTypeBoolean
	}
	return jsTypeNumber
}

// couchdbKeys is a list of raw JSON values that can be ordered with the
// standard sort package; elements are ranked by couchdbCmpJSON.
type couchdbKeys []json.RawMessage

// Compile-time proof that *couchdbKeys satisfies sort.Interface.
var _ sort.Interface = (*couchdbKeys)(nil)

// Len reports the number of keys.
func (k couchdbKeys) Len() int {
	return len(k)
}

// Less reports whether the key at index i collates before the key at index j.
func (k couchdbKeys) Less(i, j int) bool {
	return couchdbCmpJSON(k[i], k[j]) < 0
}

// Swap exchanges the keys at indexes i and j.
func (k couchdbKeys) Swap(i, j int) {
	k[j], k[i] = k[i], k[j]
}
113 changes: 113 additions & 0 deletions x/sqlite/collation_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

//go:build !js
// +build !js

package sqlite

import (
"encoding/json"
"math/rand"
"sort"
"testing"

"github.com/google/go-cmp/cmp"
)

// Test_couchdbCmp verifies the ordering implemented by couchdbCmpJSON. Each
// test case lists JSON values in their expected collation order; the test
// shuffles a copy of that list, sorts it as couchdbKeys, and requires the
// result to match the original list exactly.
func Test_couchdbCmp(t *testing.T) {
tests := []struct {
name string
want []string
}{
{
name: "js types", // See https://docs.couchdb.org/en/stable/ddocs/views/collation.html#collation-specification
want: []string{
`null`,
`false`,
`true`,

// then numbers
`1`,
`2`,
`3.0`,
`4`,

// then text, case sensitive
`"a"`,
`"A"`,
`"aa"`,
`"b"`,
`"B"`,
`"ba"`,
`"bb"`,

// then arrays. compared element by element until different.
// Longer arrays sort after their prefixes
`["a"]`,
`["b"]`,
`["b","c"]`,
`["b","c", "a"]`,
`["b","d"]`,
`["b","d", "e"]`,

// then object, compares each key value in the list until different.
// larger objects sort after their subset objects.
`{"a":1}`,
`{"a":2}`,
`{"b":1}`,
`{"b":2}`,
// TODO: See #952
// `{"b":2, "a":1}`, // Member order does matter for collation. CouchDB preserves member order but doesn't require that clients will. this test might fail if used with a js engine that doesn't preserve order.
`{"b":2, "c":2}`,
},
},
{
name: "7-bit ASCII",
want: []string{
// "\"`\"", `"^"`, // TODO: These don't sort according to CouchDB rules
// NOTE(review): the entries `"""` and `"\"` below are not valid JSON
// string literals (the quote and backslash are unescaped), so they
// can only be ordered by their raw text.
`"_"`, `"-"`, `","`, `";"`, `":"`, `"!"`, `"?"`,
`"."`, `"'"`, `"""`, `"("`, `")"`, `"["`, `"]"`, `"{"`, `"}"`,
`"@"`, `"*"`, `"/"`, `"\"`, `"&"`, `"#"`, `"%"`, `"+"`, `"<"`,
`"="`, `">"`, `"|"`, `"~"`, `"$"`, `"0"`, `"1"`, `"2"`, `"3"`,
`"4"`, `"5"`, `"6"`, `"7"`, `"8"`, `"9"`,
`"a"`, `"A"`, `"b"`, `"B"`, `"c"`, `"C"`, `"d"`, `"D"`, `"e"`,
`"E"`, `"f"`, `"F"`, `"g"`, `"G"`, `"h"`, `"H"`, `"i"`, `"I"`,
`"j"`, `"J"`, `"k"`, `"K"`, `"l"`, `"L"`, `"m"`, `"M"`, `"n"`,
`"N"`, `"o"`, `"O"`, `"p"`, `"P"`, `"q"`, `"Q"`, `"r"`, `"R"`,
`"s"`, `"S"`, `"t"`, `"T"`, `"u"`, `"U"`, `"v"`, `"V"`, `"w"`,
`"W"`, `"x"`, `"X"`, `"y"`, `"Y"`, `"z"`, `"Z"`,
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Shuffle the input
input := make([]json.RawMessage, len(tt.want))
for i, v := range tt.want {
input[i] = json.RawMessage(v)
}
rand.Shuffle(len(input), func(i, j int) { input[i], input[j] = input[j], input[i] })

// Sorting must restore the expected order regardless of the shuffle.
sort.Sort(couchdbKeys(input))

got := make([]string, len(input))
for i, v := range input {
got[i] = string(v)
}
if d := cmp.Diff(tt.want, got); d != "" {
t.Errorf("Unexpected result:\n%s", d)
}
})
}
}
2 changes: 1 addition & 1 deletion x/sqlite/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/go-kivik/kivik/v4 v4.2.1
github.com/google/go-cmp v0.6.0
gitlab.com/flimzy/testy v0.14.0
golang.org/x/text v0.13.0
modernc.org/sqlite v1.29.8
)

Expand All @@ -27,7 +28,6 @@ require (
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.19.0 // indirect
golang.org/x/text v0.13.0 // indirect
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
modernc.org/libc v1.49.3 // indirect
modernc.org/mathutil v1.6.0 // indirect
Expand Down
Loading

0 comments on commit f6eb95f

Please sign in to comment.