Skip to content

Commit

Permalink
use PointsConfigMap so numeric query and numeric range query work cle…
Browse files Browse the repository at this point in the history
…anly
  • Loading branch information
rnewson committed Apr 10, 2024
1 parent 8bbf776 commit b738ccb
Show file tree
Hide file tree
Showing 13 changed files with 340 additions and 237 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.core.ser;

/**
 * A {@link PrimitiveWrapper} specialised to {@link Void}: the only value it ever hands to the superclass
 * constructor is {@code null}.
 */
public class NullWrapper extends PrimitiveWrapper<Void> {

    /** Creates the wrapper, passing {@code null} as the wrapped value. */
    public NullWrapper() {
        super((Void) null);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
@JsonSubTypes.Type(value = IntWrapper.class, name = "int"),
@JsonSubTypes.Type(value = LongWrapper.class, name = "long"),
@JsonSubTypes.Type(value = StringWrapper.class, name = "string"),
@JsonSubTypes.Type(value = NullWrapper.class, name = "null"),
})
public class PrimitiveWrapper<T> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
Expand All @@ -50,6 +51,7 @@
import org.apache.couchdb.nouveau.core.ser.FloatWrapper;
import org.apache.couchdb.nouveau.core.ser.IntWrapper;
import org.apache.couchdb.nouveau.core.ser.LongWrapper;
import org.apache.couchdb.nouveau.core.ser.NullWrapper;
import org.apache.couchdb.nouveau.core.ser.PrimitiveWrapper;
import org.apache.couchdb.nouveau.core.ser.StringWrapper;
import org.apache.lucene.analysis.Analyzer;
Expand All @@ -67,16 +69,22 @@
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.Sort;
Expand All @@ -93,11 +101,12 @@ public class Lucene9Index extends Index {

private static final Sort DEFAULT_SORT =
new Sort(SortField.FIELD_SCORE, new SortField("_id", SortField.Type.STRING));
private static final Pattern SORT_FIELD_RE = Pattern.compile("^([-+])?([\\.\\w]+)(?:<(\\w+)>)$");
private static final Pattern SORT_FIELD_RE = Pattern.compile("^([-+])?([\\.\\w]+)$");

private final Analyzer analyzer;
private final IndexWriter writer;
private final SearcherManager searcherManager;
private final Lucene9IndexSchema schema;

public Lucene9Index(
final Analyzer analyzer,
Expand All @@ -109,6 +118,7 @@ public Lucene9Index(
this.analyzer = Objects.requireNonNull(analyzer);
this.writer = Objects.requireNonNull(writer);
this.searcherManager = Objects.requireNonNull(searcherManager);
this.schema = initSchema(writer);
}

@Override
Expand All @@ -134,6 +144,7 @@ public long doDiskSize() throws IOException {
public void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException {
    // Build both writer arguments up front; either step may fail before anything is recorded.
    final Term idTerm = docIdTerm(docId);
    final Document replacement = toDocument(docId, request);

    // Register (and type-check) the request's fields in the schema before handing the document to the writer.
    schema.update(request.getFields());

    writer.updateDocument(idTerm, replacement);
}

Expand All @@ -148,7 +159,13 @@ public boolean doCommit(final long updateSeq, final long purgeSeq) throws IOExce
if (!writer.hasUncommittedChanges()) {
return false;
}
writer.setLiveCommitData(Map.of("update_seq", Long.toString(updateSeq), "purge_seq", Long.toString(purgeSeq))
writer.setLiveCommitData(Map.of(
"update_seq",
Long.toString(updateSeq),
"purge_seq",
Long.toString(purgeSeq),
"_schema",
schema.toString())
.entrySet());
writer.commit();
return true;
Expand Down Expand Up @@ -339,11 +356,11 @@ private Sort toSort(final SearchRequest searchRequest) {
final String last = sort.get(sort.size() - 1);
// Append _id field if not already present.
switch (last) {
case "-_id<string>":
case "_id<string>":
case "-_id":
case "_id":
break;
default:
sort.add("_id<string>");
sort.add("_id");
}
return convertSort(sort);
}
Expand All @@ -365,13 +382,18 @@ private SortField convertSortField(final String sortString) {
throw new WebApplicationException(sortString + " is not a valid sort parameter", Status.BAD_REQUEST);
}
final boolean reverse = "-".equals(m.group(1));
switch (m.group(3)) {
case "string":

var type = schema.getType(m.group(2));
if (type == null) {
return new UnknownSortField(m.group(2), reverse);
}
switch (type) {
case STRING:
return new SortedSetSortField(m.group(2), reverse);
case "double":
case DOUBLE:
return new SortedNumericSortField(m.group(2), SortField.Type.DOUBLE, reverse);
default:
throw new WebApplicationException(m.group(3) + " is not a valid sort type", Status.BAD_REQUEST);
throw new WebApplicationException("can't sort on field " + m.group(2), Status.BAD_REQUEST);
}
}

Expand Down Expand Up @@ -469,6 +491,8 @@ private PrimitiveWrapper<?>[] toAfter(final FieldDoc fieldDoc) {
fields[i] = new LongWrapper((long) fieldDoc.fields[i]);
} else if (fieldDoc.fields[i] instanceof Float) {
fields[i] = new FloatWrapper((float) fieldDoc.fields[i]);
} else if (fieldDoc.fields[i] == null) {
fields[i] = new NullWrapper();
} else {
throw new WebApplicationException(fieldDoc.fields[i].getClass() + " is not valid", Status.BAD_REQUEST);
}
Expand All @@ -489,7 +513,10 @@ private static Term docIdTerm(final String docId) {
}

private Query parse(final SearchRequest request) {
var queryParser = new NouveauQueryParser(analyzer, request.getLocale());
var locale = request.getLocale() != null ? request.getLocale() : Locale.getDefault();
var pointsConfigMap = schema.toPointsConfigMap(locale);
var queryParser = new NouveauQueryParser(analyzer, pointsConfigMap);

Query result;
try {
result = queryParser.parse(request.getQuery(), "default");
Expand All @@ -505,9 +532,88 @@ private Query parse(final SearchRequest request) {
return result;
}

/**
 * Builds the schema for this index from the writer's live commit data.
 *
 * @param writer the writer whose live commit data is inspected
 * @return the schema parsed from the value stored under the {@code "_schema"} key, or an empty schema when
 *     there is no commit data or no such key
 */
private Lucene9IndexSchema initSchema(IndexWriter writer) {
    final var liveCommitData = writer.getLiveCommitData();
    if (liveCommitData != null) {
        for (final var item : liveCommitData) {
            if (item.getKey().equals("_schema")) {
                return Lucene9IndexSchema.fromString(item.getValue());
            }
        }
    }
    // Either nothing has been committed yet or the commit carries no schema entry.
    return Lucene9IndexSchema.emptySchema();
}

/** Describes this index by its analyzer, writer and searcher manager. */
@Override
public String toString() {
    return new StringBuilder("Lucene9Index [analyzer=")
            .append(analyzer)
            .append(", writer=")
            .append(writer)
            .append(", searcherManager=")
            .append(searcherManager)
            .append("]")
            .toString();
}

/**
 * Sort field for a field name this shard has no type for, since none of its documents currently has that
 * field. Every document is treated as having null for the field, so all hits compare as equal under it.
 */
private static final class UnknownSortField extends SortField {

    /** Single shared source; every comparator it produces ranks all hits identically. */
    private static final FieldComparatorSource COMPARATOR = new AllEqualComparatorSource();

    private UnknownSortField(final String fieldName, final boolean reverse) {
        super(fieldName, COMPARATOR, reverse);
    }

    /** Produces {@link AllEqualComparator} instances regardless of the arguments it is given. */
    private static final class AllEqualComparatorSource extends FieldComparatorSource {

        @Override
        public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) {
            return new AllEqualComparator();
        }
    }

    /** Comparator whose slots hold no values: every slot compares equal and reads back as null. */
    private static final class AllEqualComparator extends FieldComparator<Void> {

        @Override
        public int compare(int slot1, int slot2) {
            return 0;
        }

        @Override
        public void setTopValue(Void value) {
            // nothing to remember
        }

        @Override
        public Void value(int slot) {
            return null;
        }

        @Override
        public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
            return new AllEqualLeafComparator();
        }
    }

    /** Per-segment counterpart: ignores every document and scorer, and reports all comparisons as ties. */
    private static final class AllEqualLeafComparator implements LeafFieldComparator {

        @Override
        public void setBottom(int slot) throws IOException {
            // nothing to remember
        }

        @Override
        public int compareBottom(int doc) throws IOException {
            return 0;
        }

        @Override
        public int compareTop(int doc) throws IOException {
            return 0;
        }

        @Override
        public void copy(int slot, int doc) throws IOException {
            // nothing to copy
        }

        @Override
        public void setScorer(Scorable scorer) throws IOException {
            // scores are irrelevant here
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.couchdb.nouveau.lucene9;

import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.Response.Status;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import org.apache.couchdb.nouveau.api.DoubleField;
import org.apache.couchdb.nouveau.api.Field;
import org.apache.couchdb.nouveau.api.StringField;
import org.apache.couchdb.nouveau.api.TextField;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;

final class Lucene9IndexSchema {

public enum Type {
STRING,
TEXT,
DOUBLE;

private static Type fromField(final Field field) {
if (field instanceof StringField) {
return STRING;
} else if (field instanceof TextField) {
return TEXT;
} else if (field instanceof DoubleField) {
return DOUBLE;
}
throw new IllegalArgumentException(field + " not supported");
}
}

private final ConcurrentMap<String, Type> map;

private Lucene9IndexSchema(Map<String, Type> map) {
this.map = new ConcurrentHashMap<>(map);
this.map.put("_id", Type.STRING);
}

public static Lucene9IndexSchema emptySchema() {
return new Lucene9IndexSchema(new HashMap<String, Type>());
}

public static Lucene9IndexSchema fromString(final String schemaStr) {
Objects.requireNonNull(schemaStr);
if (schemaStr.isEmpty()) {
return emptySchema();
}
var map = Arrays.stream(schemaStr.split(","))
.collect(Collectors.toMap(i -> i.split(":")[0], i -> Type.valueOf(i.split(":")[1])));
return new Lucene9IndexSchema(map);
}

public void update(final Collection<Field> fields) {
Objects.requireNonNull(fields);
for (var field : fields) {
map.putIfAbsent(field.getName(), Type.fromField(field));
assertType(field);
}
}

public Type getType(final String fieldName) {
return map.get(fieldName);
}

public void assertType(final Field field) {
Objects.requireNonNull(field);
var expectedType = Type.fromField(field);
var actualType = map.get(field.getName());
if (actualType == null) {
throw new WebApplicationException("Unknown field " + field.getName(), Status.BAD_REQUEST);
}
if (expectedType != actualType) {
throw new WebApplicationException(
String.format("field %s is of type %s not %s", field.getName(), expectedType, actualType),
Status.BAD_REQUEST);
}
}

public Map<String, PointsConfig> toPointsConfigMap(final Locale locale) {
Objects.requireNonNull(locale);
var numberFormat = NumberFormat.getInstance(locale);
var doublePointsConfig = new PointsConfig(numberFormat, Double.class);
return map.entrySet().stream()
.filter(e -> e.getValue() == Type.DOUBLE)
.collect(Collectors.toMap(e -> e.getKey(), e -> doublePointsConfig));
}

@Override
public String toString() {
return map.entrySet().stream()
.map(e -> String.format("%s:%s", e.getKey(), e.getValue()))
.collect(Collectors.joining(","));
}
}
Loading

0 comments on commit b738ccb

Please sign in to comment.