Skip to content

Commit

Permalink
Mapper: An analyzer mapper allowing to control the index analyzer of …
Browse files Browse the repository at this point in the history
…a document based on a document field, closes elastic#485.
  • Loading branch information
kimchy committed Nov 7, 2010
1 parent b45ade4 commit 171fa4a
Show file tree
Hide file tree
Showing 16 changed files with 332 additions and 63 deletions.
1 change: 1 addition & 0 deletions .idea/dictionaries/kimchy.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,11 @@ public SimpleEngineBenchmark build() {
String sId = Integer.toString(id);
Document doc = doc().add(field("_id", sId))
.add(field("content", contentItem)).build();
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, TRANSLOG_PAYLOAD, false);
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, Lucene.STANDARD_ANALYZER, TRANSLOG_PAYLOAD, false);
if (create) {
engine.create(new Engine.Create(pDoc, Lucene.STANDARD_ANALYZER));
engine.create(new Engine.Create(pDoc));
} else {
engine.index(new Engine.Index(new Term("_id", sId), pDoc, Lucene.STANDARD_ANALYZER));
engine.index(new Engine.Index(new Term("_id", sId), pDoc));
}
}
engine.refresh(new Engine.Refresh(true));
Expand Down Expand Up @@ -278,11 +278,11 @@ private class WriterThread implements Runnable {
String sId = Integer.toString(id);
Document doc = doc().add(field("_id", sId))
.add(field("content", content(id))).build();
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, TRANSLOG_PAYLOAD, false);
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, Lucene.STANDARD_ANALYZER, TRANSLOG_PAYLOAD, false);
if (create) {
engine.create(new Engine.Create(pDoc, Lucene.STANDARD_ANALYZER));
engine.create(new Engine.Create(pDoc));
} else {
engine.index(new Engine.Index(new Term("_id", sId), pDoc, Lucene.STANDARD_ANALYZER));
engine.index(new Engine.Index(new Term("_id", sId), pDoc));
}
}
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ public FieldNameAnalyzer(Map<String, Analyzer> analyzers, Analyzer defaultAnalyz
this.defaultAnalyzer = defaultAnalyzer;
}

/**
 * Returns the per-field analyzer mapping this analyzer dispatches to, keyed by field name.
 */
public ImmutableMap<String, Analyzer> analyzers() {
    return analyzers;
}

/**
 * Returns the analyzer used for fields that have no entry in {@link #analyzers()}.
 */
public Analyzer defaultAnalyzer() {
    return defaultAnalyzer;
}

@Override public TokenStream tokenStream(String fieldName, Reader reader) {
return getAnalyzer(fieldName).tokenStream(fieldName, reader);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,10 @@ public Operation[] ops() {

static class Create implements Operation {
private final ParsedDocument doc;
private final Analyzer analyzer;
private boolean refresh;

public Create(ParsedDocument doc, Analyzer analyzer) {
public Create(ParsedDocument doc) {
this.doc = doc;
this.analyzer = analyzer;
}

@Override public Type opType() {
Expand All @@ -293,7 +291,7 @@ public Document doc() {
}

public Analyzer analyzer() {
return this.analyzer;
return this.doc.analyzer();
}

public byte[] source() {
Expand All @@ -312,13 +310,11 @@ public void refresh(boolean refresh) {
static class Index implements Operation {
private final Term uid;
private final ParsedDocument doc;
private final Analyzer analyzer;
private boolean refresh;

public Index(Term uid, ParsedDocument doc, Analyzer analyzer) {
public Index(Term uid, ParsedDocument doc) {
this.uid = uid;
this.doc = doc;
this.analyzer = analyzer;
}

@Override public Type opType() {
Expand All @@ -338,7 +334,7 @@ public Document doc() {
}

public Analyzer analyzer() {
return this.analyzer;
return this.doc.analyzer();
}

public String id() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ public Analyzer indexAnalyzer() {
return this.indexAnalyzer;
}

/**
 * A smart analyzer used for indexing that takes into account specific analyzers configured
 * per {@link FieldMapper}, but with the given {@code defaultAnalyzer} used for any field
 * that has no explicitly configured analyzer (instead of the mapping's own default).
 */
public Analyzer indexAnalyzer(Analyzer defaultAnalyzer) {
    return new FieldNameAnalyzer(indexAnalyzer.analyzers(), defaultAnalyzer);
}

/**
* A smart analyzer used for searching that takes into account specific analyzers configured
* per {@link FieldMapper}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;

/**
Expand All @@ -34,16 +35,19 @@ public class ParsedDocument {

private final Document document;

private final Analyzer analyzer;

private final byte[] source;

private boolean mappersAdded;

public ParsedDocument(String uid, String id, String type, Document document, byte[] source, boolean mappersAdded) {
public ParsedDocument(String uid, String id, String type, Document document, Analyzer analyzer, byte[] source, boolean mappersAdded) {
this.uid = uid;
this.id = id;
this.type = type;
this.document = document;
this.source = source;
this.analyzer = analyzer;
this.mappersAdded = mappersAdded;
}

Expand All @@ -63,6 +67,10 @@ public Document doc() {
return this.document;
}

public Analyzer analyzer() {
return this.analyzer;
}

public byte[] source() {
return this.source;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.elasticsearch.common.lucene.all.AllTermQuery;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MergeMappingException;

import java.io.IOException;
Expand Down Expand Up @@ -118,14 +117,14 @@ public boolean enabled() {
// reset the entries
context.allEntries().reset();

Analyzer analyzer = findAnalyzer(context.docMapper());
Analyzer analyzer = findAnalyzer(context);
return new AllField(names.indexName(), store, termVector, context.allEntries(), analyzer);
}

private Analyzer findAnalyzer(DocumentMapper docMapper) {
private Analyzer findAnalyzer(ParseContext context) {
Analyzer analyzer = indexAnalyzer;
if (analyzer == null) {
analyzer = docMapper.indexAnalyzer();
analyzer = context.analyzer();
if (analyzer == null) {
analyzer = Lucene.STANDARD_ANALYZER;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.mapper.xcontent;

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.FieldMapperListener;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeMappingException;

import java.io.IOException;

/**
* @author kimchy (shay.banon)
*/
/**
 * A mapper that allows controlling the index-time analyzer of a document based on the value
 * of one of the document's own fields: the value of {@link #field} is looked up in the
 * {@code AnalysisService} and installed on the {@link ParseContext} for the rest of parsing.
 *
 * @author kimchy (shay.banon)
 */
public class AnalyzerMapper implements XContentMapper {

    public static final String CONTENT_TYPE = "_analyzer";

    public static class Builder extends XContentMapper.Builder<Builder, AnalyzerMapper> {

        // name of the document field whose value selects the analyzer; null means
        // "just use the mapping's default index analyzer"
        private String field = null;

        public Builder() {
            super(CONTENT_TYPE);
            this.builder = this;
        }

        public Builder field(String field) {
            this.field = field;
            return this;
        }

        @Override public AnalyzerMapper build(BuilderContext context) {
            return new AnalyzerMapper(field);
        }
    }

    // for now, it is parsed directly in the document parser, need to move this internal types parsing to be done here as well...
//    public static class TypeParser implements XContentMapper.TypeParser {
//        @Override public XContentMapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
//            AnalyzerMapper.Builder builder = analyzer();
//            for (Map.Entry<String, Object> entry : node.entrySet()) {
//                String fieldName = Strings.toUnderscoreCase(entry.getKey());
//                Object fieldNode = entry.getValue();
//                if ("field".equals(fieldName)) {
//                    builder.field(fieldNode.toString());
//                }
//            }
//            return builder;
//        }
//    }

    private final String field;

    public AnalyzerMapper(String field) {
        this.field = field;
    }

    @Override public String name() {
        return CONTENT_TYPE;
    }

    /**
     * Resolves the analyzer for this document and stores it on the context.
     * <p>
     * Starts from the mapping's default index analyzer; if {@link #field} is set and the
     * document carries a value for it, that value is resolved through the analysis service
     * and wrapped so per-field analyzers still take precedence.
     *
     * @throws MapperParsingException if the field names an analyzer that does not exist
     */
    @Override public void parse(ParseContext context) throws IOException {
        Analyzer analyzer = context.docMapper().mappers().indexAnalyzer();
        if (field != null) {
            String value = context.doc().get(field);
            if (value != null) {
                analyzer = context.analysisService().analyzer(value);
                if (analyzer == null) {
                    throw new MapperParsingException("No analyzer found for [" + value + "] from field [" + field + "]");
                }
                // keep explicitly configured per-field analyzers, only override the default
                analyzer = context.docMapper().mappers().indexAnalyzer(analyzer);
            }
        }
        context.analyzer(analyzer);
    }

    // nothing to merge: the field name is fixed at construction time
    @Override public void merge(XContentMapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
    }

    // exposes no field mappers
    @Override public void traverse(FieldMapperListener fieldMapperListener) {
    }

    @Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
        if (field == null) {
            // fully default configuration, nothing worth serializing
            return;
        }
        builder.startObject(CONTENT_TYPE);
        // the early return above guarantees field is non-null here, so the
        // original redundant null re-check has been dropped
        builder.field("field", field);
        builder.endObject();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

package org.elasticsearch.index.mapper.xcontent;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.elasticsearch.common.lucene.all.AllEntries;
import org.elasticsearch.common.util.concurrent.NotThreadSafe;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.DocumentMapper;

/**
Expand All @@ -41,6 +43,8 @@ public class ParseContext {

private Document document;

private Analyzer analyzer;

private String index;

private String type;
Expand Down Expand Up @@ -75,6 +79,7 @@ public ParseContext(String index, XContentDocumentMapperParser docMapperParser,
public void reset(XContentParser parser, Document document, String type, byte[] source, DocumentMapper.ParseListener listener) {
this.parser = parser;
this.document = document;
this.analyzer = null;
this.type = type;
this.source = source;
this.path.reset();
Expand Down Expand Up @@ -132,6 +137,10 @@ public XContentDocumentMapper docMapper() {
return this.docMapper;
}

public AnalysisService analysisService() {
return docMapperParser.analysisService;
}

public String id() {
return id;
}
Expand Down Expand Up @@ -166,6 +175,14 @@ public AllEntries allEntries() {
return this.allEntries;
}

public Analyzer analyzer() {
return this.analyzer;
}

public void analyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}

public void externalValue(Object externalValue) {
this.externalValueSet = true;
this.externalValue = externalValue;
Expand Down
Loading

0 comments on commit 171fa4a

Please sign in to comment.