Skip to content

Commit

Permalink
Mapper: An analyzer mapper allowing to control the index analyzer of …
Browse files Browse the repository at this point in the history
…a document based on a document field, closes elastic#485.
  • Loading branch information
kimchy committed Nov 7, 2010
1 parent b45ade4 commit 171fa4a
Show file tree
Hide file tree
Showing 16 changed files with 332 additions and 63 deletions.
1 change: 1 addition & 0 deletions .idea/dictionaries/kimchy.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,11 @@ public SimpleEngineBenchmark build() {
String sId = Integer.toString(id);
Document doc = doc().add(field("_id", sId))
.add(field("content", contentItem)).build();
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, TRANSLOG_PAYLOAD, false);
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, Lucene.STANDARD_ANALYZER, TRANSLOG_PAYLOAD, false);
if (create) {
engine.create(new Engine.Create(pDoc, Lucene.STANDARD_ANALYZER));
engine.create(new Engine.Create(pDoc));
} else {
engine.index(new Engine.Index(new Term("_id", sId), pDoc, Lucene.STANDARD_ANALYZER));
engine.index(new Engine.Index(new Term("_id", sId), pDoc));
}
}
engine.refresh(new Engine.Refresh(true));
Expand Down Expand Up @@ -278,11 +278,11 @@ private class WriterThread implements Runnable {
String sId = Integer.toString(id);
Document doc = doc().add(field("_id", sId))
.add(field("content", content(id))).build();
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, TRANSLOG_PAYLOAD, false);
ParsedDocument pDoc = new ParsedDocument(sId, sId, "type", doc, Lucene.STANDARD_ANALYZER, TRANSLOG_PAYLOAD, false);
if (create) {
engine.create(new Engine.Create(pDoc, Lucene.STANDARD_ANALYZER));
engine.create(new Engine.Create(pDoc));
} else {
engine.index(new Engine.Index(new Term("_id", sId), pDoc, Lucene.STANDARD_ANALYZER));
engine.index(new Engine.Index(new Term("_id", sId), pDoc));
}
}
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ public FieldNameAnalyzer(Map<String, Analyzer> analyzers, Analyzer defaultAnalyz
this.defaultAnalyzer = defaultAnalyzer;
}

/**
 * Returns the per-field analyzer mapping this analyzer dispatches to, keyed by field name.
 */
public ImmutableMap<String, Analyzer> analyzers() {
    return analyzers;
}

/**
 * Returns the analyzer used for fields that have no entry in {@link #analyzers()}.
 */
public Analyzer defaultAnalyzer() {
    return defaultAnalyzer;
}

@Override public TokenStream tokenStream(String fieldName, Reader reader) {
return getAnalyzer(fieldName).tokenStream(fieldName, reader);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,10 @@ public Operation[] ops() {

static class Create implements Operation {
private final ParsedDocument doc;
private final Analyzer analyzer;
private boolean refresh;

public Create(ParsedDocument doc, Analyzer analyzer) {
public Create(ParsedDocument doc) {
this.doc = doc;
this.analyzer = analyzer;
}

@Override public Type opType() {
Expand All @@ -293,7 +291,7 @@ public Document doc() {
}

public Analyzer analyzer() {
return this.analyzer;
return this.doc.analyzer();
}

public byte[] source() {
Expand All @@ -312,13 +310,11 @@ public void refresh(boolean refresh) {
static class Index implements Operation {
private final Term uid;
private final ParsedDocument doc;
private final Analyzer analyzer;
private boolean refresh;

public Index(Term uid, ParsedDocument doc, Analyzer analyzer) {
public Index(Term uid, ParsedDocument doc) {
this.uid = uid;
this.doc = doc;
this.analyzer = analyzer;
}

@Override public Type opType() {
Expand All @@ -338,7 +334,7 @@ public Document doc() {
}

public Analyzer analyzer() {
return this.analyzer;
return this.doc.analyzer();
}

public String id() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ public Analyzer indexAnalyzer() {
return this.indexAnalyzer;
}

/**
 * A smart analyzer used for indexing that takes into account specific analyzers configured
 * per {@link FieldMapper}, but with the given {@code defaultAnalyzer} used for any field
 * that has no explicitly configured analyzer (instead of the mapping's own default).
 */
public Analyzer indexAnalyzer(Analyzer defaultAnalyzer) {
    return new FieldNameAnalyzer(indexAnalyzer.analyzers(), defaultAnalyzer);
}

/**
* A smart analyzer used for searching that takes into account specific analyzers configured
* per {@link FieldMapper}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;

/**
Expand All @@ -34,16 +35,19 @@ public class ParsedDocument {

private final Document document;

private final Analyzer analyzer;

private final byte[] source;

private boolean mappersAdded;

public ParsedDocument(String uid, String id, String type, Document document, byte[] source, boolean mappersAdded) {
public ParsedDocument(String uid, String id, String type, Document document, Analyzer analyzer, byte[] source, boolean mappersAdded) {
this.uid = uid;
this.id = id;
this.type = type;
this.document = document;
this.source = source;
this.analyzer = analyzer;
this.mappersAdded = mappersAdded;
}

Expand All @@ -63,6 +67,10 @@ public Document doc() {
return this.document;
}

public Analyzer analyzer() {
return this.analyzer;
}

public byte[] source() {
return this.source;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.elasticsearch.common.lucene.all.AllTermQuery;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MergeMappingException;

import java.io.IOException;
Expand Down Expand Up @@ -118,14 +117,14 @@ public boolean enabled() {
// reset the entries
context.allEntries().reset();

Analyzer analyzer = findAnalyzer(context.docMapper());
Analyzer analyzer = findAnalyzer(context);
return new AllField(names.indexName(), store, termVector, context.allEntries(), analyzer);
}

private Analyzer findAnalyzer(DocumentMapper docMapper) {
private Analyzer findAnalyzer(ParseContext context) {
Analyzer analyzer = indexAnalyzer;
if (analyzer == null) {
analyzer = docMapper.indexAnalyzer();
analyzer = context.analyzer();
if (analyzer == null) {
analyzer = Lucene.STANDARD_ANALYZER;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.mapper.xcontent;

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.FieldMapperListener;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeMappingException;

import java.io.IOException;

/**
* @author kimchy (shay.banon)
*/
/**
 * A mapper that allows controlling the index-time analyzer of a document based on the value
 * of one of the document's own fields: the value of {@link #field} is looked up in the
 * {@code AnalysisService} and installed on the {@link ParseContext} for the rest of parsing.
 *
 * @author kimchy (shay.banon)
 */
public class AnalyzerMapper implements XContentMapper {

    public static final String CONTENT_TYPE = "_analyzer";

    public static class Builder extends XContentMapper.Builder<Builder, AnalyzerMapper> {

        // name of the document field whose value selects the analyzer; null means
        // "just use the mapping's default index analyzer"
        private String field = null;

        public Builder() {
            super(CONTENT_TYPE);
            this.builder = this;
        }

        public Builder field(String field) {
            this.field = field;
            return this;
        }

        @Override public AnalyzerMapper build(BuilderContext context) {
            return new AnalyzerMapper(field);
        }
    }

    // for now, it is parsed directly in the document parser, need to move this internal types parsing to be done here as well...
//    public static class TypeParser implements XContentMapper.TypeParser {
//        @Override public XContentMapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
//            AnalyzerMapper.Builder builder = analyzer();
//            for (Map.Entry<String, Object> entry : node.entrySet()) {
//                String fieldName = Strings.toUnderscoreCase(entry.getKey());
//                Object fieldNode = entry.getValue();
//                if ("field".equals(fieldName)) {
//                    builder.field(fieldNode.toString());
//                }
//            }
//            return builder;
//        }
//    }

    private final String field;

    public AnalyzerMapper(String field) {
        this.field = field;
    }

    @Override public String name() {
        return CONTENT_TYPE;
    }

    /**
     * Resolves the analyzer for this document and stores it on the context.
     * <p>
     * Starts from the mapping's default index analyzer; if {@link #field} is set and the
     * document carries a value for it, that value is resolved through the analysis service
     * and wrapped so per-field analyzers still take precedence.
     *
     * @throws MapperParsingException if the field names an analyzer that does not exist
     */
    @Override public void parse(ParseContext context) throws IOException {
        Analyzer analyzer = context.docMapper().mappers().indexAnalyzer();
        if (field != null) {
            String value = context.doc().get(field);
            if (value != null) {
                analyzer = context.analysisService().analyzer(value);
                if (analyzer == null) {
                    throw new MapperParsingException("No analyzer found for [" + value + "] from field [" + field + "]");
                }
                // keep explicitly configured per-field analyzers, only override the default
                analyzer = context.docMapper().mappers().indexAnalyzer(analyzer);
            }
        }
        context.analyzer(analyzer);
    }

    // nothing to merge: the field name is fixed at construction time
    @Override public void merge(XContentMapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
    }

    // exposes no field mappers
    @Override public void traverse(FieldMapperListener fieldMapperListener) {
    }

    @Override public void toXContent(XContentBuilder builder, Params params) throws IOException {
        if (field == null) {
            // fully default configuration, nothing worth serializing
            return;
        }
        builder.startObject(CONTENT_TYPE);
        // the early return above guarantees field is non-null here, so the
        // original redundant null re-check has been dropped
        builder.field("field", field);
        builder.endObject();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

package org.elasticsearch.index.mapper.xcontent;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.elasticsearch.common.lucene.all.AllEntries;
import org.elasticsearch.common.util.concurrent.NotThreadSafe;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.DocumentMapper;

/**
Expand All @@ -41,6 +43,8 @@ public class ParseContext {

private Document document;

private Analyzer analyzer;

private String index;

private String type;
Expand Down Expand Up @@ -75,6 +79,7 @@ public ParseContext(String index, XContentDocumentMapperParser docMapperParser,
public void reset(XContentParser parser, Document document, String type, byte[] source, DocumentMapper.ParseListener listener) {
this.parser = parser;
this.document = document;
this.analyzer = null;
this.type = type;
this.source = source;
this.path.reset();
Expand Down Expand Up @@ -132,6 +137,10 @@ public XContentDocumentMapper docMapper() {
return this.docMapper;
}

public AnalysisService analysisService() {
return docMapperParser.analysisService;
}

public String id() {
return id;
}
Expand Down Expand Up @@ -166,6 +175,14 @@ public AllEntries allEntries() {
return this.allEntries;
}

public Analyzer analyzer() {
return this.analyzer;
}

public void analyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}

public void externalValue(Object externalValue) {
this.externalValueSet = true;
this.externalValue = externalValue;
Expand Down
Loading

0 comments on commit 171fa4a

Please sign in to comment.