From 87e4abe393d70165e3d7dfaec5be7365ff2ac965 Mon Sep 17 00:00:00 2001 From: sixcooler Date: Mon, 31 Aug 2015 20:24:41 +0200 Subject: [PATCH] =?UTF-8?q?fight=20the=20fieldcache=20by=20using=20DocValu?= =?UTF-8?q?es:=20in=20Solr-5.x=20the=20fieldcache=20has=20moved=20and=20wa?= =?UTF-8?q?s=20not=20cleared=20anymore.=20This=20results=20in=20a=20huge?= =?UTF-8?q?=20fieldcache.=20(http://lucene.apache.org/#highlights-of-the-l?= =?UTF-8?q?ucene-release-include=20https://issues.apache.org/jira/browse/L?= =?UTF-8?q?UCENE-5666)=20Here=20I=20try=20to=20use=20DocValues=20where=20i?= =?UTF-8?q?t=20is=20possible.=20For=20this=20I=20used=20the=20Api-Scheme?= =?UTF-8?q?=20as=20new=20basis=20for=20the=20Solr-Schema.=20This=20ne?= =?UTF-8?q?eds=20at=20least=20a=20complete=20optimization=20of=20the=20Sol?= =?UTF-8?q?r-Index=20to=20get=20a=20smaller=20FieldCache.=20Everything=20t?= =?UTF-8?q?hat=20is=20indexed=20with=20these=20settings=20will=20not=20use?= =?UTF-8?q?=20the=20Fieldcache=20at=20all.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- defaults/solr/schema.xml | 751 +++--------------- htroot/api/schema.java | 1 + htroot/api/schema.xml | 30 +- .../cora/federate/solr/SchemaDeclaration.java | 2 + .../solr/connector/AbstractSolrConnector.java | 2 +- source/net/yacy/search/query/QueryParams.java | 4 +- .../yacy/search/schema/CollectionSchema.java | 8 +- .../yacy/search/schema/WebgraphSchema.java | 8 +- 8 files changed, 144 insertions(+), 662 deletions(-) diff --git a/defaults/solr/schema.xml b/defaults/solr/schema.xml index ba80d14ef..a774593ce 100644 --- a/defaults/solr/schema.xml +++ b/defaults/solr/schema.xml @@ -1,655 +1,122 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + - - id + + - + + - + + - + + - - - - - - + + - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + id + text_t + + + + diff --git a/htroot/api/schema.java b/htroot/api/schema.java index ee86d71b5..06d5b3c3a 100644 --- a/htroot/api/schema.java +++ b/htroot/api/schema.java @@ -106,5 +106,6 @@ public class schema { prop.put("fields_" + c + "_storedChecked", field.isStored() ? 1 : 0); prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0); prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0); + prop.put("fields_" + c + "_docValueChecked", field.isDocValue() ? 
1 : 0); } } diff --git a/htroot/api/schema.xml b/htroot/api/schema.xml index 1cac45be8..e0dac0903 100644 --- a/htroot/api/schema.xml +++ b/htroot/api/schema.xml @@ -1,18 +1,18 @@ - + - + - - - - - - - - - + + + + + + + + + @@ -30,12 +30,12 @@ - + - + @@ -55,12 +55,12 @@ - + #{fields}# - + #{/fields}# diff --git a/source/net/yacy/cora/federate/solr/SchemaDeclaration.java b/source/net/yacy/cora/federate/solr/SchemaDeclaration.java index 7bcc53e53..94f7f9da2 100644 --- a/source/net/yacy/cora/federate/solr/SchemaDeclaration.java +++ b/source/net/yacy/cora/federate/solr/SchemaDeclaration.java @@ -46,6 +46,8 @@ public interface SchemaDeclaration { public boolean isSearchable(); public boolean isOmitNorms(); + + public boolean isDocValue(); public String getComment(); diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 90ac84375..c6f4df49c 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -523,7 +523,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { params.setFacetMinCount(1); // there are many 0-count facets in the uninverted index cache params.setFacetLimit(maxresults); params.setFacetSort(FacetParams.FACET_SORT_COUNT); - params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fc /*FACET_METHOD_fcs*/); + params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum); // fight the fieldcache params.setFields(fields); params.clearSorts(); params.setIncludeScore(false); diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 1adafe904..74b2697ee 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -461,10 +461,10 @@ public final class QueryParams { params.setFacetMinCount(1); 
params.setFacetLimit(FACETS_STANDARD_MAXCOUNT); params.setFacetSort(FacetParams.FACET_SORT_COUNT); - params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fcs); + params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum); // fight the fieldcache for (String field: this.facetfields) params.addFacetField("{!ex=" + field + "}" + field); // params.addFacetField("{!ex=" + field + "}" + field); if (this.facetfields.contains(CollectionSchema.dates_in_content_dts.name())) { - params.setParam("facet.range", CollectionSchema.dates_in_content_dts.name()); + params.setParam(FacetParams.FACET_RANGE, CollectionSchema.dates_in_content_dts.name()); String start = TrieDateField.formatExternal(new Date(System.currentTimeMillis() - 1000L * 60L * 60L * 24L * 3)); String end = TrieDateField.formatExternal(new Date(System.currentTimeMillis() + 1000L * 60L * 60L * 24L * 3)); params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.range.start", start); diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index 85cb1a091..a4c66cbaf 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -252,7 +252,7 @@ public enum CollectionSchema implements SchemaDeclaration { private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() ) private final SolrType type; - private final boolean indexed, stored, searchable, multiValued, omitNorms; + private final boolean indexed, stored, searchable, multiValued, omitNorms, docValues; private String comment; private CollectionSchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final boolean searchable, final String comment) { @@ -263,6 +263,7 @@ public enum CollectionSchema implements SchemaDeclaration { 
this.omitNorms = omitNorms; this.searchable = searchable; this.comment = comment; + this.docValues = (type == SolrType.string || type == SolrType.date); // verify our naming scheme String name = this.name(); int p = name.indexOf('_'); @@ -336,6 +337,11 @@ public enum CollectionSchema implements SchemaDeclaration { public final boolean isSearchable() { return this.searchable; } + + @Override + public boolean isDocValue() { + return this.docValues; + } @Override public final String getComment() { diff --git a/source/net/yacy/search/schema/WebgraphSchema.java b/source/net/yacy/search/schema/WebgraphSchema.java index 18837ad58..ee7ad8eac 100644 --- a/source/net/yacy/search/schema/WebgraphSchema.java +++ b/source/net/yacy/search/schema/WebgraphSchema.java @@ -105,7 +105,7 @@ public enum WebgraphSchema implements SchemaDeclaration { private String solrFieldName = null; // solr field name in custom solr schema private final SolrType type; - private final boolean indexed, stored, multiValued, omitNorms, searchable; + private final boolean indexed, stored, multiValued, omitNorms, searchable, docValues; private String comment; @@ -117,6 +117,7 @@ public enum WebgraphSchema implements SchemaDeclaration { this.omitNorms = omitNorms; this.searchable = searchable; this.comment = comment; + this.docValues = (type == SolrType.string || type == SolrType.date); // verify our naming scheme String name = this.name(); int p = name.indexOf('_'); @@ -190,6 +191,11 @@ public enum WebgraphSchema implements SchemaDeclaration { return this.searchable; } + @Override + public boolean isDocValue() { + return this.docValues; + } + @Override public final String getComment() { return this.comment;