From 0aa6fcf2592eb525b1cf23927b799a56604358df Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 10 Jul 2015 16:47:19 +0200 Subject: [PATCH 1/3] remove old vocabularies and synonyms before adding new --- .classpath | 2 +- .../net/yacy/cora/federate/solr/SchemaConfiguration.java | 8 ++++++++ source/net/yacy/cora/federate/solr/SchemaDeclaration.java | 2 ++ .../net/yacy/search/schema/CollectionConfiguration.java | 6 ++++++ source/net/yacy/search/schema/CollectionSchema.java | 6 +++++- source/net/yacy/search/schema/WebgraphSchema.java | 4 ++++ 6 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.classpath b/.classpath index 9e4aea59b..139752929 100644 --- a/.classpath +++ b/.classpath @@ -68,7 +68,7 @@ - + diff --git a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java index 9c563bf07..f57068dc9 100644 --- a/source/net/yacy/cora/federate/solr/SchemaConfiguration.java +++ b/source/net/yacy/cora/federate/solr/SchemaConfiguration.java @@ -112,6 +112,14 @@ public class SchemaConfiguration extends Configuration implements Serializable { return this.contains(field.getSolrFieldName()); } + public void remove(final SolrInputDocument doc, final SchemaDeclaration key) { + key.remove(doc); + } + + public void remove(final SolrInputDocument doc, final String key) { + doc.removeField(key); + } + public void add(final SolrInputDocument doc, final SchemaDeclaration key, final String value) { assert !key.isMultiValued() : "key = " + key.getSolrFieldName(); if ((isEmpty() || contains(key)) && (!this.lazy || (value != null && !value.isEmpty()))) key.add(doc, value); diff --git a/source/net/yacy/cora/federate/solr/SchemaDeclaration.java b/source/net/yacy/cora/federate/solr/SchemaDeclaration.java index 56d4c9655..7bcc53e53 100644 --- a/source/net/yacy/cora/federate/solr/SchemaDeclaration.java +++ b/source/net/yacy/cora/federate/solr/SchemaDeclaration.java @@ -72,5 +72,7 @@ public interface SchemaDeclaration { public void add(final SolrInputDocument doc, final double value); public void add(final SolrInputDocument doc, final boolean value); + + public void remove(final SolrInputDocument doc); } \ No newline at end of file diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 04b6a7884..7211a363a 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -101,6 +101,7 @@ import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; import org.eclipse.jetty.util.ConcurrentHashSet; @@ -1006,6 +1007,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } public void enrich(SolrInputDocument doc, List synonyms, Map> genericFacets) { + remove(doc, CollectionSchema.vocabularies_sxt); // delete old values + for (SolrInputField sif: doc) { + if (sif.getName().startsWith(CollectionSchema.VOCABULARY_PREFIX)) remove(doc, sif.getName()); + } if (this.isEmpty() || contains(CollectionSchema.vocabularies_sxt)) { // write generic navigation // there are no pre-defined solr fields for navigation because the vocabulary is generic @@ -1027,6 +1032,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri } if (vocabularies.size() > 0) add(doc, CollectionSchema.vocabularies_sxt, vocabularies); } + remove(doc, CollectionSchema.synonyms_sxt); // delete old values if (this.isEmpty() || contains(CollectionSchema.synonyms_sxt)) { if (synonyms.size() > 0) add(doc, CollectionSchema.synonyms_sxt, synonyms); } diff --git a/source/net/yacy/search/schema/CollectionSchema.java b/source/net/yacy/search/schema/CollectionSchema.java index c23f5640a..85cb1a091 100644 --- a/source/net/yacy/search/schema/CollectionSchema.java +++ b/source/net/yacy/search/schema/CollectionSchema.java @@ -341,7 +341,7 @@ public enum CollectionSchema implements SchemaDeclaration { public final String getComment() { return this.comment; } - + @Override public final void add(final SolrInputDocument doc, final String value) { assert !this.isMultiValued(); @@ -431,5 +431,9 @@ public enum CollectionSchema implements SchemaDeclaration { doc.setField(this.getSolrFieldName(), value); } + @Override + public final void remove(final SolrInputDocument doc) { + doc.removeField(this.getSolrFieldName()); + } } diff --git a/source/net/yacy/search/schema/WebgraphSchema.java b/source/net/yacy/search/schema/WebgraphSchema.java index 8f2ceded9..18837ad58 100644 --- a/source/net/yacy/search/schema/WebgraphSchema.java +++ b/source/net/yacy/search/schema/WebgraphSchema.java @@ -284,5 +284,9 @@ public enum WebgraphSchema implements SchemaDeclaration { doc.setField(this.getSolrFieldName(), value); } + @Override + public final void remove(final SolrInputDocument doc) { + doc.removeField(this.getSolrFieldName()); + } } From e5b6424eeda75ac16861ccb941f54ec30d74ef91 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 10 Jul 2015 17:14:14 +0200 Subject: [PATCH 2/3] patch for bad windows file paths --- .../net/yacy/cora/document/id/MultiProtocolURL.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 18758b72e..c3c1924c2 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -271,12 +271,16 @@ public class MultiProtocolURL implements Serializable, Comparable 4 && h.charAt(3) == ':' && h.charAt(4) != '/' && h.charAt(4) != '\\') { // wrong windows path, after the doublepoint there should be a backslash h = h.substring(0, 4) + '\\' + h.substring(4); } int q = h.indexOf('/', 2); - if (q < 0) { + if (q < 0 || h.length() > 3 && h.charAt(3) == ':') { this.path = h.substring(2); // "path" or "c:/path" } else { this.host = h.substring(2, q ); // TODO: handle "c:" ? @@ -2314,13 +2314,13 @@ public class MultiProtocolURL implements Serializable, Comparable 0) normalizedURL = normalizedURL.substring(p + 2); return splitpattern.split(normalizedURL.toLowerCase()); // word components of the url } -/* - public static void main(final String[] args) { - for (final String s: args) System.out.println(toTokens(s)); - } -*/ + public static void main(final String[] args) { final String[][] test = new String[][]{ + new String[]{null, "file://y:/"}, + new String[]{null, "file://y:/yacy"}, + new String[]{null, "file://y:/yacy/"}, + new String[]{null, "file://y:"}, new String[]{null, "file://Z:admin\\home"}, // thats wrong but may appear new String[]{null, "file://Z:\\admin\\home"}, new String[]{null, "https://www.example.com/shoe/?p=2&ps=75#t={%22san_NaviPaging%22:2}"}, // ugly strange pagination link @@ -2395,6 +2395,10 @@ public class MultiProtocolURL implements Serializable, Comparable