diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list
index ed6392b50..bb33c44ac 100644
--- a/defaults/solr.keys.list
+++ b/defaults/solr.keys.list
@@ -111,6 +111,9 @@ attr_paths
## host of the url, string
host_s
+## url inside the canonical link element, string
+canonical_s
+
## all texts in
tags, textgen
attr_li
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 6657def71..7da70867a 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -561,15 +561,21 @@ public final class Switchboard extends serverSwitch {
TextParser.setDenyMime(getConfig(SwitchboardConstants.PARSER_MIME_DENY, ""));
// prepare a solr index profile switch list
+ final File solrBackupProfile = new File("defaults/solr.keys.list");
final File solrWorkProfile = new File(getDataPath(), "DATA/SETTINGS/solr.keys.default.list");
- if (!solrWorkProfile.exists()) FileUtils.copy(new File("defaults/solr.keys.list"), solrWorkProfile);
- final SolrScheme scheme = new SolrScheme(solrWorkProfile);
+ if (!solrWorkProfile.exists()) FileUtils.copy(solrBackupProfile, solrWorkProfile);
+ final SolrScheme backupScheme = new SolrScheme(solrBackupProfile);
+ final SolrScheme workingScheme = new SolrScheme(solrWorkProfile);
+
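+ // TODO: merge the backup scheme into the working scheme so that newly introduced fields become part of the working profile.
+ // new features are meant to be activated by default. NOTE(review): backupScheme is not used in the lines below - confirm where the merge happens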
+
// set up the solr interface
final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
final boolean usesolr = getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
try {
- this.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
+ this.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, workingScheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
} catch (final IOException e) {
Log.logException(e);
this.solrConnector = null;
diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
index 7b548221a..7d6bd5513 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@@ -181,6 +181,9 @@ public class SolrScheme extends ConfigurationSet {
}
addSolr(solrdoc, "htags_i", h);
+ // canonical tag
+ if (html.getCanonical() != null) addSolr(solrdoc, "canonical_s", html.getCanonical().toNormalform(false, false));
+
// meta tags
final Map metas = html.getMetas();
final String robots = metas.get("robots");
diff --git a/source/net/yacy/cora/storage/ConfigurationSet.java b/source/net/yacy/cora/storage/ConfigurationSet.java
index 3f2f1d176..3232fad1a 100644
--- a/source/net/yacy/cora/storage/ConfigurationSet.java
+++ b/source/net/yacy/cora/storage/ConfigurationSet.java
@@ -72,6 +72,7 @@ public class ConfigurationSet extends AbstractSet implements Set
}
}
+
@Override
public boolean isEmpty() {
// a shortcut to a fast 'true' in case that we initialized the class without a configuration file
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 908447d22..6e292d4cd 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -125,6 +125,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private CharBuffer content;
private final EventListenerList htmlFilterEventListeners;
private float lon, lat;
+ private MultiProtocolURI canonical;
/**
* {@link MultiProtocolURI} to the favicon that belongs to the document
@@ -167,6 +168,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.lon = 0.0f;
this.lat = 0.0f;
this.evaluationScores.match(Element.url, root.toNormalform(false, false));
+ this.canonical = null;
}
public void scrapeText(final char[] newtext, final String insideTag) {
@@ -345,6 +347,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1);
this.images.put(ie.url(), ie);
this.favicon = newLink;
+ } else if (rel.equalsIgnoreCase("canonical")) {
+ final Properties p = new Properties(); p.put("name", this.title);
+ this.anchors.put(newLink, p);
+ this.canonical = newLink;
} else if (rel.equalsIgnoreCase("alternate") && type.equalsIgnoreCase("application/rss+xml")) {
this.rss.put(newLink, linktitle);
} else if (rel.equalsIgnoreCase("stylesheet") && type.equalsIgnoreCase("text/css")) {
@@ -599,6 +605,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
return this.script;
}
+ public MultiProtocolURI getCanonical() {
+ return this.canonical; // null unless a link element with rel="canonical" was scraped
+ }
+
/**
* get all images
* @return a map of