diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list
index 0bec57872..ed6392b50 100644
--- a/defaults/solr.keys.list
+++ b/defaults/solr.keys.list
@@ -141,7 +141,7 @@ attr_images
## number of images, int
imagescount_i
-## flag that shows if a swf file is linked, boolen
+## flag that shows if a swf file is linked, boolean
flash_b
## list of all links to frames, textgen
diff --git a/defaults/yacy.init b/defaults/yacy.init
index aa8e7f955..f962a24db 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -1029,4 +1029,4 @@ federated.service.yacy.indexing.enabled = true
federated.service.solr.indexing.enabled = false
federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
federated.service.solr.indexing.charding = MODULO_HOST_MD5
-federated.service.solr.indexing.scheme = SolrCellExtended
+federated.service.solr.indexing.schemefile = solr.keys.default.list
diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html
index 6888725db..86f735e18 100644
--- a/htroot/IndexFederated_p.html
+++ b/htroot/IndexFederated_p.html
@@ -21,6 +21,7 @@
You can just switch on or off this index. If you switch it off, you will not be able to search with YaCy any more.
+
diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java
index 54b39fe0d..627093db0 100644
--- a/htroot/IndexFederated_p.java
+++ b/htroot/IndexFederated_p.java
@@ -11,25 +11,27 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see .
*/
+import java.io.File;
import java.io.IOException;
+import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.solr.SolrChardingConnector;
import net.yacy.cora.services.federated.solr.SolrChardingSelection;
import net.yacy.cora.services.federated.solr.SolrScheme;
+import net.yacy.cora.storage.ConfigurationSet;
import net.yacy.kelondro.logging.Log;
-
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -39,47 +41,65 @@ public class IndexFederated_p {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
- Switchboard sb = (Switchboard) env;
+ final Switchboard sb = (Switchboard) env;
if (post != null && post.containsKey("set")) {
// yacy
env.setConfig("federated.service.yacy.indexing.enabled", post.getBoolean("yacy.indexing.enabled", false));
-
+
// solr
- boolean solrWasOn = env.getConfigBool("federated.service.solr.indexing.enabled", true);
- boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.enabled", false);
+ final boolean solrWasOn = env.getConfigBool("federated.service.solr.indexing.enabled", true);
+ final boolean solrIsOnAfterwards = post.getBoolean("solr.indexing.enabled", false);
env.setConfig("federated.service.solr.indexing.enabled", solrIsOnAfterwards);
env.setConfig("federated.service.solr.indexing.url", post.get("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr")));
env.setConfig("federated.service.solr.indexing.charding", post.get("solr.indexing.charding", env.getConfig("federated.service.solr.indexing.charding", "modulo-host-md5")));
- env.setConfig("federated.service.solr.indexing.scheme", post.get("solr.indexing.scheme", env.getConfig("federated.service.solr.indexing.scheme", "SolrCellExtended")));
+ env.setConfig("federated.service.solr.indexing.schemefile", post.get("solr.indexing.schemefile", env.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list")));
if (solrWasOn && !solrIsOnAfterwards) {
// switch off
sb.solrConnector.close();
sb.solrConnector = null;
}
-
+
if (!solrWasOn && solrIsOnAfterwards) {
// switch on
- String solrurls = sb.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
- boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
+ final String solrurls = sb.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
+ final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
+ final SolrScheme scheme = new SolrScheme(new File(env.getDataPath(), "DATA/SETTINGS/solr.keys.default.list"));
try {
- sb.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, SolrScheme.SolrCellExtended, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
- } catch (IOException e) {
+ sb.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
+ } catch (final IOException e) {
Log.logException(e);
sb.solrConnector = null;
}
}
+
+ // read index scheme table flags; while solr is switched off there is no connector and therefore no scheme to update
+ final SolrScheme scheme = (sb.solrConnector == null) ? null : sb.solrConnector.getScheme();
+ final Iterator i = (scheme == null) ? null : scheme.allIterator();
+ ConfigurationSet.Entry entry;
+ while (i != null && i.hasNext()) {
+ entry = i.next();
+ final String v = post.get("scheme_" + entry.key());
+ final boolean c = v != null && v.equals("checked");
+ try {
+ if (entry.enabled()) {
+ if (!c) scheme.disable(entry.key());
+ } else {
+ if (c) scheme.enable(entry.key());
+ }
+ } catch (final IOException e) { Log.logException(e); } // report a failed flag update instead of dropping it silently
+ }
}
-
+
// show solr host table
if (sb.solrConnector == null) {
prop.put("table", 0);
} else {
prop.put("table", 1);
try {
- long[] size = sb.solrConnector.getSizeList();
- String[] urls = sb.solrConnector.getAdminInterfaceList();
+ final long[] size = sb.solrConnector.getSizeList();
+ final String[] urls = sb.solrConnector.getAdminInterfaceList();
boolean dark = false;
for (int i = 0; i < size.length; i++) {
prop.put("table_list_" + i + "_dark", dark ? 1 : 0); dark = !dark;
@@ -87,18 +107,34 @@ public class IndexFederated_p {
prop.put("table_list_" + i + "_size", size[i]);
}
prop.put("table_list", size.length);
- } catch (IOException e) {
+
+ // write scheme
+ final SolrScheme scheme = sb.solrConnector.getScheme();
+ final Iterator i = scheme.allIterator();
+ int c = 0;
+ dark = false;
+ ConfigurationSet.Entry entry;
+ while (i.hasNext()) {
+ entry = i.next();
+ prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
+ prop.put("scheme_" + c + "_checked", scheme.contains(entry.key()) ? 1 : 0);
+ prop.putHTML("scheme_" + c + "_key", entry.key());
+ prop.putHTML("scheme_" + c + "_comment", scheme.commentHeadline(entry.key()));
+ c++;
+ }
+ prop.put("scheme", c);
+ } catch (final IOException e) {
Log.logException(e);
prop.put("table", 0);
}
}
-
+
// fill attribute fields
prop.put("yacy.indexing.enabled.checked", env.getConfigBool("federated.service.yacy.indexing.enabled", true) ? 1 : 0);
prop.put("solr.indexing.enabled.checked", env.getConfigBool("federated.service.solr.indexing.enabled", false) ? 1 : 0);
prop.put("solr.indexing.url", env.getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr"));
prop.put("solr.indexing.charding", env.getConfig("federated.service.solr.indexing.charding", "modulo-host-md5"));
- prop.put("solr.indexing.scheme", env.getConfig("federated.service.solr.indexing.scheme", "SolrCellExtended"));
+ prop.put("solr.indexing.schemefile", env.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list"));
// return rewrite properties
return prop;
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 91c6e8334..6657def71 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -560,11 +560,16 @@ public final class Switchboard extends serverSwitch {
this.log.logConfig("Parser: Initializing Mime Type deny list");
TextParser.setDenyMime(getConfig(SwitchboardConstants.PARSER_MIME_DENY, ""));
+ // prepare a solr index profile switch list
+ final File solrWorkProfile = new File(getDataPath(), "DATA/SETTINGS/solr.keys.default.list");
+ if (!solrWorkProfile.exists()) FileUtils.copy(new File("defaults/solr.keys.list"), solrWorkProfile);
+ final SolrScheme scheme = new SolrScheme(solrWorkProfile);
+
// set up the solr interface
final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
final boolean usesolr = getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
try {
- this.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, SolrScheme.SolrCellExtended, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
+ this.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
} catch (final IOException e) {
Log.logException(e);
this.solrConnector = null;
diff --git a/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java b/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
index 34be34b52..d4287a2a0 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
@@ -55,6 +55,10 @@ public class SolrChardingConnector {
this.scheme = scheme;
}
+ public SolrScheme getScheme() {
+ return this.scheme;
+ }
+
public void close() {
for (final SolrSingleConnector connector: this.connectors) connector.close();
}
diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
index 98dda52ca..7b548221a 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see .
@@ -25,211 +25,271 @@
package net.yacy.cora.services.federated.solr;
+import java.io.File;
import java.net.InetAddress;
import java.util.Collection;
+import java.util.Date;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
+import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.ResponseHeader;
+import net.yacy.cora.storage.ConfigurationSet;
import net.yacy.document.Document;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.cora.document.MultiProtocolURI;
+
import org.apache.solr.common.SolrInputDocument;
-public enum SolrScheme {
+public class SolrScheme extends ConfigurationSet {
+
+ /**
+ * initialize with an empty ConfigurationSet; an empty set means that all
+ * index attributes are used
+ */
+ public SolrScheme() {
+ super();
+ }
+
+ /**
+ * initialize the scheme with a given configuration file;
+ * the configuration file contains a list of lines with index attribute keys
+ * @param configurationFile the file that lists the attribute keys of this scheme
+ */
+ public SolrScheme(final File configurationFile) {
+ super(configurationFile);
+ }
- SolrCell,
- SolrCellExtended,
- DublinCore;
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final String value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
+ }
-
- public SolrInputDocument yacy2solr(String id, ResponseHeader header, Document document) {
- if (this == SolrCellExtended) return yacy2solrSolrCellExtended(id, header, document);
- return null;
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final Date value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
}
-
- public static SolrInputDocument yacy2solrSolrCellExtended(String id, ResponseHeader header, Document yacydoc) {
+
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final int value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
+ }
+
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final String[] value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
+ }
+
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final float value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
+ }
+
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final boolean value) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value);
+ }
+
+ private void addSolr(final SolrInputDocument solrdoc, final String key, final String value, final float boost) {
+ if (isEmpty() || contains(key)) solrdoc.setField(key, value, boost);
+ }
+
+ public SolrInputDocument yacy2solr(final String id, final ResponseHeader header, final Document yacydoc) {
// we user the SolrCell design as index scheme
- SolrInputDocument solrdoc = new SolrInputDocument();
- DigestURI digestURI = new DigestURI(yacydoc.dc_source());
- solrdoc.addField("failreason_t", ""); // overwrite a possible fail reason (in case that there was a fail reason before)
- solrdoc.addField("id", id);
- solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
- InetAddress address = Domains.dnsResolve(digestURI.getHost());
- if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
- if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());
- solrdoc.addField("title", yacydoc.dc_title());
- solrdoc.addField("author", yacydoc.dc_creator());
- solrdoc.addField("description", yacydoc.dc_description());
- solrdoc.addField("content_type", yacydoc.dc_format());
- solrdoc.addField("last_modified", header.lastModified());
- solrdoc.addField("keywords", yacydoc.dc_subject(' '));
- String content = UTF8.String(yacydoc.getTextBytes());
- solrdoc.addField("text_t", content);
- int contentwc = content.split(" ").length;
- solrdoc.addField("wordcount_i", contentwc);
+ final SolrInputDocument solrdoc = new SolrInputDocument();
+ final DigestURI digestURI = new DigestURI(yacydoc.dc_source());
+ addSolr(solrdoc, "failreason_t", ""); // overwrite a possible fail reason (in case that there was a fail reason before)
+ addSolr(solrdoc, "id", id);
+ addSolr(solrdoc, "sku", digestURI.toNormalform(true, false), 3.0f);
+ final InetAddress address = Domains.dnsResolve(digestURI.getHost());
+ if (address != null) addSolr(solrdoc, "ip_s", address.getHostAddress());
+ if (digestURI.getHost() != null) addSolr(solrdoc, "host_s", digestURI.getHost());
+ addSolr(solrdoc, "title", yacydoc.dc_title());
+ addSolr(solrdoc, "author", yacydoc.dc_creator());
+ addSolr(solrdoc, "description", yacydoc.dc_description());
+ addSolr(solrdoc, "content_type", yacydoc.dc_format());
+ addSolr(solrdoc, "last_modified", header.lastModified());
+ addSolr(solrdoc, "keywords", yacydoc.dc_subject(' '));
+ final String content = UTF8.String(yacydoc.getTextBytes());
+ addSolr(solrdoc, "text_t", content);
+ if (isEmpty() || contains("wordcount_i")) { // an empty scheme enables every field, same rule as in addSolr
+ final int contentwc = content.split(" ").length;
+ addSolr(solrdoc, "wordcount_i", contentwc);
+ }
// path elements of link
- String path = digestURI.getPath();
- if (path != null) {
- String[] paths = path.split("/");
- if (paths.length > 0) solrdoc.addField("attr_paths", paths);
+ final String path = digestURI.getPath();
+ if (path != null && (isEmpty() || contains("attr_paths"))) { // an empty scheme enables every field, same rule as in addSolr
+ final String[] paths = path.split("/");
+ if (paths.length > 0) addSolr(solrdoc, "attr_paths", paths);
}
-
+
// list all links
- Map alllinks = yacydoc.getAnchors();
+ final Map alllinks = yacydoc.getAnchors();
int c = 0;
- String[] inboundlinks = new String[yacydoc.inboundLinkCount()];
- solrdoc.addField("inboundlinkscount_i", inboundlinks.length);
- for (MultiProtocolURI url: yacydoc.inboundLinks()) {
- Properties p = alllinks.get(url);
- String name = p.getProperty("name", "");
- String rel = p.getProperty("rel", "");
- inboundlinks[c++] =
- "" +
- ((name.length() > 0) ? name : "") + "";
+ addSolr(solrdoc, "inboundlinkscount_i", yacydoc.inboundLinkCount());
+ if (isEmpty() || contains("attr_inboundlinks")) { // an empty scheme enables every field, same rule as in addSolr
+ final String[] inboundlinks = new String[yacydoc.inboundLinkCount()];
+ for (final MultiProtocolURI url: yacydoc.inboundLinks()) {
+ final Properties p = alllinks.get(url);
+ final String name = p.getProperty("name", "");
+ final String rel = p.getProperty("rel", "");
+ inboundlinks[c++] =
+ "" +
+ ((name.length() > 0) ? name : "") + "";
+ }
+ addSolr(solrdoc, "attr_inboundlinks", inboundlinks);
}
- solrdoc.addField("attr_inboundlinks", inboundlinks);
c = 0;
- String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
- solrdoc.addField("outboundlinkscount_i", outboundlinks.length);
- for (MultiProtocolURI url: yacydoc.outboundLinks()) {
- Properties p = alllinks.get(url);
- String name = p.getProperty("name", "");
- String rel = p.getProperty("rel", "");
- outboundlinks[c++] =
- "" +
- ((name.length() > 0) ? name : "") + "";
+ addSolr(solrdoc, "outboundlinkscount_i", yacydoc.outboundLinkCount()); // the count has its own scheme key, handled like inboundlinkscount_i
+ if (isEmpty() || contains("attr_outboundlinks")) { // an empty scheme enables every field, same rule as in addSolr
+ final String[] outboundlinks = new String[yacydoc.outboundLinkCount()];
+ for (final MultiProtocolURI url: yacydoc.outboundLinks()) {
+ final Properties p = alllinks.get(url);
+ final String name = p.getProperty("name", "");
+ final String rel = p.getProperty("rel", "");
+ outboundlinks[c++] =
+ "" +
+ ((name.length() > 0) ? name : "") + "";
+ }
+ addSolr(solrdoc, "attr_outboundlinks", outboundlinks);
}
- solrdoc.addField("attr_outboundlinks", outboundlinks);
-
// charset
- solrdoc.addField("charset_s", yacydoc.getCharset());
+ addSolr(solrdoc, "charset_s", yacydoc.getCharset());
// coordinates
if (yacydoc.lat() != 0.0f && yacydoc.lon() != 0.0f) {
- solrdoc.addField("lon_coordinate", yacydoc.lon());
- solrdoc.addField("lat_coordinate", yacydoc.lat());
+ addSolr(solrdoc, "lon_coordinate", yacydoc.lon());
+ addSolr(solrdoc, "lat_coordinate", yacydoc.lat());
}
- solrdoc.addField("httpstatus_i", 200);
- Object parser = yacydoc.getParserObject();
+ addSolr(solrdoc, "httpstatus_i", 200);
+ final Object parser = yacydoc.getParserObject();
if (parser instanceof ContentScraper) {
- ContentScraper html = (ContentScraper) parser;
-
+ final ContentScraper html = (ContentScraper) parser;
+
// header tags
int h = 0;
int f = 1;
for (int i = 1; i <= 6; i++) {
- String[] hs = html.getHeadlines(i);
+ final String[] hs = html.getHeadlines(i);
h = h | (hs.length > 0 ? f : 0);
f = f * 2;
- solrdoc.addField("attr_h" + i, hs);
+ addSolr(solrdoc, "attr_h" + i, hs);
}
- solrdoc.addField("htags_i", h);
+ addSolr(solrdoc, "htags_i", h);
// meta tags
- Map metas = html.getMetas();
- String robots = metas.get("robots");
- if (robots != null) solrdoc.addField("metarobots_t", robots);
- String generator = metas.get("generator");
- if (generator != null) solrdoc.addField("metagenerator_t", generator);
-
+ final Map metas = html.getMetas();
+ final String robots = metas.get("robots");
+ if (robots != null) addSolr(solrdoc, "metarobots_t", robots);
+ final String generator = metas.get("generator");
+ if (generator != null) addSolr(solrdoc, "metagenerator_t", generator);
+
// bold, italic
- String[] bold = html.getBold();
- solrdoc.addField("boldcount_i", bold.length);
+ final String[] bold = html.getBold();
+ addSolr(solrdoc, "boldcount_i", bold.length);
if (bold.length > 0) {
- solrdoc.addField("attr_bold", bold);
- solrdoc.addField("attr_boldcount", html.getBoldCount(bold));
+ addSolr(solrdoc, "attr_bold", bold);
+ if (isEmpty() || contains("attr_boldcount")) { // an empty scheme enables every field, same rule as in addSolr
+ addSolr(solrdoc, "attr_boldcount", html.getBoldCount(bold));
+ }
}
- String[] italic = html.getItalic();
- solrdoc.addField("italiccount_i", italic.length);
+ final String[] italic = html.getItalic();
+ addSolr(solrdoc, "italiccount_i", italic.length);
if (italic.length > 0) {
- solrdoc.addField("attr_italic", italic);
- solrdoc.addField("attr_italiccount", html.getItalicCount(italic));
+ addSolr(solrdoc, "attr_italic", italic);
+ if (isEmpty() || contains("attr_italiccount")) { // an empty scheme enables every field, same rule as in addSolr
+ addSolr(solrdoc, "attr_italiccount", html.getItalicCount(italic));
+ }
}
- String[] li = html.getLi();
- solrdoc.addField("licount_i", li.length);
- if (li.length > 0) solrdoc.addField("attr_li", li);
-
+ final String[] li = html.getLi();
+ addSolr(solrdoc, "licount_i", li.length);
+ if (li.length > 0) addSolr(solrdoc, "attr_li", li);
+
// images
- Collection imagesc = html.getImages().values();
- String[] images = new String[imagesc.size()];
- c = 0;
- for (ImageEntry ie: imagesc) images[c++] = ie.toString();
- solrdoc.addField("imagescount_i", images.length);
- if (images.length > 0) solrdoc.addField("attr_images", images);
+ if (isEmpty() || contains("attr_images") || contains("imagescount_i")) { // either key needs the image list; addSolr filters each field
+ final Collection imagesc = html.getImages().values();
+ final String[] images = new String[imagesc.size()];
+ c = 0;
+ for (final ImageEntry ie: imagesc) images[c++] = ie.toString();
+ addSolr(solrdoc, "imagescount_i", images.length);
+ if (images.length > 0) addSolr(solrdoc, "attr_images", images);
+ }
// style sheets
- Map csss = html.getCSS();
- String[] css = new String[csss.size()];
- c = 0;
- for (Map.Entry entry: csss.entrySet()) {
- css[c++] =
- "";
+ if (isEmpty() || contains("attr_css") || contains("csscount_i")) { // either key needs the css list; addSolr filters each field
+ final Map csss = html.getCSS();
+ final String[] css = new String[csss.size()];
+ c = 0;
+ for (final Map.Entry entry: csss.entrySet()) {
+ css[c++] =
+ "";
+ }
+ addSolr(solrdoc, "csscount_i", css.length);
+ if (css.length > 0) addSolr(solrdoc, "attr_css", css);
}
- solrdoc.addField("csscount_i", css.length);
- if (css.length > 0) solrdoc.addField("attr_css", css);
-
+
// Scripts
- Set scriptss = html.getScript();
- String[] scripts = new String[scriptss.size()];
- c = 0;
- for (MultiProtocolURI url: scriptss) {
- scripts[c++] = url.toNormalform(false, false, false, false);
+ if (isEmpty() || contains("attr_scripts") || contains("scriptscount_i")) { // either key needs the script list; addSolr filters each field
+ final Set scriptss = html.getScript();
+ final String[] scripts = new String[scriptss.size()];
+ c = 0;
+ for (final MultiProtocolURI url: scriptss) {
+ scripts[c++] = url.toNormalform(false, false, false, false);
+ }
+ addSolr(solrdoc, "scriptscount_i", scripts.length);
+ if (scripts.length > 0) addSolr(solrdoc, "attr_scripts", scripts);
}
- solrdoc.addField("scriptscount_i", scripts.length);
- if (scripts.length > 0) solrdoc.addField("attr_scripts", scripts);
-
+
// Frames
- Set framess = html.getFrames();
- String[] frames = new String[framess.size()];
- c = 0;
- for (MultiProtocolURI entry: framess) {
- frames[c++] = entry.toNormalform(false, false, false, false);
+ if (isEmpty() || contains("attr_frames") || contains("framesscount_i")) { // either key needs the frame list; addSolr filters each field
+ final Set framess = html.getFrames();
+ final String[] frames = new String[framess.size()];
+ c = 0;
+ for (final MultiProtocolURI entry: framess) {
+ frames[c++] = entry.toNormalform(false, false, false, false);
+ }
+ addSolr(solrdoc, "framesscount_i", frames.length);
+ if (frames.length > 0) addSolr(solrdoc, "attr_frames", frames);
}
- solrdoc.addField("framesscount_i", frames.length);
- if (frames.length > 0) solrdoc.addField("attr_frames", frames);
-
+
// IFrames
- Set iframess = html.getIFrames();
- String[] iframes = new String[iframess.size()];
- c = 0;
- for (MultiProtocolURI entry: iframess) {
- iframes[c++] = entry.toNormalform(false, false, false, false);
+ if (isEmpty() || contains("attr_iframes") || contains("iframesscount_i")) { // either key needs the iframe list; addSolr filters each field
+ final Set iframess = html.getIFrames();
+ final String[] iframes = new String[iframess.size()];
+ c = 0;
+ for (final MultiProtocolURI entry: iframess) {
+ iframes[c++] = entry.toNormalform(false, false, false, false);
+ }
+ addSolr(solrdoc, "iframesscount_i", iframes.length);
+ if (iframes.length > 0) addSolr(solrdoc, "attr_iframes", iframes);
}
- solrdoc.addField("iframesscount_i", iframes.length);
- if (iframes.length > 0) solrdoc.addField("attr_iframes", iframes);
-
+
// flash embedded
- solrdoc.addField("flash_b", html.containsFlash());
-
+ addSolr(solrdoc, "flash_b", html.containsFlash());
+
// generic evaluation pattern
- for (String model: html.getEvaluationModelNames()) {
- String[] scorenames = html.getEvaluationModelScoreNames(model);
- if (scorenames.length > 0) {
- solrdoc.addField("attr_" + model, scorenames);
- solrdoc.addField("attr_" + model + "count", html.getEvaluationModelScoreCounts(model, scorenames));
+ for (final String model: html.getEvaluationModelNames()) {
+ if (isEmpty() || contains("attr_" + model) || contains("attr_" + model + "count")) { // either key needs the score names; addSolr filters each field
+ final String[] scorenames = html.getEvaluationModelScoreNames(model);
+ if (scorenames.length > 0) {
+ addSolr(solrdoc, "attr_" + model, scorenames);
+ addSolr(solrdoc, "attr_" + model + "count", html.getEvaluationModelScoreCounts(model, scorenames));
+ }
}
}
-
+
// response time
- solrdoc.addField("responsetime_i", header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0"));
+ addSolr(solrdoc, "responsetime_i", header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0"));
}
return solrdoc;
}
-
-
+
+
/*
* standard solr scheme
diff --git a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
index 82e549c28..44867c73b 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
@@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see .
@@ -34,6 +34,13 @@ import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
+import net.yacy.cora.document.ASCII;
+import net.yacy.cora.protocol.Domains;
+import net.yacy.cora.protocol.ResponseHeader;
+import net.yacy.document.Document;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
+
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
@@ -42,38 +49,31 @@ import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
-import net.yacy.cora.document.ASCII;
-import net.yacy.cora.protocol.Domains;
-import net.yacy.cora.protocol.ResponseHeader;
-import net.yacy.document.Document;
-import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.kelondro.logging.Log;
-
public class SolrSingleConnector {
- private String solrurl;
+ private final String solrurl;
private SolrServer server;
- private SolrScheme scheme;
-
+ private final SolrScheme scheme;
+
private final static int transmissionQueueCount = 4; // allow concurrent http sessions to solr
private final static int transmissionQueueSize = 50; // number of documents that are collected until a commit is sent
- private Worker[] transmissionWorker; // the transmission workers to solr
- private BlockingQueue[] transmissionQueue; // the queues quere documents are collected
+ private final Worker[] transmissionWorker; // the transmission workers to solr
+ private final BlockingQueue[] transmissionQueue; // the queues where documents are collected
private int transmissionRoundRobinCounter; // a rount robin counter for the transmission queues
-
+
@SuppressWarnings("unchecked")
- public SolrSingleConnector(String url, SolrScheme scheme) throws IOException {
+ public SolrSingleConnector(final String url, final SolrScheme scheme) throws IOException {
this.solrurl = url;
this.scheme = scheme;
- transmissionRoundRobinCounter = 0;
+ this.transmissionRoundRobinCounter = 0;
this.transmissionQueue = new ArrayBlockingQueue[transmissionQueueCount];
for (int i = 0; i < transmissionQueueCount; i++) {
this.transmissionQueue[i] = new ArrayBlockingQueue(transmissionQueueSize);
}
try {
this.server = new SolrHTTPClient(this.solrurl);
- } catch (MalformedURLException e) {
+ } catch (final MalformedURLException e) {
throw new IOException("bad connector url: " + this.solrurl);
}
this.transmissionWorker = new Worker[transmissionQueueCount];
@@ -86,7 +86,7 @@ public class SolrSingleConnector {
private class Worker extends Thread {
boolean shallRun;
int idx;
- public Worker(int i) {
+ public Worker(final int i) {
this.idx = i;
this.shallRun = true;
}
@@ -95,86 +95,86 @@ public class SolrSingleConnector {
}
public void run() {
while (this.shallRun) {
- if (transmissionQueue[idx].size() > 0) {
+ if (SolrSingleConnector.this.transmissionQueue[this.idx].size() > 0) {
try {
- flushTransmissionQueue(idx);
- } catch (IOException e) {
+ flushTransmissionQueue(this.idx);
+ } catch (final IOException e) {
Log.logSevere("SolrSingleConnector", "flush Transmission failed in worker", e);
continue;
}
} else {
- try {Thread.sleep(1000);} catch (InterruptedException e) {}
+ try {Thread.sleep(1000);} catch (final InterruptedException e) {}
}
}
try {
- flushTransmissionQueue(idx);
- } catch (IOException e) {}
+ flushTransmissionQueue(this.idx);
+ } catch (final IOException e) {}
}
}
-
+
public void close() {
for (int i = 0; i < transmissionQueueCount; i++) {
if (this.transmissionWorker[i].isAlive()) {
this.transmissionWorker[i].pleaseStop();
- try {this.transmissionWorker[i].join();} catch (InterruptedException e) {}
+ try {this.transmissionWorker[i].join();} catch (final InterruptedException e) {}
}
}
for (int i = 0; i < transmissionQueueCount; i++) {
try {
flushTransmissionQueue(i);
- } catch (IOException e) {}
+ } catch (final IOException e) {}
}
}
-
+
/**
* delete everything in the solr index
* @throws IOException
*/
public void clear() throws IOException {
try {
- server.deleteByQuery("*:*");
- server.commit();
- } catch (SolrServerException e) {
+ this.server.deleteByQuery("*:*");
+ this.server.commit();
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
}
-
- public void delete(String id) throws IOException {
+
+ public void delete(final String id) throws IOException {
try {
- server.deleteById(id);
- } catch (SolrServerException e) {
+ this.server.deleteById(id);
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
}
-
- public void delete(List ids) throws IOException {
+
+ public void delete(final List ids) throws IOException {
try {
- server.deleteById(ids);
- } catch (SolrServerException e) {
+ this.server.deleteById(ids);
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
}
-
- public void add(File file, String solrId) throws IOException {
- ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
+
+ public void add(final File file, final String solrId) throws IOException {
+ final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
up.addFile(file);
up.setParam("literal.id", solrId);
up.setParam("uprefix", "attr_");
up.setParam("fmap.content", "attr_content");
//up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
try {
- server.request(up);
- server.commit();
- } catch (SolrServerException e) {
+ this.server.request(up);
+ this.server.commit();
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
}
-
- public void add(String id, ResponseHeader header, Document doc) throws IOException {
+
+ public void add(final String id, final ResponseHeader header, final Document doc) throws IOException {
add(this.scheme.yacy2solr(id, header, doc));
}
- protected void add(SolrInputDocument solrdoc) throws IOException {
+ protected void add(final SolrInputDocument solrdoc) throws IOException {
int thisrrc = this.transmissionRoundRobinCounter;
int nextrrc = thisrrc++;
if (nextrrc >= transmissionQueueCount) nextrrc = 0;
@@ -183,81 +183,81 @@ public class SolrSingleConnector {
this.transmissionQueue[thisrrc].offer(solrdoc);
} else {
if (this.transmissionQueue[thisrrc].size() > 0) flushTransmissionQueue(thisrrc);
- Collection docs = new ArrayList();
+ final Collection docs = new ArrayList();
docs.add(solrdoc);
addSolr(docs);
}
}
-
- protected void addSolr(Collection docs) throws IOException {
+
+ protected void addSolr(final Collection docs) throws IOException {
try {
- server.add(docs);
- server.commit();
- /* To immediately commit after adding documents, you could use:
+ this.server.add(docs);
+ this.server.commit();
+ /* To immediately commit after adding documents, you could use:
UpdateRequest req = new UpdateRequest();
req.setAction( UpdateRequest.ACTION.COMMIT, false, false );
req.add( docs );
UpdateResponse rsp = req.process( server );
*/
- } catch (SolrServerException e) {
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
}
-
- public void err(DigestURI digestURI, String failReason, int httpstatus) throws IOException {
-
- SolrInputDocument solrdoc = new SolrInputDocument();
+
+ public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
+
+ final SolrInputDocument solrdoc = new SolrInputDocument();
solrdoc.addField("id", ASCII.String(digestURI.hash()));
solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
- InetAddress address = Domains.dnsResolve(digestURI.getHost());
+ final InetAddress address = Domains.dnsResolve(digestURI.getHost());
if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());
// path elements of link
- String path = digestURI.getPath();
+ final String path = digestURI.getPath();
if (path != null) {
- String[] paths = path.split("/");
+ final String[] paths = path.split("/");
if (paths.length > 0) solrdoc.addField("attr_paths", paths);
}
solrdoc.addField("failreason_t", failReason);
solrdoc.addField("httpstatus_i", httpstatus);
-
+
add(solrdoc);
}
-
- private void flushTransmissionQueue(int idx) throws IOException {
- Collection c = new ArrayList();
+
+ private void flushTransmissionQueue(final int idx) throws IOException {
+ final Collection c = new ArrayList();
while (this.transmissionQueue[idx].size() > 0) {
try {
c.add(this.transmissionQueue[idx].take());
- } catch (InterruptedException e) {
+ } catch (final InterruptedException e) {
continue;
}
}
addSolr(c);
}
-
-
+
+
/**
* get a query result from solr
* to get all results set the query String to "*:*"
* @param querystring
* @throws IOException
*/
- public SolrDocumentList get(String querystring, int offset, int count) throws IOException {
+ public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
// construct query
- SolrQuery query = new SolrQuery();
+ final SolrQuery query = new SolrQuery();
query.setQuery(querystring);
query.setRows(count);
query.setStart(offset);
query.addSortField( "price", SolrQuery.ORDER.asc );
-
+
// query the server
//SearchResult result = new SearchResult(count);
try {
- QueryResponse rsp = server.query( query );
- SolrDocumentList docs = rsp.getResults();
+ final QueryResponse rsp = this.server.query( query );
+ final SolrDocumentList docs = rsp.getResults();
return docs;
// add the docs into the YaCy search result container
/*
@@ -265,22 +265,22 @@ public class SolrSingleConnector {
result.put(element)
}
*/
- } catch (SolrServerException e) {
+ } catch (final SolrServerException e) {
throw new IOException(e);
}
-
+
//return result;
}
-
- public static void main(String args[]) {
+
+ public static void main(final String args[]) {
SolrSingleConnector solr;
try {
- solr = new SolrSingleConnector("http://127.0.0.1:8983/solr", SolrScheme.SolrCellExtended);
+ solr = new SolrSingleConnector("http://127.0.0.1:8983/solr", new SolrScheme());
solr.clear();
- File exampleDir = new File("/Data/workspace2/yacy/test/parsertest/");
+ final File exampleDir = new File("/Data/workspace2/yacy/test/parsertest/");
long t, t0, a = 0;
int c = 0;
- for (String s: exampleDir.list()) {
+ for (final String s: exampleDir.list()) {
if (s.startsWith(".")) continue;
t = System.currentTimeMillis();
solr.add(new File(exampleDir, s), s);
@@ -290,9 +290,9 @@ public class SolrSingleConnector {
System.out.println("pushed file " + s + " to solr, " + t0 + " milliseconds");
}
System.out.println("pushed " + c + " files in " + a + " milliseconds, " + (a / c) + " milliseconds average; " + (60000 / a * c) + " PPM");
- } catch (IOException e) {
+ } catch (final IOException e) {
e.printStackTrace();
}
}
-
+
}
diff --git a/source/net/yacy/cora/storage/ConfigurationSet.java b/source/net/yacy/cora/storage/ConfigurationSet.java
index 6ad6ce12e..3f2f1d176 100644
--- a/source/net/yacy/cora/storage/ConfigurationSet.java
+++ b/source/net/yacy/cora/storage/ConfigurationSet.java
@@ -40,6 +40,11 @@ import java.util.Set;
* the list may contain lines with one keyword, comment lines, empty lines and out-commented keyword lines
* when an attribute is changed here, the list is stored again with the original formatting
*
+ * the syntax of configuration files:
+ * - all lines beginning with '##' are comments
+ * - all non-empty lines not beginning with '#' are keyword lines
+ * - all lines beginning with a single '#' (i.e. the second character is not '#') are commented-out keyword lines
+ *
* @author Michael Christen
*/
public class ConfigurationSet extends AbstractSet implements Set {
@@ -47,6 +52,11 @@ public class ConfigurationSet extends AbstractSet implements Set
private final File file;
private String[] lines;
+ public ConfigurationSet() {
+ this.file = null;
+ this.lines = new String[0];
+ }
+
public ConfigurationSet(final File file) {
this.file = file;
try {
@@ -62,11 +72,18 @@ public class ConfigurationSet extends AbstractSet implements Set
}
}
+ @Override
+ public boolean isEmpty() {
+ // shortcut: answer 'true' quickly when the class was initialized without a configuration file
+ return this.lines == null || this.lines.length == 0 || super.isEmpty();
+ }
+
/**
* save the configuration back to the file
* @throws IOException
*/
private void commit() throws IOException {
+ if (this.file == null) return;
final BufferedWriter writer = new BufferedWriter(new FileWriter(this.file));
for (final String s: this.lines) {
writer.write(s);