From b2175ea4ef2ba5d68935a5168f49aa4c4bb0dc27 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 15 May 2012 22:34:02 +0200 Subject: [PATCH] Add possibility to set custom Solr field names for the YaCy default Solr attributes. - Changing the format of YaCy's solr.key.list while maintaining backward compatibility Federated index config screens adjusted accordingly - modified the Solr update request to use a 3 min Solr autocommit interval --- htroot/IndexFederated_p.html | 6 +- htroot/IndexFederated_p.java | 60 +-- htroot/api/schema_p.java | 36 +- htroot/api/schema_p.xml | 6 +- .../cora/services/federated/solr/SolrDoc.java | 16 +- .../services/federated/solr/SolrField.java | 5 +- .../federated/solr/SolrShardingSelection.java | 3 +- .../federated/solr/SolrSingleConnector.java | 13 +- .../yacy/cora/storage/ConfigurationSet.java | 426 +++++++++--------- source/net/yacy/search/Switchboard.java | 8 +- .../yacy/search/index/SolrConfiguration.java | 66 +-- source/net/yacy/search/index/SolrField.java | 32 +- .../net/yacy/search/query/SnippetProcess.java | 3 +- 13 files changed, 353 insertions(+), 327 deletions(-) mode change 100755 => 100644 htroot/api/schema_p.xml diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html index 6734799e3..00342e738 100644 --- a/htroot/IndexFederated_p.html +++ b/htroot/IndexFederated_p.html @@ -65,24 +65,26 @@
-

Index Scheme

+

Index Scheme

If you use a custom Solr schema you may enter a different field name in the column 'custom Solr Field Name' of the YaCy default attribute name

+ #{scheme}# + #{/scheme}#
Active Attributecustom Solr Field Name Comment
#[key]# #[comment]#
- + #%env/templates/footer.template%# diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index d0c56afe1..fce03a821 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -24,7 +24,6 @@ import java.io.BufferedReader; import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.util.Iterator; @@ -39,10 +38,9 @@ import net.yacy.cora.storage.ConfigurationSet; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; -import net.yacy.search.index.SolrConfiguration; -import net.yacy.search.index.SolrField; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import net.yacy.search.index.SolrField; public class IndexFederated_p { @@ -87,8 +85,6 @@ public class IndexFederated_p { sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null); } - final SolrConfiguration scheme = new SolrConfiguration(new File(env.getDataPath(), "DATA/SETTINGS/" + schemename)); - if (solrIsOnAfterwards) { // switch on final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0; @@ -101,23 +97,32 @@ public class IndexFederated_p { } // read index scheme table flags - final Iterator i = scheme.allIterator(); + final Iterator i = sb.solrScheme.entryIterator(); ConfigurationSet.Entry entry; + boolean modified = false; // flag to remember changes while (i.hasNext()) { entry = i.next(); final String v = post.get("scheme_" + entry.key()); + final String sfn = post.get("scheme_solrfieldname_" + entry.key()); + if (sfn != null ) { + // set custom solr field name + if (!sfn.equals(entry.getValue())) { + entry.setValue(sfn); + modified = true; + } + } + // set enable flag final boolean c = v != null && v.equals("checked"); + if (entry.enabled() != c) { + entry.setEnable(c); + modified = true; + } + } + if (modified) { // save settings to 
config file if modified try { - if (entry.enabled()) { - if (!c) { - scheme.disable(entry.key()); - } - } else { - if (c) { - scheme.enable(entry.key()); - } - } - } catch (final IOException e) {} + sb.solrScheme.commit(); + modified = false; + } catch (IOException ex) {} } } @@ -140,26 +145,29 @@ public class IndexFederated_p { // write scheme final String schemename = sb.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list"); - final Iterator i = sb.solrScheme.allIterator(); int c = 0; boolean dark = false; + // use enum SolrField to keep defined order + for(SolrField field : SolrField.values()) { + prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark; + prop.put("scheme_" + c + "_checked", sb.solrScheme.contains(field.name()) ? 1 : 0); + prop.putHTML("scheme_" + c + "_key", field.name()); + prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName()); + if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment()); + c++; + } + /* final Iterator i = sb.solrScheme.entryIterator(); ConfigurationSet.Entry entry; - SolrField field; while (i.hasNext()) { entry = i.next(); - try { - field = SolrField.valueOf(entry.key()); - } catch (IllegalArgumentException e) { - continue; - } - if (field == null) continue; prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark; prop.put("scheme_" + c + "_checked", entry.enabled() ? 1 : 0); prop.putHTML("scheme_" + c + "_key", entry.key()); - prop.putHTML("scheme_" + c + "_comment",field.getComment() /*scheme.commentHeadline(entry.key())*/); + prop.putHTML("scheme_" + c + "_solrfieldname",entry.getValue() == null ? 
"" : entry.getValue()); + if (entry.getComment() != null) prop.putHTML("scheme_" + c + "_comment",entry.getComment()); c++; - } + }*/ prop.put("scheme", c); // fill attribute fields diff --git a/htroot/api/schema_p.java b/htroot/api/schema_p.java index b4a63ba45..d20082921 100644 --- a/htroot/api/schema_p.java +++ b/htroot/api/schema_p.java @@ -22,10 +22,7 @@ * If not, see . */ -import java.util.Iterator; - import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.storage.ConfigurationSet; import net.yacy.search.Switchboard; import net.yacy.search.index.SolrField; import de.anomic.server.serverObjects; @@ -39,30 +36,23 @@ public class schema_p { final Switchboard sb = (Switchboard) env; // write scheme - final Iterator i = sb.solrScheme.allIterator(); - int c = 0; - ConfigurationSet.Entry entry; - SolrField field = null; - while (i.hasNext()) { - entry = i.next(); - if (!entry.enabled()) continue; //scheme.contains(entry.key()) - try { - field = SolrField.valueOf(entry.key()); - } catch (IllegalArgumentException e) { - continue; + for (SolrField field : SolrField.values()) { + if (sb.solrScheme.contains(field.name())) { + prop.put("fields_" + c + "_solrname", field.getSolrFieldName()); + prop.put("fields_" + c + "_type", field.getType().printName()); + prop.put("fields_" + c + "_comment", field.getComment()); + prop.put("fields_" + c + "_indexedChecked", field.isIndexed() ? 1 : 0); + prop.put("fields_" + c + "_storedChecked", field.isStored() ? 1 : 0); + prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0); + prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0); + c++; } - prop.put("fields_" + c + "_name", field.name()); - prop.put("fields_" + c + "_type", field.getType().printName()); - prop.put("fields_" + c + "_comment", field.getComment()); - prop.put("fields_" + c + "_indexedChecked", field.isIndexed() ? 1 : 0); - prop.put("fields_" + c + "_storedChecked", field.isStored() ? 
1 : 0); - prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0); - prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0); - c++; } prop.put("fields", c); - + + prop.put("solruniquekey",SolrField.id.getSolrFieldName()); + prop.put("solrdefaultsearchfield",SolrField.text_t.getSolrFieldName()); // return rewrite properties return prop; } diff --git a/htroot/api/schema_p.xml b/htroot/api/schema_p.xml old mode 100755 new mode 100644 index 667588e44..981cf914f --- a/htroot/api/schema_p.xml +++ b/htroot/api/schema_p.xml @@ -51,13 +51,13 @@ #{fields}# - + #{/fields}# - id - sku + #[solruniquekey]# + #[solrdefaultsearchfield]# diff --git a/source/net/yacy/cora/services/federated/solr/SolrDoc.java b/source/net/yacy/cora/services/federated/solr/SolrDoc.java index 074360dbe..fba332073 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrDoc.java +++ b/source/net/yacy/cora/services/federated/solr/SolrDoc.java @@ -41,35 +41,35 @@ public class SolrDoc extends SolrInputDocument { } public final void addSolr(final SolrField key, final String value) { - this.setField(key.name(), value); + this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final Date value) { - this.setField(key.name(), value); + this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final int value) { - this.setField(key.name(), value); + this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final String[] value) { - this.setField(key.name(), value); + this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final List value) { - this.setField(key.name(), value.toArray(new String[value.size()])); + this.setField(key.getSolrFieldName(), value.toArray(new String[value.size()])); } public final void addSolr(final SolrField key, final float value) { - this.setField(key.name(), value); + 
this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final boolean value) { - this.setField(key.name(), value); + this.setField(key.getSolrFieldName(), value); } public final void addSolr(final SolrField key, final String value, final float boost) { - this.setField(key.name(), value, boost); + this.setField(key.getSolrFieldName(), value, boost); } } diff --git a/source/net/yacy/cora/services/federated/solr/SolrField.java b/source/net/yacy/cora/services/federated/solr/SolrField.java index 55138f516..449619993 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrField.java +++ b/source/net/yacy/cora/services/federated/solr/SolrField.java @@ -31,8 +31,9 @@ public interface SolrField { * this shall be implemented as enum, thus shall have the name() method * @return the name of the enum constant */ - public String name(); - + public String name(); // default field name (according to SolCell default scheme) <= enum.name() + + public String getSolrFieldName(); // return the default or custom solr field name to use for solr requests public SolrType getType(); diff --git a/source/net/yacy/cora/services/federated/solr/SolrShardingSelection.java b/source/net/yacy/cora/services/federated/solr/SolrShardingSelection.java index 35bbe6c53..ae86a3411 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrShardingSelection.java +++ b/source/net/yacy/cora/services/federated/solr/SolrShardingSelection.java @@ -31,6 +31,7 @@ import java.nio.charset.Charset; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.concurrent.atomic.AtomicLong; +import net.yacy.search.index.SolrField; public class SolrShardingSelection { @@ -58,7 +59,7 @@ public class SolrShardingSelection { public int select(final SolrDoc solrdoc) throws IOException { if (this.method == Method.MODULO_HOST_MD5) { - final String sku = (String) solrdoc.getField("sku").getValue(); + final String sku = (String) 
solrdoc.getField(SolrField.sku.getSolrFieldName()).getValue(); return selectURL(sku); } diff --git a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java index 112a1fd7b..876c60a64 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java +++ b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java @@ -53,6 +53,7 @@ import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; +import net.yacy.search.index.SolrField; public class SolrSingleConnector implements SolrConnector { @@ -108,7 +109,7 @@ public class SolrSingleConnector implements SolrConnector { } this.server.setAllowCompression(true); this.server.setConnectionTimeout(60000); - this.server.setMaxRetries(10); + this.server.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated) this.server.setSoTimeout(60000); } @@ -169,7 +170,7 @@ public class SolrSingleConnector implements SolrConnector { @Override public boolean exists(final String id) throws IOException { try { - final SolrDocumentList list = get("id:" + id, 0, 1); + final SolrDocumentList list = get(SolrField.id.getSolrFieldName() + ":" + id, 0, 1); return list.getNumFound() > 0; } catch (final Throwable e) { Log.logException(e); @@ -195,7 +196,7 @@ public class SolrSingleConnector implements SolrConnector { @Override public void add(final SolrDoc solrdoc) throws IOException, SolrException { try { - this.server.add(solrdoc); + this.server.add(solrdoc,180000); // commitWithIn 180s //this.server.commit(); } catch (SolrServerException e) { Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdoc.toString()); @@ -208,7 +209,7 @@ public class SolrSingleConnector implements SolrConnector { ArrayList l = new ArrayList(); for (SolrDoc d: solrdocs) l.add(d); try { - 
this.server.add(l); + this.server.add(l,180000); // commitWithIn 120s //this.server.commit(); } catch (SolrServerException e) { Log.logWarning("SolrConnector", e.getMessage() + " DOC=" + solrdocs.toString()); @@ -263,12 +264,12 @@ public class SolrSingleConnector implements SolrConnector { public static void main(final String args[]) { SolrSingleConnector solr; try { - //SolrScheme scheme = new SolrScheme(); solr = new SolrSingleConnector("http://127.0.0.1:8983/solr"); solr.clear(); - final File exampleDir = new File("/Data/workspace2/yacy/test/parsertest/"); + final File exampleDir = new File("test/parsertest/"); long t, t0, a = 0; int c = 0; + System.out.println("push files in " + exampleDir.getAbsolutePath() + " to Solr"); for (final String s: exampleDir.list()) { if (s.startsWith(".")) continue; t = System.currentTimeMillis(); diff --git a/source/net/yacy/cora/storage/ConfigurationSet.java b/source/net/yacy/cora/storage/ConfigurationSet.java index bb0e84497..fd6c02216 100644 --- a/source/net/yacy/cora/storage/ConfigurationSet.java +++ b/source/net/yacy/cora/storage/ConfigurationSet.java @@ -30,11 +30,12 @@ import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; -import java.util.AbstractSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.Set; - +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; +import net.yacy.kelondro.util.FileUtils; +import net.yacy.cora.storage.ConfigurationSet.Entry; +import net.yacy.search.index.SolrField; /** * this class reads configuration attributes as a list of keywords from a list * the list may contain lines with one keyword, comment lines, empty lines and out-commented keyword lines @@ -44,61 +45,85 @@ import java.util.Set; * - all lines beginning with '##' are comments * - all non-empty lines not beginning with '#' are keyword lines * - all lines beginning with '#' and where the second character is not '#' are 
commented-out keyword lines - * + * - all text after a '#' not at beginn of line is treated as comment (like 'key = value # comment' ) + * - a line may contain a key only or a key=value pair * @author Michael Christen */ -public class ConfigurationSet extends AbstractSet implements Set { - +public class ConfigurationSet extends TreeMap { + private static final long serialVersionUID = 1L; private final File file; - private String[] lines; public ConfigurationSet() { this.file = null; - this.lines = new String[0]; } public ConfigurationSet(final File file) { this.file = file; try { final BufferedReader br = new BufferedReader(new FileReader(this.file)); - final LinkedList sl = new LinkedList(); String s; - while ((s = br.readLine()) != null) sl.add(s.trim()); - this.lines = new String[sl.size()]; - int c = 0; - for (final String s0: sl) this.lines[c++] = s0; - } catch (final IOException e) { - this.lines = new String[0]; - } + boolean enabled; + String comment, key, value; + int i; + comment = null; + while ((s = br.readLine()) != null) { + + if (s.startsWith("##") || s.isEmpty()){ + // is comment line - do nothing + if (s.startsWith("##")) comment = s.substring(2); + continue; + } else { + if (s.startsWith("#")) { + enabled = false ; + s = s.substring (1).trim(); + } else { + enabled = true; + } + if (s.contains("#")) { + // second # = text afterwards is a comment + i = s.indexOf("#"); + comment = s.substring(i+1); + s = s.substring(0,i).trim(); + } else { + // comment = null; + } + if (s.contains("=")) { + i = s.indexOf("="); + key = s.substring(0,i).trim(); + value = s.substring(i+1).trim(); + if (value.isEmpty()) value = null; + + } else { + key = s.trim(); + value = null; + } + if (!key.isEmpty()) { + Entry entry = new Entry(key, value, enabled); + if (comment != null) { + entry.setComment(comment); + comment = null; + } + this.put(key, entry); + } + } + } + } catch (final IOException e) {} } /** * override the abstract implementation because that is not stable 
in concurrent requests */ - @Override - public boolean contains(Object o) { - if (o == null || !(o instanceof String)) return false; - String s = (String) o; - synchronized (this) { - for (String line : this.lines) { - if (line != null && line.equals(s)) return true; - } - } - return false; + public boolean contains (String key) { + if (key == null) return false; + Entry e = this.get(key); + return e == null ? false : e.enabled(); } - public boolean containsDisabled(final String o) { if (o == null) return false; - final Iterator i = new EntryIterator(); - Entry e; - while (i.hasNext()) { - e = i.next(); - if (!e.enabled() && o.equals(e.key)) return true; - } - return false; + Entry e = this.get(o); + return e == null ? false : !e.enabled(); } - @Override public boolean add(final String key) { return add(key, null); } @@ -108,244 +133,205 @@ public class ConfigurationSet extends AbstractSet implements Set } public boolean add(final String key, final String comment, final boolean enabled) { - if (contains(key)) { - try { - if (!enabled) disable(key); - } catch (final IOException e) { + boolean modified = false; + Entry entry = get(key); + if (entry == null) { + entry = new Entry (key,enabled); + if (comment != null) entry.setComment(comment); + this.put (key,entry); + modified = true; + } else { + if (entry.enabled() != enabled) { + entry.setEnable(enabled); + modified = true; } - return true; - } - if (containsDisabled(key)) { - try { - if (enabled) enable(key); - } catch (final IOException e) { + if ( (comment != null) && ( !comment.equals(entry.getComment()) )) { + entry.setComment(comment); + modified = true; } - return false; } - // extend the lines - final String[] l = new String[this.lines.length + (comment == null ? 2 : 3)]; - System.arraycopy(this.lines, 0, l, 0, this.lines.length); - l[this.lines.length] = ""; - if (comment != null) l[this.lines.length + 1] = "## " + comment; - l[this.lines.length + (comment == null ? 1 : 2)] = enabled ? 
key : "#" + key; - this.lines = l; + try { - commit(); + if (modified) { + commit(); + try { + SolrField f = SolrField.valueOf(key); + f.setSolrFieldName(entry.getValue()); + } catch (IllegalArgumentException e) {} + } + } catch (final IOException e) { } - return false; + return modified; } public void fill(final ConfigurationSet other, final boolean defaultActivated) { - final Iterator i = other.allIterator(); - Entry e; + final Iterator i = other.entryIterator(); + Entry e, enew = null; while (i.hasNext()) { e = i.next(); if (contains(e.key) || containsDisabled(e.key)) continue; - this.add(e.key(), other.commentHeadline(e.key()), defaultActivated && e.enabled()); + // add as new entry + enew = new Entry(e.key(),e.getValue(),defaultActivated && e.enabled()); + enew.setComment(e.getComment()); + this.put (e.key(),enew); + } + if (enew != null) { + try { + commit(); + } catch (IOException ex) { + Logger.getLogger(ConfigurationSet.class.getName()).log(Level.SEVERE, null, ex); + } } - } - - @Override - public boolean isEmpty() { - // a shortcut to a fast 'true' in case that we initialized the class without a configuration file - return this.lines == null || this.lines.length == 0 || super.isEmpty(); } /** * save the configuration back to the file * @throws IOException */ - private void commit() throws IOException { + public void commit() throws IOException { if (this.file == null) return; - final BufferedWriter writer = new BufferedWriter(new FileWriter(this.file)); - for (final String s: this.lines) { - writer.write(s); - writer.write("\n"); - } - writer.close(); - } - - @Override - public Iterator iterator() { - return new LineIterator(true); - } - - public Iterator disabledIterator() { - return new LineIterator(false); - } - - public Iterator allIterator() { - return new EntryIterator(); - } - - private boolean isCommentLine(final int line) { - return this.lines[line].startsWith("##"); - } - - private boolean isKeyLine(final int line) { - return 
this.lines[line].length() > 0 && this.lines[line].charAt(0) != '#'; - } + // create a temporary bak file, use it as template to preserve user comments + File bakfile = new File (file.getAbsolutePath() + ".bak"); + FileUtils.copy (this.file, bakfile); - private boolean isDisabledLine(final int line) { - return this.lines[line].length() > 1 && this.lines[line].charAt(0) == '#' && this.lines[line].charAt(1) != '#'; - } + TreeMap tclone = (TreeMap) this.clone(); // clone to write appended entries - public void enable(final String key) throws IOException { - for (int i = 0; i < this.lines.length; i++) { - if (isDisabledLine(i) && this.lines[i].substring(1).trim().equals(key)) { - this.lines[i] = key; - commit(); - return; + final BufferedWriter writer = new BufferedWriter(new FileWriter(this.file)); + try { + final BufferedReader reader = new BufferedReader(new FileReader(bakfile)); + String s, sorig; + String key; + int i; + while ((sorig = reader.readLine()) != null) { + + if (sorig.startsWith("##") || sorig.isEmpty()){ + // is comment line - write as is + writer.write(sorig + "\n"); + continue; + } else { + if (sorig.startsWith("#")) { + s = sorig.substring (1).trim(); + } else { + s = sorig; + } + if (s.contains("#")) { + // second # = is a line comment + i = s.indexOf("#"); + s = s.substring(0,i).trim(); + } + if (s.contains("=")) { + i = s.indexOf("="); + key = s.substring(0,i).trim(); + } else { + key = s.trim(); + } + if (!key.isEmpty()) { + Entry e = this.get(key); + if (e != null) { + writer.write (e.toString()); + tclone.remove(key); // remove written entries from clone + } else {writer.write(sorig); } + writer.write("\n"); + } else { + writer.write(sorig+"\n"); + } + } } + reader.close(); + bakfile.delete(); + } catch (final IOException e) {} + + // write remainig entries (not already written) + Iterator ie = tclone.entrySet().iterator(); + while (ie.hasNext()) { + Object e = ie.next(); + writer.write (e.toString() + "\n"); } + writer.close(); } - - public 
void disable(final String key) throws IOException { - for (int i = 0; i < this.lines.length; i++) { - if (isKeyLine(i) && this.lines[i].equals(key)) { - this.lines[i] = "#" + key; - commit(); - return; - } - } + /* + public Iterator iterator() { + return this.keySet().iterator(); } - - public String commentHeadline(final String key) { - for (int i = 1; i < this.lines.length; i++) { - if (this.lines[i].equals(key) || - (isDisabledLine(i) && this.lines[i].substring(1).trim().equals(key)) - ) { - return isCommentLine(i - 1) ? this.lines[i - 1].substring(2).trim() : ""; - } - } - return ""; +*/ + public Iterator entryIterator() { + return this.values().iterator(); } - public class LineIterator implements Iterator { - - EntryIterator i; - Entry nextEntry; - private final boolean enabled; + public class Entry { + private final String key; + private String value; + private boolean enabled; + private String comment; - public LineIterator(final boolean enabled) { + public Entry(final String key, final boolean enabled) { this.enabled = enabled; - this.i = new EntryIterator(); - findNextValid(); - } - - public void findNextValid() { - while (this.i.hasNext()) { - this.nextEntry = this.i.next(); - if (this.nextEntry.enabled() == this.enabled) return; + // split in key, value if line contains a "=" (equal sign) e.g. 
myattribute = 123 + // for backward compatibility here the key parameter is checked to contain a "=" + if (key.contains("=")) { + int i = key.indexOf("="); + this.key = key.substring(0,i).trim(); + this.value = key.substring(i+1).trim(); + } else { + this.key = key; + this.value = null; } - this.nextEntry = null; - } - - @Override - public boolean hasNext() { - return this.nextEntry != null; } - - @Override - public String next() { - if (this.nextEntry == null) return null; - final String s = this.nextEntry.key(); - findNextValid(); - return s; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - } - - public class EntryIterator implements Iterator { - - private int line; - - public EntryIterator() { - this.line = -1; - findNextKeywordLine(); + public Entry (final String key, String value, final boolean enabled) { + this.enabled = enabled; + this.key = key; + this.value = value; } - - /** - * increase line counter until it points to the next keyword line - * @return true if a next line was found, false if EOL - */ - private boolean findNextKeywordLine() { - this.line++; - if (this.line >= ConfigurationSet.this.lines.length) return false; - while (ConfigurationSet.this.lines[this.line].length() == 0 || - ConfigurationSet.this.lines[this.line].startsWith("##")) { - this.line++; - if (this.line >= ConfigurationSet.this.lines.length) return false; - } - return true; + public String key() { + return this.key; } - - @Override - public boolean hasNext() { - return this.line < ConfigurationSet.this.lines.length; + public void setValue(String theValue) { + //empty string not wanted + if ((theValue != null) && theValue.isEmpty()) { + this.value = null; + } else { + this.value = theValue; + } } - - @Override - public Entry next() { - final String s = ConfigurationSet.this.lines[this.line]; - findNextKeywordLine(); - if (s.charAt(0) == '#') return new Entry(s.substring(1).trim(), false); - return new Entry(s, true); + public String 
getValue() { + return this.value; } - - @Override - public void remove() { - throw new UnsupportedOperationException(); + public void setComment(String comment) { + this.comment = comment; } - - } - - public class Entry { - private final String key; - private final boolean enabled; - public Entry(final String key, final boolean enabled) { - this.enabled = enabled; - this.key = key; + public String getComment() { + return this.comment; } - public String key() { - return this.key; + public void setEnable(boolean value){ + this.enabled = value; } public boolean enabled() { return this.enabled; } - } - - @Override - public int size() { - int c = 0; - for (final String s: this.lines) { - if (s.length() > 0 && s.charAt(0) != '#') c++; + @Override + public String toString(){ + // output string to write to config file + return (this.enabled ? "" : "#") + (this.value != null ? this.key + " = " + this.value : this.key ) + (this.comment != null ? " #" + this.comment : ""); } - return c; } public static void main(final String[] args) { if (args.length == 0) return; - final File f = new File(args[0]); + final File f = new File (args[0]); final ConfigurationSet cs = new ConfigurationSet(f); - Iterator i = cs.iterator(); - String k; + Iterator i = cs.entryIterator(); + Entry k; System.out.println("\nall activated attributes:"); while (i.hasNext()) { k = i.next(); - System.out.println(k + " - " + cs.commentHeadline(k)); + if (k.enabled()) System.out.println(k.toString()); } - i = cs.disabledIterator(); + i = cs.entryIterator(); System.out.println("\nall deactivated attributes:"); while (i.hasNext()) { k = i.next(); - System.out.println(k + " - " + cs.commentHeadline(k)); + if (!k.enabled()) System.out.println(k.toString() ); } } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 3bc44b88d..37a53f424 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -645,8 +645,8 @@ public final 
class Switchboard extends serverSwitch this.solrScheme = new SolrConfiguration(solrWorkProfile); // update the working scheme with the backup scheme. This is necessary to include new features. - // new features are always activated by default - this.solrScheme.fill(backupScheme, false); + // new features are always activated by default (if activated in input-backupScheme) + this.solrScheme.fill(backupScheme, true); // set up the solr interface final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr"); @@ -1538,7 +1538,7 @@ public final class Switchboard extends serverSwitch * @throws InterruptedException if the current thread has been interrupted, i.e. by the shutdown * procedure */ - public boolean cleanProfiles() throws InterruptedException { + public boolean cleanProfiles() throws InterruptedException { if (getIndexingProcessorsQueueSize() > 0 || this.crawlQueues.workerSize() > 0 || this.crawlQueues.coreCrawlJobSize() > 0 || @@ -1546,7 +1546,7 @@ public final class Switchboard extends serverSwitch this.crawlQueues.remoteTriggeredCrawlJobSize() > 0 || this.crawlQueues.noloadCrawlJobSize() > 0 || (this.crawlStacker != null && !this.crawlStacker.isEmpty()) || - this.crawlQueues.noticeURL.notEmpty()) { + this.crawlQueues.noticeURL.notEmpty()) { return false; } return this.crawler.clear(); diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 3272a9d79..739938e34 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -29,13 +29,7 @@ import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; +import java.util.*; import net.yacy.cora.document.ASCII; 
import net.yacy.cora.document.MultiProtocolURI; @@ -65,26 +59,24 @@ public class SolrConfiguration extends ConfigurationSet { /** * initialize the scheme with a given configuration file * the configuration file simply contains a list of lines with keywords + * or keyword = value lines (while value is a custom Solr field name * @param configurationFile */ public SolrConfiguration(final File configurationFile) { super(configurationFile); // check consistency: compare with YaCyField enum - for (String name: this) { + if (this.isEmpty()) return; + Iterator it = this.entryIterator(); + for (ConfigurationSet.Entry etr = it.next(); it.hasNext(); etr = it.next()) { try { - SolrField.valueOf(name); + SolrField f = SolrField.valueOf(etr.key()); + f.setSolrFieldName(etr.getValue()); } catch (IllegalArgumentException e) { - Log.logWarning("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + name + "'"); + Log.logWarning("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'"); + it.remove(); } } - /* - for (YaCyField field: YaCyField.values()) { - if (!this.contains(field.name())) { - Log.logWarning("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " omits known attribute '" + field.name() + "'"); } - } - */ - } protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String value) { if (isEmpty() || contains(key.name())) solrdoc.addSolr(key, value); @@ -114,8 +106,30 @@ public class SolrConfiguration extends ConfigurationSet { if (isEmpty() || contains(key.name())) solrdoc.addSolr(key, value, boost); } + + /** + * save configuration to file and update enum SolrFields + * @throws IOException + */ + @Override + public void commit() throws IOException { + try { + super.commit(); + // make sure the enum SolrField.SolrFieldName is current + Iterator it = this.entryIterator(); + for (ConfigurationSet.Entry etr = 
it.next(); it.hasNext(); etr = it.next()) { + try { + SolrField f = SolrField.valueOf(etr.key()); + f.setSolrFieldName(etr.getValue()); + } catch (IllegalArgumentException e) { + continue; + } + } + } catch (final IOException e) {} + } + public SolrDoc yacy2solr(final String id, final ResponseHeader header, final Document yacydoc) { - // we user the SolrCell design as index scheme + // we use the SolrCell design as index scheme final SolrDoc solrdoc = new SolrDoc(); final DigestURI digestURI = new DigestURI(yacydoc.dc_source()); addSolr(solrdoc, SolrField.failreason_t, ""); // overwrite a possible fail reason (in case that there was a fail reason before) @@ -453,39 +467,39 @@ public class SolrConfiguration extends ConfigurationSet { } public String solrGetID(final SolrDocument solr) { - return (String) solr.getFieldValue("id"); + return (String) solr.getFieldValue(SolrField.id.getSolrFieldName()); } public DigestURI solrGetURL(final SolrDocument solr) { try { - return new DigestURI((String) solr.getFieldValue("sku")); + return new DigestURI((String) solr.getFieldValue(SolrField.sku.getSolrFieldName())); } catch (final MalformedURLException e) { return null; } } public String solrGetTitle(final SolrDocument solr) { - return (String) solr.getFieldValue("title"); + return (String) solr.getFieldValue(SolrField.title.getSolrFieldName()); } public String solrGetText(final SolrDocument solr) { - return (String) solr.getFieldValue("text_t"); + return (String) solr.getFieldValue(SolrField.text_t.getSolrFieldName()); } public String solrGetAuthor(final SolrDocument solr) { - return (String) solr.getFieldValue("author"); + return (String) solr.getFieldValue(SolrField.author.getSolrFieldName()); } public String solrGetDescription(final SolrDocument solr) { - return (String) solr.getFieldValue("description"); + return (String) solr.getFieldValue(SolrField.description.getSolrFieldName()); } public Date solrGetDate(final SolrDocument solr) { - return (Date) 
solr.getFieldValue("last_modified"); + return (Date) solr.getFieldValue(SolrField.last_modified.getSolrFieldName()); } public Collection solrGetKeywords(final SolrDocument solr) { - final Collection c = solr.getFieldValues("keywords"); + final Collection c = solr.getFieldValues(SolrField.keywords.getSolrFieldName()); final ArrayList a = new ArrayList(); for (final Object s: c) { a.add((String) s); diff --git a/source/net/yacy/search/index/SolrField.java b/source/net/yacy/search/index/SolrField.java index 676877768..d822732e1 100644 --- a/source/net/yacy/search/index/SolrField.java +++ b/source/net/yacy/search/index/SolrField.java @@ -28,7 +28,7 @@ import net.yacy.cora.services.federated.solr.SolrType; public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField { - id(SolrType.string, true, true, "primary key of document, the URL hash"), + id(SolrType.string, true, true, "primary key of document, the URL hash **mandatory field**"), sku(SolrType.text_en_splitting_tight, true, true, false, true, "url of document"), ip_s(SolrType.string, true, true, "ip of host of url (after DNS lookup)"), host_s(SolrType.string, true, true, "host of the url"), @@ -121,10 +121,11 @@ public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField ext_title_val(SolrType.integer, true, true, true, "number of matching title expressions"), failreason_t(SolrType.text_general, true, true, "fail reason if a page was not loaded. 
if the page was loaded then this field is empty"); - final SolrType type; - final boolean indexed, stored; - boolean multiValued, omitNorms; - final String comment; + private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() ) + private final SolrType type; + private final boolean indexed, stored; + private boolean multiValued, omitNorms; + private String comment; private SolrField(final SolrType type, final boolean indexed, final boolean stored, final String comment) { this.type = type; @@ -145,6 +146,27 @@ public enum SolrField implements net.yacy.cora.services.federated.solr.SolrField this.omitNorms = omitNorms; } + /** + * Returns the YaCy default or (if available) custom field name for Solr + * @return SolrFieldname String + */ + public final String getSolrFieldName() { + return (this.solrFieldName == null ? this.name() : this.solrFieldName); + } + + /** + * Set a custom Solr field name (and converts it to lower case) + * @param theValue = the field name + */ + public final void setSolrFieldName(String theValue) { + // make sure no empty string is assigned + if ( (theValue != null) && (!theValue.isEmpty()) ) { + this.solrFieldName = theValue.toLowerCase(); + } else { + this.solrFieldName = null; + } + } + public final SolrType getType() { return this.type; } diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index e2772b0c2..9eeb816d0 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -62,6 +62,7 @@ import org.apache.solr.common.SolrDocumentList; import de.anomic.crawler.Cache; import de.anomic.data.WorkTables; +import net.yacy.search.index.SolrField; public class SnippetProcess { @@ -497,7 +498,7 @@ public class SnippetProcess { String solrContent = null; if (this.solr != null) { SolrDocument sd = null; - final SolrDocumentList sdl = 
this.solr.get("id:" + ASCII.String(page.hash()), 0, 1); + final SolrDocumentList sdl = this.solr.get(SolrField.id.getSolrFieldName()+ ":" + ASCII.String(page.hash()), 0, 1); if (sdl.size() > 0) { sd = sdl.get(0); }