- generalized SchemaConfiguration into a super-class Configuration and

adapted the other classes which used only the configuration access of that
class
- removed many warnings
- adjusted logging
pull/1/head
Michael Peter Christen 12 years ago
parent c16de49f64
commit 089dee1770

1
.gitignore vendored

@ -10,3 +10,4 @@ RELEASE/
/yacy.pid /yacy.pid
.DS_Store .DS_Store
lib/yacy-cora.jar lib/yacy-cora.jar
/DATA.bkp

@ -80,12 +80,18 @@ org.apache.pdfbox.level = INFO
# Properties for solr # Properties for solr
org.apache.solr.core.Config.level = INFO org.apache.solr.core.Config.level = INFO
org.apache.solr.core.SolrCore.level = SEVERE
org.apache.solr.core.SolrResourceLoader.level = INFO org.apache.solr.core.SolrResourceLoader.level = INFO
org.apache.solr.core.CachingDirectoryFactory.level = OFF org.apache.solr.core.CachingDirectoryFactory.level = OFF
org.apache.solr.util.plugin.AbstractPluginLoader.level = INFO org.apache.solr.util.plugin.AbstractPluginLoader.level = INFO
org.apache.solr.schema.IndexSchema.level = INFO org.apache.solr.schema.IndexSchema.level = INFO
org.apache.solr.schema.FieldTypePluginLoader.level = INFO
org.apache.solr.handler.UpdateRequestHandler.level = INFO org.apache.solr.handler.UpdateRequestHandler.level = INFO
org.apache.solr.handler.loader.XMLLoader.level = INFO org.apache.solr.handler.loader.XMLLoader.level = INFO
org.apache.solr.search.SolrIndexSearcher.level = INFO org.apache.solr.search.SolrIndexSearcher.level = INFO
org.apache.solr.update.processor.LogUpdateProcessor.level = OFF org.apache.solr.update.processor.LogUpdateProcessor.level = OFF
org.apache.solr.update.SolrIndexWriter.level = INFO
# java properties
javax.management.misc.level = INFO
javax.management.mbeanserver.level = INFO javax.management.mbeanserver.level = INFO

@ -75,11 +75,9 @@ network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt
network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt
network.unit.bootstrap.seedlist2 = http://low.audioattack.de/yacy/seed.txt network.unit.bootstrap.seedlist2 = http://low.audioattack.de/yacy/seed.txt
network.unit.bootstrap.seedlist3 = http://www.lulabad.de/seed.txt network.unit.bootstrap.seedlist3 = http://www.lulabad.de/seed.txt
network.unit.bootstrap.seedlist4 = http://fennec.cx/yacy/seed.txt network.unit.bootstrap.seedlist4 = http://sixcooler.de/yacy/seed.txt
network.unit.bootstrap.seedlist5 = http://sixcooler.de/yacy/seed.txt network.unit.bootstrap.seedlist5 = http://headrift.dyndns.org/yacy/seed.txt
network.unit.bootstrap.seedlist6 = http://headrift.dyndns.org/yacy/seed.txt network.unit.bootstrap.seedlist6 = http://dk5ras.dyndns.org/seed.txt
network.unit.bootstrap.seedlist7 = http://dk5ras.dyndns.org/seed.txt
network.unit.bootstrap.seedlist8 = http://yacy.bonus-communis-bibliotheca.eu/seed.txt
# each network may use different yacy distributions. # each network may use different yacy distributions.
# the auto-updater can access network-specific update locations # the auto-updater can access network-specific update locations

@ -28,7 +28,9 @@
import com.google.common.io.Files; import com.google.common.io.Files;
import java.io.File; import java.io.File;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.Configuration;
import net.yacy.data.WorkTables; import net.yacy.data.WorkTables;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
@ -152,26 +154,32 @@ public class ConfigHeuristics_p {
// display config file content // display config file content
final File f = new File (sb.getDataPath(),"DATA/SETTINGS/heuristicopensearch.conf"); final File f = new File (sb.getDataPath(),"DATA/SETTINGS/heuristicopensearch.conf");
SchemaConfiguration p = new SchemaConfiguration(f);
int c = 0; try {
boolean dark = false; Configuration p = new Configuration(f);
Iterator<SchemaConfiguration.Entry> i = p.entryIterator(); int c = 0;
while (i.hasNext()) { boolean dark = false;
SchemaConfiguration.Entry e = i.next(); Iterator<Configuration.Entry> i = p.entryIterator();
prop.put("osdcfg_" + c + "_dark", dark ? 1 : 0); while (i.hasNext()) {
dark = !dark; SchemaConfiguration.Entry e = i.next();
prop.put("osdcfg_" + c + "_checked", e.enabled() ? 1 : 0); prop.put("osdcfg_" + c + "_dark", dark ? 1 : 0);
prop.putHTML("osdcfg_" + c + "_title", e.key()); dark = !dark;
prop.putHTML("osdcfg_" + c + "_comment", e.getComment() != null ? e.getComment() : ""); prop.put("osdcfg_" + c + "_checked", e.enabled() ? 1 : 0);
prop.putHTML("osdcfg_" + c + "_title", e.key());
String tmps = e.getValue(); prop.putHTML("osdcfg_" + c + "_comment", e.getComment() != null ? e.getComment() : "");
prop.putHTML("osdcfg_" + c + "_url", tmps);
tmps = tmps.substring(0,tmps.lastIndexOf("/")); String tmps = e.getValue();
prop.putHTML("osdcfg_" + c + "_urlhostlink", tmps); prop.putHTML("osdcfg_" + c + "_url", tmps);
tmps = tmps.substring(0,tmps.lastIndexOf("/"));
c++; prop.putHTML("osdcfg_" + c + "_urlhostlink", tmps);
c++;
}
prop.put("osdcfg", c);
} catch (IOException e1) {
Log.logException(e1);
prop.put("osdcfg", 0);
} }
prop.put("osdcfg", c);
prop.putHTML("osderrmsg",osderrmsg); prop.putHTML("osderrmsg",osderrmsg);
return prop; return prop;
} }
@ -180,40 +188,45 @@ public class ConfigHeuristics_p {
// read index schema table flags // read index schema table flags
final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
SchemaConfiguration cfg = new SchemaConfiguration(f); try {
final Iterator<SchemaConfiguration.Entry> cfgentries = cfg.entryIterator(); Configuration cfg = new Configuration(f);
SchemaConfiguration.Entry entry; final Iterator<Configuration.Entry> cfgentries = cfg.entryIterator();
boolean modified = false; // flag to remember changes Configuration.Entry entry;
while (cfgentries.hasNext()) { boolean modified = false; // flag to remember changes
entry = cfgentries.next(); while (cfgentries.hasNext()) {
final String sfn = post.get("ossys_url_" + entry.key()); entry = cfgentries.next();
if (sfn != null) { final String sfn = post.get("ossys_url_" + entry.key());
if (!sfn.equals(entry.getValue())) { if (sfn != null) {
entry.setValue(sfn); if (!sfn.equals(entry.getValue())) {
entry.setValue(sfn);
modified = true;
}
}
// set enable flag
String v = post.get("ossys_" + entry.key());
boolean c = v != null && v.equals("checked");
if (entry.enabled() != c) {
entry.setEnable(c);
modified = true; modified = true;
} }
} // delete entry from config
// set enable flag v = post.get("ossys_del_" + entry.key());
String v = post.get("ossys_" + entry.key()); c = v != null && v.equals("checked");
boolean c = v != null && v.equals("checked"); if (c) {
if (entry.enabled() != c) { cfgentries.remove();
entry.setEnable(c); modified = true;
modified = true; }
}
// delete entry from config
v = post.get("ossys_del_" + entry.key());
c = v != null && v.equals("checked");
if (c) {
cfgentries.remove();
modified = true;
} }
} if (modified) { // save settings to config file if modified
if (modified) { // save settings to config file if modified try {
try { cfg.commit();
cfg.commit(); } catch (IOException ex) {
} catch (IOException ex) { }
} }
} catch (IOException e) {
Log.logException(e);
} }
// re-read config (and create/update work table) // re-read config (and create/update work table)
if (sb.getConfigBool("heuristic.opensearch", true)) { if (sb.getConfigBool("heuristic.opensearch", true)) {
OpenSearchConnector os = new OpenSearchConnector(sb, true); OpenSearchConnector os = new OpenSearchConnector(sb, true);

@ -35,7 +35,7 @@
} }
.ac_loading { .ac_loading {
background: white url('indicator.gif') right center no-repeat; background: white right center no-repeat;
} }
.ac_odd { .ac_odd {

@ -28,9 +28,8 @@ import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Set; import java.util.Set;
import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.SchemaConfiguration.Entry;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.storage.Configuration;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.parser.xml.opensearchdescriptionReader; import net.yacy.document.parser.xml.opensearchdescriptionReader;
import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables;
@ -64,12 +63,12 @@ public class OpenSearchConnector {
if (createworktable) { // read from config file and create worktable if (createworktable) { // read from config file and create worktable
sb.tables.clear("opensearchsys"); sb.tables.clear("opensearchsys");
try { try {
SchemaConfiguration cfg = new SchemaConfiguration(confFile); Configuration cfg = new Configuration(confFile);
// copy active opensearch systems to a work table (opensearchsys) // copy active opensearch systems to a work table (opensearchsys)
Iterator<Entry> cfgentries = cfg.entryIterator(); Iterator<Configuration.Entry> cfgentries = cfg.entryIterator();
while (cfgentries.hasNext()) { while (cfgentries.hasNext()) {
Entry e = cfgentries.next(); Configuration.Entry e = cfgentries.next();
if (e.enabled()) { if (e.enabled()) {
String title = e.key(); // get the title String title = e.key(); // get the title
String urlstr = e.getValue(); // get the search template url String urlstr = e.getValue(); // get the search template url
@ -141,20 +140,25 @@ public class OpenSearchConnector {
return false; return false;
} }
SchemaConfiguration conf = new SchemaConfiguration(confFile); try {
if (name != null && !name.isEmpty()) { Configuration conf = new Configuration(confFile);
conf.add(name, null, active); if (name != null && !name.isEmpty()) {
Entry e = conf.get(name); conf.add(name, null, active);
e.setValue(url); Configuration.Entry e = conf.get(name);
e.setEnable(active); e.setValue(url);
e.setComment(comment); e.setEnable(active);
conf.put(name, e); e.setComment(comment);
try { conf.put(name, e);
conf.commit(); try {
} catch (IOException ex) { conf.commit();
Log.logWarning("OpenSearchConnector.add", "config file write error"); } catch (IOException ex) {
Log.logWarning("OpenSearchConnector.add", "config file write error");
}
return true;
} }
return true; } catch (IOException e1) {
Log.logException(e1);
return false;
} }
return false; return false;
} }

@ -20,156 +20,30 @@
package net.yacy.cora.federate.solr; package net.yacy.cora.federate.solr;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File; import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import net.yacy.cora.federate.solr.SchemaConfiguration.Entry; import net.yacy.cora.storage.Configuration;
import net.yacy.cora.storage.Files;
/** public class SchemaConfiguration extends Configuration implements Serializable {
* this class reads configuration attributes as a list of keywords from a list
* the list may contain lines with one keyword, comment lines, empty lines and out-commented keyword lines
* when an attribute is changed here, the list is stored again with the original formatting
*
* the syntax of configuration files:
* - all lines beginning with '##' are comments
* - all non-empty lines not beginning with '#' are keyword lines
* - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines
* - all text after a '#' not at beginn of line is treated as comment (like 'key = value # comment' )
* - a line may contain a key only or a key=value pair
* @author Michael Christen
*/
public class SchemaConfiguration extends TreeMap<String,Entry> implements Serializable {
private final static long serialVersionUID=-5961730809008841258L; private final static long serialVersionUID=-5961730809008841258L;
private final static Logger log = Logger.getLogger(SchemaConfiguration.class); private final static Logger log = Logger.getLogger(SchemaConfiguration.class);
private final File file;
protected boolean lazy;
public SchemaConfiguration() { public SchemaConfiguration() {
this.file = null; super();
this.lazy = false;
}
public SchemaConfiguration(final File file) {
this.file = file;
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(this.file));
String s;
boolean enabled;
String comment, key, value;
int i;
comment = null;
while ((s = br.readLine()) != null) {
if (s.startsWith("##") || s.isEmpty()){
// is comment line - do nothing
if (s.startsWith("##")) comment = s.substring(2);
continue;
}
if (s.startsWith("#")) {
enabled = false ;
s = s.substring (1).trim();
} else {
enabled = true;
}
if (s.contains("#")) {
// second # = text afterwards is a comment
i = s.indexOf("#");
comment = s.substring(i+1);
s = s.substring(0,i).trim();
} else {
// comment = null;
}
if (s.contains("=")) {
i = s.indexOf("=");
key = s.substring(0,i).trim();
value = s.substring(i+1).trim();
if (value.isEmpty()) value = null;
} else {
key = s.trim();
value = null;
}
if (!key.isEmpty()) {
Entry entry = new Entry(key, value, enabled);
if (comment != null) {
entry.setComment(comment);
comment = null;
}
this.put(key, entry);
}
}
} catch (final IOException e) {
log.warn(e);
} finally {
if (br != null) try {br.close();} catch (IOException e) {}
}
}
/**
* override the abstract implementation because that is not stable in concurrent requests
*/
public boolean contains(String key) {
if (key == null) return false;
Entry e = this.get(key);
return e == null ? false : e.enabled();
}
public boolean containsDisabled(final String o) {
if (o == null) return false;
Entry e = this.get(o);
return e == null ? false : !e.enabled();
}
public boolean add(final String key) {
return add(key, null);
} }
public boolean add(final String key, final String comment) { public SchemaConfiguration(final File file) throws IOException {
return add(key, comment, true); super(file);
}
public boolean add(final String key, final String comment, final boolean enabled) {
boolean modified = false;
Entry entry = get(key);
if (entry == null) {
entry = new Entry (key,enabled);
if (comment != null) entry.setComment(comment);
this.put (key,entry);
modified = true;
} else {
if (entry.enabled() != enabled) {
entry.setEnable(enabled);
modified = true;
}
if ( (comment != null) && ( !comment.equals(entry.getComment()) )) {
entry.setComment(comment);
modified = true;
}
}
try {
if (modified) {
commit();
}
} catch (final IOException e) {}
return modified;
} }
public void fill(final SchemaConfiguration other, final boolean defaultActivated) { public void fill(final SchemaConfiguration other, final boolean defaultActivated) {
@ -177,7 +51,7 @@ public class SchemaConfiguration extends TreeMap<String,Entry> implements Serial
Entry e, enew = null; Entry e, enew = null;
while (i.hasNext()) { while (i.hasNext()) {
e = i.next(); e = i.next();
if (contains(e.key) || containsDisabled(e.key)) continue; if (contains(e.key()) || containsDisabled(e.key())) continue;
// add as new entry // add as new entry
enew = new Entry(e.key(),e.getValue(),defaultActivated && e.enabled()); enew = new Entry(e.key(),e.getValue(),defaultActivated && e.enabled());
enew.setComment(e.getComment()); enew.setComment(e.getComment());
@ -241,158 +115,5 @@ public class SchemaConfiguration extends TreeMap<String,Entry> implements Serial
Date now = new Date(); Date now = new Date();
return (x == null) ? new Date(0) : x.after(now) ? now : x; return (x == null) ? new Date(0) : x.after(now) ? now : x;
} }
/**
* save the configuration back to the file
* @throws IOException
*/
public void commit() throws IOException {
if (this.file == null) return;
// create a temporary bak file, use it as template to preserve user comments
File bakfile = new File (this.file.getAbsolutePath() + ".bak");
try {
Files.copy(this.file, bakfile);
} catch (final IOException e) {
this.file.createNewFile();
}
@SuppressWarnings("unchecked")
TreeMap<String,Entry> tclone = (TreeMap<String,Entry>) this.clone(); // clone to write appended entries
final BufferedWriter writer = new BufferedWriter(new FileWriter(this.file));
try {
final BufferedReader reader = new BufferedReader(new FileReader(bakfile));
String s, sorig;
String key;
int i;
while ((sorig = reader.readLine()) != null) {
if (sorig.startsWith("##") || sorig.isEmpty()){
// is comment line - write as is
writer.write(sorig + "\n");
continue;
}
if (sorig.startsWith("#")) {
s = sorig.substring (1).trim();
} else {
s = sorig;
}
if (s.contains("#")) {
// second # = is a line comment
i = s.indexOf("#");
s = s.substring(0,i).trim();
}
if (s.contains("=")) {
i = s.indexOf("=");
key = s.substring(0,i).trim();
} else {
key = s.trim();
}
if (!key.isEmpty()) {
Entry e = this.get(key);
if (e != null) {
writer.write (e.toString());
tclone.remove(key); // remove written entries from clone
}
writer.write("\n");
} else {
writer.write(sorig+"\n");
}
}
reader.close();
bakfile.delete();
} catch (final IOException e) {}
// write remainig entries (not already written)
Iterator<Map.Entry<String,Entry>> ie = tclone.entrySet().iterator();
while (ie.hasNext()) {
Object e = ie.next();
writer.write (e.toString() + "\n");
}
writer.close();
}
public Iterator<Entry> entryIterator() {
return this.values().iterator();
}
public class Entry {
private final String key;
private String value;
private boolean enabled;
private String comment;
public Entry(final String key, final boolean enabled) {
this.enabled = enabled;
// split in key, value if line contains a "=" (equal sign) e.g. myattribute = 123
// for backward compatibility here the key parameter is checked to contain a "="
if (key.contains("=")) {
int i = key.indexOf("=");
this.key = key.substring(0,i).trim();
this.value = key.substring(i+1).trim();
} else {
this.key = key;
this.value = null;
}
}
public Entry (final String key, String value, final boolean enabled) {
this.enabled = enabled;
this.key = key;
this.value = value;
}
public String key() {
return this.key;
}
public void setValue(String theValue) {
//empty string not wanted
if ((theValue != null) && theValue.isEmpty()) {
this.value = null;
} else {
this.value = theValue;
}
}
public String getValue() {
return this.value;
}
public void setComment(String comment) {
this.comment = comment;
}
public String getComment() {
return this.comment;
}
public void setEnable(boolean value){
this.enabled = value;
}
public boolean enabled() {
return this.enabled;
}
@Override
public String toString(){
// output string to write to config file
return (this.enabled ? "" : "#") + (this.value != null ? this.key + " = " + this.value : this.key ) + (this.comment != null ? " #" + this.comment : "");
}
}
public static void main(final String[] args) {
if (args.length == 0) return;
final File f = new File (args[0]);
final SchemaConfiguration cs = new SchemaConfiguration(f);
Iterator<Entry> i = cs.entryIterator();
Entry k;
System.out.println("\nall activated attributes:");
while (i.hasNext()) {
k = i.next();
if (k.enabled()) System.out.println(k.toString());
}
i = cs.entryIterator();
System.out.println("\nall deactivated attributes:");
while (i.hasNext()) {
k = i.next();
if (!k.enabled()) System.out.println(k.toString() );
}
}
} }

@ -37,7 +37,6 @@ import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.util.LookAheadIterator; import net.yacy.cora.util.LookAheadIterator;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
@ -51,8 +50,6 @@ import org.apache.solr.common.params.ModifiableSolrParams;
public abstract class AbstractSolrConnector implements SolrConnector { public abstract class AbstractSolrConnector implements SolrConnector {
private final static Logger log = Logger.getLogger(AbstractSolrConnector.class);
public final static SolrDocument POISON_DOCUMENT = new SolrDocument(); public final static SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static String POISON_ID = "POISON_ID"; public final static String POISON_ID = "POISON_ID";
public final static SolrQuery catchallQuery = new SolrQuery(); public final static SolrQuery catchallQuery = new SolrQuery();
@ -79,7 +76,6 @@ public abstract class AbstractSolrConnector implements SolrConnector {
long count = getQueryCount(fieldName + ":\"" + key + "\""); long count = getQueryCount(fieldName + ":\"" + key + "\"");
return count > 0; return count > 0;
} catch (final Throwable e) { } catch (final Throwable e) {
log.warn(e);
return false; return false;
} }
} }

@ -58,14 +58,19 @@ public class EmbeddedInstance implements SolrInstance {
if (!defaultCorePath.exists()) defaultCorePath.mkdirs(); if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
// migrate old conf directory // migrate old conf directory
File conf = new File(defaultCorePath, "conf");
File oldConf = new File(containerPath, "conf"); File oldConf = new File(containerPath, "conf");
if (oldConf.exists()) oldConf.renameTo(conf); File confDir = new File(defaultCorePath, "conf");
if (oldConf.exists()) oldConf.renameTo(confDir);
// migrate old data directory // migrate old data directory
File oldData = new File(containerPath, "data"); File oldData = new File(containerPath, "data");
if (oldData.exists()) oldData.renameTo(new File(defaultCorePath, "data")); File dataDir = new File(defaultCorePath, "data");
if (oldData.exists()) oldData.renameTo(dataDir);
// create index subdirectory in data if it does not exist
File indexDir = new File(dataDir, "index");
if (!indexDir.exists()) indexDir.mkdirs();
// initialize the cores' configuration // initialize the cores' configuration
for (String coreName: initializeCoreNames) { for (String coreName: initializeCoreNames) {
initializeCoreConf(solr_config, containerPath, coreName); initializeCoreConf(solr_config, containerPath, coreName);

@ -0,0 +1,324 @@
/**
* Configuration
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 29.06.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.storage.Configuration.Entry;
import net.yacy.cora.storage.Files;
/**
 * A sorted map of configuration entries that is read from, and written back to,
 * a line-oriented text file. When the configuration is committed, the existing
 * file is used as a template so that comment lines, empty lines and the order of
 * the entries are preserved.
 *
 * The syntax of configuration files:
 * - all lines beginning with '##' are comments
 * - all non-empty lines not beginning with '#' are keyword lines
 * - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines
 * - all text after a '#' not at the beginning of a line is treated as comment (like 'key = value # comment' )
 * - a line may contain a key only or a key=value pair
 * @author Michael Christen
 */
public class Configuration extends TreeMap<String, Entry> implements Serializable {

    private final static long serialVersionUID=-5961730809008841258L;

    /** the backing file; null for a purely in-memory configuration */
    private final File file;
    protected boolean lazy;

    /**
     * Creates an empty configuration without a backing file.
     * {@link #commit()} is a no-op for such an instance.
     */
    public Configuration() {
        this.file = null;
        this.lazy = false;
    }

    /**
     * Creates a configuration and loads all entries from the given file.
     * @param file the configuration file to read
     * @throws IOException if the file cannot be opened or read
     */
    public Configuration(final File file) throws IOException {
        this.file = file;
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(this.file));
            // a '##' comment line is remembered and attached to the next keyword line
            String comment = null;
            String s;
            while ((s = br.readLine()) != null) {
                if (s.startsWith("##")) {
                    comment = s.substring(2);
                    continue;
                }
                if (s.isEmpty()) continue;

                // a single leading '#' marks a disabled (commented-out) keyword line
                boolean enabled = true;
                if (s.startsWith("#")) {
                    enabled = false;
                    s = s.substring(1).trim();
                }

                // a further '#' starts an inline comment
                int i = s.indexOf("#");
                if (i >= 0) {
                    comment = s.substring(i + 1);
                    s = s.substring(0, i).trim();
                }

                final String key;
                String value = null;
                i = s.indexOf("=");
                if (i >= 0) {
                    key = s.substring(0, i).trim();
                    value = s.substring(i + 1).trim();
                    if (value.isEmpty()) value = null;
                } else {
                    key = s.trim();
                }

                if (!key.isEmpty()) {
                    final Entry entry = new Entry(key, value, enabled);
                    if (comment != null) {
                        entry.setComment(comment);
                        comment = null;
                    }
                    this.put(key, entry);
                }
            }
        } finally {
            // the read result (or the read failure) is what matters; a failing close is not reported
            if (br != null) try {br.close();} catch (final IOException ignored) {}
        }
    }

    /**
     * Checks whether an enabled entry exists for the given key.
     * @param key the key to look up, may be null
     * @return true if an entry with this key exists and is enabled
     */
    public boolean contains(String key) {
        if (key == null) return false;
        Entry e = this.get(key);
        return e == null ? false : e.enabled();
    }

    /**
     * Checks whether a disabled entry exists for the given key.
     * @param o the key to look up, may be null
     * @return true if an entry with this key exists and is disabled
     */
    public boolean containsDisabled(final String o) {
        if (o == null) return false;
        Entry e = this.get(o);
        return e == null ? false : !e.enabled();
    }

    /**
     * Adds an enabled entry without comment, see {@link #add(String, String, boolean)}.
     * @param key the key of the entry
     * @return true if the configuration was modified
     */
    public boolean add(final String key) {
        return add(key, null);
    }

    /**
     * Adds an enabled entry, see {@link #add(String, String, boolean)}.
     * @param key the key of the entry
     * @param comment the comment of the entry, may be null
     * @return true if the configuration was modified
     */
    public boolean add(final String key, final String comment) {
        return add(key, comment, true);
    }

    /**
     * Adds a new entry or updates the enabled flag and comment of an existing one.
     * If anything changed, the configuration is committed to the file right away.
     * @param key the key of the entry
     * @param comment the comment of the entry; null leaves an existing comment untouched
     * @param enabled the enabled flag of the entry
     * @return true if the configuration was modified
     */
    public boolean add(final String key, final String comment, final boolean enabled) {
        boolean modified = false;
        Entry entry = get(key);
        if (entry == null) {
            entry = new Entry(key, enabled);
            if (comment != null) entry.setComment(comment);
            this.put(key, entry);
            modified = true;
        } else {
            if (entry.enabled() != enabled) {
                entry.setEnable(enabled);
                modified = true;
            }
            if (comment != null && !comment.equals(entry.getComment())) {
                entry.setComment(comment);
                modified = true;
            }
        }
        if (modified) {
            try {
                commit();
            } catch (final IOException ignored) {
                // best effort: the in-memory change is kept even if it could not be persisted
            }
        }
        return modified;
    }

    /**
     * Saves the configuration back to the file. Lines of the existing file are
     * used as template: comment and empty lines are written as they are, keyword
     * lines are replaced by the current state of the entry (or by an empty line
     * if the entry does not exist any more). Entries which do not appear in the
     * existing file are appended at the end.
     * Does nothing if this configuration has no backing file.
     * @throws IOException if the file cannot be opened for writing or the
     *         appended entries cannot be written
     */
    public void commit() throws IOException {
        if (this.file == null) return;

        // create a temporary bak file, use it as template to preserve user comments
        final File bakfile = new File(this.file.getAbsolutePath() + ".bak");
        try {
            Files.copy(this.file, bakfile);
        } catch (final IOException e) {
            // NOTE(review): presumably the copy fails when the config file does not exist yet - confirm
            this.file.createNewFile();
        }

        // shallow copy: every entry written from the template is removed, the rest is appended
        final TreeMap<String, Entry> pending = new TreeMap<String, Entry>(this);

        final BufferedWriter writer = new BufferedWriter(new FileWriter(this.file));
        try {
            try {
                rewriteTemplate(bakfile, writer, pending);
                bakfile.delete();
            } catch (final IOException ignored) {
                // best effort: without a readable template (i.e. no bak file for a new
                // configuration) all entries are simply appended below; a bak file that
                // could not be processed completely is left in place
            }

            // write remaining entries (not already written);
            // the entry itself must be written, not the map entry, because
            // Map.Entry.toString() would produce a "key=<entry line>" line
            for (final Entry e : pending.values()) {
                writer.write(e.toString() + "\n");
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Copies the template to the writer, replacing every keyword line by the
     * current state of its entry.
     * @param template the file whose lines are used as template
     * @param writer the target of the rewritten lines
     * @param pending entries still to be written; each key found in the template is removed
     * @throws IOException if the template cannot be read or the writer fails
     */
    private void rewriteTemplate(final File template, final BufferedWriter writer, final Map<String, Entry> pending) throws IOException {
        final BufferedReader reader = new BufferedReader(new FileReader(template));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("##") || line.isEmpty()) {
                    // is comment line - write as is
                    writer.write(line + "\n");
                    continue;
                }
                final String key = keyOf(line);
                if (key.isEmpty()) {
                    writer.write(line + "\n");
                    continue;
                }
                final Entry e = this.get(key);
                if (e != null) {
                    writer.write(e.toString());
                    pending.remove(key); // remove written entries
                }
                // an entry which does not exist any more leaves an empty line
                writer.write("\n");
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Extracts the key from a keyword line of a configuration file.
     * @param line a non-empty line which is not a '##' comment line
     * @return the trimmed key, empty if the line does not contain one
     */
    private static String keyOf(final String line) {
        String s = line.startsWith("#") ? line.substring(1).trim() : line;
        int i = s.indexOf("#");
        if (i >= 0) {
            // second # = is a line comment
            s = s.substring(0, i).trim();
        }
        i = s.indexOf("=");
        return (i >= 0 ? s.substring(0, i) : s).trim();
    }

    /**
     * @return an iterator over all entries, enabled and disabled ones
     */
    public Iterator<Entry> entryIterator() {
        return this.values().iterator();
    }

    /**
     * A single configuration line: a key with an optional value, an optional
     * comment and a flag telling whether the line is enabled.
     */
    public static class Entry {
        private final String key;
        private String value;
        private boolean enabled;
        private String comment;

        /**
         * Creates an entry from a key or from a 'key = value' string.
         * @param key the key; for backward compatibility a string containing a "="
         *        (equal sign), e.g. myattribute = 123, is split into key and value
         * @param enabled the enabled flag
         */
        public Entry(final String key, final boolean enabled) {
            this.enabled = enabled;
            final int i = key.indexOf("=");
            if (i >= 0) {
                this.key = key.substring(0, i).trim();
                this.value = key.substring(i + 1).trim();
            } else {
                this.key = key;
                this.value = null;
            }
        }

        /**
         * Creates an entry from the given key and value, both taken as they are.
         * @param key the key
         * @param value the value, may be null
         * @param enabled the enabled flag
         */
        public Entry (final String key, String value, final boolean enabled) {
            this.enabled = enabled;
            this.key = key;
            this.value = value;
        }

        public String key() {
            return this.key;
        }

        /**
         * Sets the value of this entry.
         * @param theValue the new value; an empty string is stored as null
         */
        public void setValue(String theValue) {
            //empty string not wanted
            if ((theValue != null) && theValue.isEmpty()) {
                this.value = null;
            } else {
                this.value = theValue;
            }
        }

        public String getValue() {
            return this.value;
        }

        public void setComment(String comment) {
            this.comment = comment;
        }

        public String getComment() {
            return this.comment;
        }

        public void setEnable(boolean value){
            this.enabled = value;
        }

        public boolean enabled() {
            return this.enabled;
        }

        /**
         * @return the line as it is written to the configuration file
         */
        @Override
        public String toString(){
            // output string to write to config file
            return (this.enabled ? "" : "#") + (this.value != null ? this.key + " = " + this.value : this.key ) + (this.comment != null ? " #" + this.comment : "");
        }
    }

    /**
     * Prints all enabled and then all disabled entries of a configuration file.
     * @param args the path of the configuration file as first element
     */
    public static void main(final String[] args) {
        if (args.length == 0) return;
        final File f = new File (args[0]);
        Configuration cs;
        try {
            cs = new Configuration(f);
            Iterator<Entry> i = cs.entryIterator();
            Entry k;
            System.out.println("\nall activated attributes:");
            while (i.hasNext()) {
                k = i.next();
                if (k.enabled()) System.out.println(k.toString());
            }
            i = cs.entryIterator();
            System.out.println("\nall deactivated attributes:");
            while (i.hasNext()) {
                k = i.next();
                if (!k.enabled()) System.out.println(k.toString() );
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

@ -125,7 +125,7 @@ public final class Heap extends HeapModifier implements BLOB {
*/ */
@Override @Override
public boolean containsKey(byte[] key) { public boolean containsKey(byte[] key) {
assert this.index != null; if (this.index == null) return false;
key = normalizeKey(key); key = normalizeKey(key);
synchronized (this) { synchronized (this) {
// check the buffer // check the buffer
@ -245,7 +245,6 @@ public final class Heap extends HeapModifier implements BLOB {
synchronized (this) { synchronized (this) {
// check the buffer // check the buffer
assert this.buffer != null;
if (this.buffer != null) { if (this.buffer != null) {
byte[] blob = this.buffer.get(key); byte[] blob = this.buffer.get(key);
if (blob != null) return blob; if (blob != null) return blob;
@ -267,7 +266,6 @@ public final class Heap extends HeapModifier implements BLOB {
synchronized (this) { synchronized (this) {
// check the buffer // check the buffer
assert this.buffer != null;
if (this.buffer != null) { if (this.buffer != null) {
byte[] blob = this.buffer.get(key); byte[] blob = this.buffer.get(key);
if (blob != null) return blob.length; if (blob != null) return blob.length;

@ -1066,7 +1066,7 @@ public final class Protocol {
instance.close(); instance.close();
// no need to close this here because that sends a commit to remote solr which is not wanted here // no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (Throwable e) { } catch (Throwable e) {
Network.log.logInfo("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")", e); Network.log.logInfo("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
return -1; return -1;
} }
} }

@ -286,7 +286,7 @@ public final class Switchboard extends serverSwitch {
private static Switchboard sb; private static Switchboard sb;
public HashMap<String, Object[]> crawlJobsStatus = new HashMap<String, Object[]>(); public HashMap<String, Object[]> crawlJobsStatus = new HashMap<String, Object[]>();
public Switchboard(final File dataPath, final File appPath, final String initPath, final String configPath) throws IOException { public Switchboard(final File dataPath, final File appPath, final String initPath, final String configPath) {
super(dataPath, appPath, initPath, configPath); super(dataPath, appPath, initPath, configPath);
sb = this; sb = this;
// check if port is already occupied // check if port is already occupied
@ -349,7 +349,11 @@ public final class Switchboard extends serverSwitch {
for (String fs : defaultWorkPath.list()) { for (String fs : defaultWorkPath.list()) {
File wf = new File(this.workPath, fs); File wf = new File(this.workPath, fs);
if (!wf.exists()) { if (!wf.exists()) {
Files.copy(new File(defaultWorkPath, fs), wf); try {
Files.copy(new File(defaultWorkPath, fs), wf);
} catch (IOException e) {
Log.logException(e);
}
} }
} }
} }
@ -387,7 +391,13 @@ public final class Switchboard extends serverSwitch {
setConfig(SwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount)); setConfig(SwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
// load the network definition // load the network definition
overwriteNetworkDefinition(); try {
overwriteNetworkDefinition();
} catch (FileNotFoundException e) {
Log.logException(e);
} catch (IOException e) {
Log.logException(e);
}
// start indexing management // start indexing management
this.log.logConfig("Starting Indexing Management"); this.log.logConfig("Starting Indexing Management");
@ -410,53 +420,70 @@ public final class Switchboard extends serverSwitch {
final File solrCollectionConfigurationWorkFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_COLLECTION_CONFIGURATION_NAME); final File solrCollectionConfigurationWorkFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_COLLECTION_CONFIGURATION_NAME);
final File solrWebgraphConfigurationInitFile = new File(getAppPath(), "defaults/" + SOLR_WEBGRAPH_CONFIGURATION_NAME); final File solrWebgraphConfigurationInitFile = new File(getAppPath(), "defaults/" + SOLR_WEBGRAPH_CONFIGURATION_NAME);
final File solrWebgraphConfigurationWorkFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_WEBGRAPH_CONFIGURATION_NAME); final File solrWebgraphConfigurationWorkFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_WEBGRAPH_CONFIGURATION_NAME);
CollectionConfiguration solrCollectionConfigurationWork = null;
WebgraphConfiguration solrWebgraphConfigurationWork = null;
// migrate the old Schema file path to a new one // migrate the old Schema file path to a new one
final File solrCollectionConfigurationWorkOldFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_COLLECTION_CONFIGURATION_NAME_OLD); final File solrCollectionConfigurationWorkOldFile = new File(getDataPath(), "DATA/SETTINGS/" + SOLR_COLLECTION_CONFIGURATION_NAME_OLD);
if (solrCollectionConfigurationWorkOldFile.exists() && !solrCollectionConfigurationWorkFile.exists()) solrCollectionConfigurationWorkOldFile.renameTo(solrCollectionConfigurationWorkFile); if (solrCollectionConfigurationWorkOldFile.exists() && !solrCollectionConfigurationWorkFile.exists()) solrCollectionConfigurationWorkOldFile.renameTo(solrCollectionConfigurationWorkFile);
// initialize the schema if it does not yet exist // initialize the collection schema if it does not yet exist
if (!solrCollectionConfigurationWorkFile.exists()) Files.copy(solrCollectionConfigurationInitFile, solrCollectionConfigurationWorkFile); if (!solrCollectionConfigurationWorkFile.exists()) try {
Files.copy(solrCollectionConfigurationInitFile, solrCollectionConfigurationWorkFile);
} catch (IOException e) {Log.logException(e);}
// lazy definition of schema: do not write empty fields // lazy definition of schema: do not write empty fields
final boolean solrlazy = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true); final boolean solrlazy = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true);
// define collection schema // define collection schema
final CollectionConfiguration solrCollectionConfigurationInit = new CollectionConfiguration(solrCollectionConfigurationInitFile, solrlazy); try {
final CollectionConfiguration solrCollectionConfigurationWork = new CollectionConfiguration(solrCollectionConfigurationWorkFile, solrlazy); final CollectionConfiguration solrCollectionConfigurationInit = new CollectionConfiguration(solrCollectionConfigurationInitFile, solrlazy);
// update the working scheme with the backup scheme. This is necessary to include new features. solrCollectionConfigurationWork = new CollectionConfiguration(solrCollectionConfigurationWorkFile, solrlazy);
// new features are always activated by default (if activated in input-backupScheme) // update the working scheme with the backup scheme. This is necessary to include new features.
solrCollectionConfigurationWork.fill(solrCollectionConfigurationInit, true); // new features are always activated by default (if activated in input-backupScheme)
// switch on some fields which are necessary for ranking and faceting solrCollectionConfigurationWork.fill(solrCollectionConfigurationInit, true);
for (CollectionSchema field: new CollectionSchema[]{ // switch on some fields which are necessary for ranking and faceting
CollectionSchema.host_s, CollectionSchema.load_date_dt, for (CollectionSchema field: new CollectionSchema[]{
CollectionSchema.url_file_ext_s, CollectionSchema.last_modified, // needed for media search and /date operator CollectionSchema.host_s, CollectionSchema.load_date_dt,
/*YaCySchema.url_paths_sxt,*/ CollectionSchema.host_organization_s, // needed to search in the url CollectionSchema.url_file_ext_s, CollectionSchema.last_modified, // needed for media search and /date operator
/*YaCySchema.inboundlinks_protocol_sxt,*/ CollectionSchema.inboundlinks_urlstub_txt, // needed for HostBrowser /*YaCySchema.url_paths_sxt,*/ CollectionSchema.host_organization_s, // needed to search in the url
/*YaCySchema.outboundlinks_protocol_sxt,*/ CollectionSchema.outboundlinks_urlstub_txt // needed to enhance the crawler /*YaCySchema.inboundlinks_protocol_sxt,*/ CollectionSchema.inboundlinks_urlstub_txt, // needed for HostBrowser
}) { /*YaCySchema.outboundlinks_protocol_sxt,*/ CollectionSchema.outboundlinks_urlstub_txt // needed to enhance the crawler
SchemaConfiguration.Entry entry = solrCollectionConfigurationWork.get(field.name()); entry.setEnable(true); solrCollectionConfigurationWork.put(field.name(), entry); }) {
} SchemaConfiguration.Entry entry = solrCollectionConfigurationWork.get(field.name()); entry.setEnable(true); solrCollectionConfigurationWork.put(field.name(), entry);
solrCollectionConfigurationWork.commit(); }
solrCollectionConfigurationWork.commit();
} catch (IOException e) {Log.logException(e);}
// define webgraph schema // initialize the webgraph schema if it does not yet exist
final WebgraphConfiguration solrWebgraphConfigurationInit = new WebgraphConfiguration(solrWebgraphConfigurationInitFile, solrlazy); if (!solrWebgraphConfigurationWorkFile.exists()) try {
final WebgraphConfiguration solrWebgraphConfigurationWork = new WebgraphConfiguration(solrWebgraphConfigurationWorkFile, solrlazy); Files.copy(solrWebgraphConfigurationInitFile, solrWebgraphConfigurationWorkFile);
solrWebgraphConfigurationWork.fill(solrWebgraphConfigurationInit, true); } catch (IOException e) {Log.logException(e);}
solrWebgraphConfigurationWork.commit();
// define webgraph schema
try {
final WebgraphConfiguration solrWebgraphConfigurationInit = new WebgraphConfiguration(solrWebgraphConfigurationInitFile, solrlazy);
solrWebgraphConfigurationWork = new WebgraphConfiguration(solrWebgraphConfigurationWorkFile, solrlazy);
solrWebgraphConfigurationWork.fill(solrWebgraphConfigurationInit, true);
solrWebgraphConfigurationWork.commit();
} catch (IOException e) {Log.logException(e);}
// initialize index // initialize index
ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0); ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
this.index = new Segment(this.log, segmentsPath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork); this.index = new Segment(this.log, segmentsPath, solrCollectionConfigurationWork, solrWebgraphConfigurationWork);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax); if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) try {
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax); this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
} catch (IOException e) {Log.logException(e);}
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) try {
this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
} catch (IOException e) {Log.logException(e);}
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) { if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {
this.index.connectUrlDb(this.useTailCache, this.exceed134217727); this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
this.index.fulltext().connectLocalSolr(); try {this.index.fulltext().connectLocalSolr();} catch (IOException e) {Log.logException(e);}
} }
this.index.writeWebgraph(this.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false)); this.index.writeWebgraph(this.getConfigBool(SwitchboardConstants.CORE_SERVICE_WEBGRAPH, false));
// set up the solr interface // set up the solr interface
final String solrurls = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr"); final String solrurls = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr");
final boolean usesolr = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) & solrurls.length() > 0; final boolean usesolr = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) & solrurls.length() > 0;
@ -590,46 +617,6 @@ public final class Switchboard extends serverSwitch {
final File rankingPath = new File(this.appPath, "ranking/YBR".replace('/', File.separatorChar)); final File rankingPath = new File(this.appPath, "ranking/YBR".replace('/', File.separatorChar));
BlockRank.loadBlockRankTable(rankingPath, 16); BlockRank.loadBlockRankTable(rankingPath, 16);
// load distributed ranking
// very large memory configurations allow to re-compute a ranking table
/*
final File hostIndexFile = new File(this.queuesRoot, "hostIndex.blob");
if (MemoryControl.available() > 1024 * 1024 * 1024) new Thread() {
public void run() {
ReferenceContainerCache<HostReference> hostIndex; // this will get large, more than 0.5 million entries by now
if (!hostIndexFile.exists()) {
hostIndex = BlockRank.collect(Switchboard.this.peers, Switchboard.this.webStructure, Integer.MAX_VALUE);
BlockRank.saveHostIndex(hostIndex, hostIndexFile);
} else {
hostIndex = BlockRank.loadHostIndex(hostIndexFile);
}
// use an index segment to find hosts for given host hashes
final String segmentName = getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
final Segment segment = Switchboard.this.indexSegments.segment(segmentName);
final MetadataRepository metadata = segment.urlMetadata();
Map<String,HostStat> hostHashResolver;
try {
hostHashResolver = metadata.domainHashResolver(metadata.domainSampleCollector());
} catch (final IOException e) {
hostHashResolver = new HashMap<String, HostStat>();
}
// recursively compute a new ranking table
Switchboard.this.log.logInfo("BLOCK RANK: computing new ranking tables...");
BlockRank.ybrTables = BlockRank.evaluate(hostIndex, hostHashResolver, null, 0);
hostIndex = null; // we don't need that here any more, so free the memory
// use the web structure and the hostHash resolver to analyse the ranking table
Switchboard.this.log.logInfo("BLOCK RANK: analysis of " + BlockRank.ybrTables.length + " tables...");
BlockRank.analyse(Switchboard.this.webStructure, hostHashResolver);
// store the new table
Switchboard.this.log.logInfo("BLOCK RANK: storing fresh table...");
BlockRank.storeBlockRankTable(rankingPath);
}
}.start();
*/
// start a cache manager // start a cache manager
this.log.logConfig("Starting HT Cache Manager"); this.log.logConfig("Starting HT Cache Manager");
@ -662,27 +649,43 @@ public final class Switchboard extends serverSwitch {
this.log.logInfo("RELEASE Path = " + this.releasePath.getAbsolutePath()); this.log.logInfo("RELEASE Path = " + this.releasePath.getAbsolutePath());
// starting message board // starting message board
initMessages(); try {
initMessages();
} catch (IOException e) {
Log.logException(e);
}
// starting wiki // starting wiki
initWiki(); try {
initWiki();
} catch (IOException e) {
Log.logException(e);
}
//starting blog //starting blog
initBlog(); try {
initBlog();
} catch (IOException e) {
Log.logException(e);
}
// init User DB // init User DB
this.log.logConfig("Loading User DB"); this.log.logConfig("Loading User DB");
final File userDbFile = new File(getDataPath(), "DATA/SETTINGS/user.heap"); final File userDbFile = new File(getDataPath(), "DATA/SETTINGS/user.heap");
this.userDB = new UserDB(userDbFile); try {
this.log.logConfig("Loaded User DB from file " this.userDB = new UserDB(userDbFile);
+ userDbFile.getName() this.log.logConfig("Loaded User DB from file "
+ ", " + userDbFile.getName()
+ this.userDB.size() + ", "
+ " entries" + this.userDB.size()
+ ", " + " entries"
+ ppRamString(userDbFile.length() / 1024)); + ", "
+ ppRamString(userDbFile.length() / 1024));
} catch (IOException e) {
Log.logException(e);
}
// init user triplestores // init user triplestores
JenaTripleStore.initPrivateStores(); JenaTripleStore.initPrivateStores();
// init html parser evaluation scheme // init html parser evaluation scheme
@ -690,14 +693,22 @@ public final class Switchboard extends serverSwitch {
String[] settingsList = parserPropertiesPath.list(); String[] settingsList = parserPropertiesPath.list();
for ( final String l : settingsList ) { for ( final String l : settingsList ) {
if ( l.startsWith("parser.") && l.endsWith(".properties") ) { if ( l.startsWith("parser.") && l.endsWith(".properties") ) {
Evaluation.add(new File(parserPropertiesPath, l)); try {
Evaluation.add(new File(parserPropertiesPath, l));
} catch (IOException e) {
Log.logException(e);
}
} }
} }
parserPropertiesPath = new File(getDataPath(), "DATA/SETTINGS/"); parserPropertiesPath = new File(getDataPath(), "DATA/SETTINGS/");
settingsList = parserPropertiesPath.list(); settingsList = parserPropertiesPath.list();
for ( final String l : settingsList ) { for ( final String l : settingsList ) {
if ( l.startsWith("parser.") && l.endsWith(".properties") ) { if ( l.startsWith("parser.") && l.endsWith(".properties") ) {
Evaluation.add(new File(parserPropertiesPath, l)); try {
Evaluation.add(new File(parserPropertiesPath, l));
} catch (IOException e) {
Log.logException(e);
}
} }
} }
@ -751,7 +762,11 @@ public final class Switchboard extends serverSwitch {
// load the robots.txt db // load the robots.txt db
this.log.logConfig("Initializing robots.txt DB"); this.log.logConfig("Initializing robots.txt DB");
this.robots = new RobotsTxt(this.tables, this.loader); this.robots = new RobotsTxt(this.tables, this.loader);
this.log.logConfig("Loaded robots.txt DB: " + this.robots.size() + " entries"); try {
this.log.logConfig("Loaded robots.txt DB: " + this.robots.size() + " entries");
} catch (IOException e) {
Log.logException(e);
}
// load oai tables // load oai tables
final Map<String, File> oaiFriends = final Map<String, File> oaiFriends =
@ -2851,7 +2866,6 @@ public final class Switchboard extends serverSwitch {
try { try {
scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay); scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay);
} catch (IOException e) { } catch (IOException e) {
Log.logException(e);
return "scraper cannot load URL: " + e.getMessage(); return "scraper cannot load URL: " + e.getMessage();
} }

@ -356,18 +356,20 @@ public final class Fulltext {
} }
public void putDocument(final SolrInputDocument doc) throws IOException { public void putDocument(final SolrInputDocument doc) throws IOException {
SolrConnector connector = this.getDefaultConnector();
if (connector == null) return;
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
byte[] idb = ASCII.getBytes(id); byte[] idb = ASCII.getBytes(id);
try { try {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
Date sdDate = (Date) this.getDefaultConnector().getFieldById(id, CollectionSchema.last_modified.getSolrFieldName()); Date sdDate = (Date) connector.getFieldById(id, CollectionSchema.last_modified.getSolrFieldName());
Date docDate = null; Date docDate = null;
if (sdDate == null || (docDate = SchemaConfiguration.getDate(doc, CollectionSchema.last_modified)) == null || sdDate.before(docDate)) { if (sdDate == null || (docDate = SchemaConfiguration.getDate(doc, CollectionSchema.last_modified)) == null || sdDate.before(docDate)) {
if (this.collectionConfiguration.contains(CollectionSchema.ip_s)) { if (this.collectionConfiguration.contains(CollectionSchema.ip_s)) {
// ip_s needs a dns lookup which causes blockings during search here // ip_s needs a dns lookup which causes blockings during search here
this.getDefaultConnector().add(doc); connector.add(doc);
} else synchronized (this.solrInstances) { } else synchronized (this.solrInstances) {
this.getDefaultConnector().add(doc); connector.add(doc);
} }
} }
} catch (SolrException e) { } catch (SolrException e) {

@ -90,8 +90,9 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* the configuration file simply contains a list of lines with keywords * the configuration file simply contains a list of lines with keywords
* or keyword = value lines (while value is a custom Solr field name * or keyword = value lines (while value is a custom Solr field name
* @param configurationFile * @param configurationFile
* @throws IOException
*/ */
public CollectionConfiguration(final File configurationFile, boolean lazy) { public CollectionConfiguration(final File configurationFile, boolean lazy) throws IOException {
super(configurationFile); super(configurationFile);
super.lazy = lazy; super.lazy = lazy;
// check consistency: compare with YaCyField enum // check consistency: compare with YaCyField enum
@ -109,6 +110,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// check consistency the other way: look if all enum constants in SolrField appear in the configuration file // check consistency the other way: look if all enum constants in SolrField appear in the configuration file
for (CollectionSchema field: CollectionSchema.values()) { for (CollectionSchema field: CollectionSchema.values()) {
if (this.get(field.name()) == null) { if (this.get(field.name()) == null) {
if (CollectionSchema.author_sxt.getSolrFieldName().endsWith(field.name())) continue; // exception for this: that is a copy-field
Log.logWarning("SolrCollectionWriter", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'"); Log.logWarning("SolrCollectionWriter", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
} }
} }

@ -55,9 +55,9 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
* initialize with an empty ConfigurationSet which will cause that all the index * initialize with an empty ConfigurationSet which will cause that all the index
* attributes are used * attributes are used
*/ */
public WebgraphConfiguration() { public WebgraphConfiguration(boolean lazy) {
super(); super();
this.lazy = false; this.lazy = lazy;
} }
/** /**
@ -65,8 +65,9 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
* the configuration file simply contains a list of lines with keywords * the configuration file simply contains a list of lines with keywords
* or keyword = value lines (while value is a custom Solr field name * or keyword = value lines (while value is a custom Solr field name
* @param configurationFile * @param configurationFile
* @throws IOException
*/ */
public WebgraphConfiguration(final File configurationFile, boolean lazy) { public WebgraphConfiguration(final File configurationFile, boolean lazy) throws IOException {
super(configurationFile); super(configurationFile);
this.lazy = lazy; this.lazy = lazy;
// check consistency: compare with YaCyField enum // check consistency: compare with YaCyField enum

@ -78,7 +78,7 @@ import org.apache.commons.logging.LogFactory;
*/ */
public class InternetGatewayDevice { public class InternetGatewayDevice {
private final static Log log = LogFactory.getLog( InternetGatewayDevice.class ); //private final static Log log = LogFactory.getLog( InternetGatewayDevice.class );
private final UPNPRootDevice igd; private final UPNPRootDevice igd;
private UPNPMessageFactory msgFactory; private UPNPMessageFactory msgFactory;
@ -154,7 +154,7 @@ public class InternetGatewayDevice {
} }
if ( this.msgFactory == null ) { if ( this.msgFactory == null ) {
// Nothing found using WANCommonInterfaceConfig! IP by default // Nothing found using WANCommonInterfaceConfig! IP by default
log.warn( "Unable to detect active WANIPConnection, dfaulting to urn:schemas-upnp-org:service:WANIPConnection:1" ); //log.warn( "Unable to detect active WANIPConnection, dfaulting to urn:schemas-upnp-org:service:WANIPConnection:1" );
this.msgFactory = UPNPMessageFactory.getNewInstance( wanIPSrv ); this.msgFactory = UPNPMessageFactory.getNewInstance( wanIPSrv );
} }
} }
@ -171,7 +171,7 @@ public class InternetGatewayDevice {
// ok probably not the IP interface // ok probably not the IP interface
} catch ( IOException ex ) { } catch ( IOException ex ) {
// not really normal // not really normal
log.warn( "IOException occured during device detection", ex ); //log.warn( "IOException occured during device detection", ex );
} }
if ( ipToParse != null && ipToParse.length() > 0 && !ipToParse.equals( "0.0.0.0" ) ) { if ( ipToParse != null && ipToParse.length() > 0 && !ipToParse.equals( "0.0.0.0" ) ) {
try { try {
@ -259,7 +259,7 @@ public class InternetGatewayDevice {
valid.add( new InternetGatewayDevice( device, WANIPConnection, WANPPPConnection ) ); valid.add( new InternetGatewayDevice( device, WANIPConnection, WANPPPConnection ) );
} catch ( UnsupportedOperationException ex ) { } catch ( UnsupportedOperationException ex ) {
// the device is either not IP or PPP // the device is either not IP or PPP
if ( log.isDebugEnabled() ) log.debug( "UnsupportedOperationException during discovery " + ex.getMessage() ); //if ( log.isDebugEnabled() ) log.debug( "UnsupportedOperationException during discovery " + ex.getMessage() );
} }
} }
if ( valid.isEmpty() ) { if ( valid.isEmpty() ) {

Loading…
Cancel
Save