Merge origin/master into jetty

pull/1/head
reger 11 years ago
commit c84c313fe1

@ -3,7 +3,7 @@ javacSource=1.6
javacTarget=1.6
# Release Configuration
releaseVersion=1.66
releaseVersion=1.67
stdReleaseFile=yacy${branch}_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

@ -208,10 +208,6 @@ public class SettingsAck_p {
} else if (staticIP.startsWith("https://")) {
if (staticIP.length() > 8) { staticIP = staticIP.substring(8); } else { staticIP = ""; }
}
// TODO IPv6 support!
if (staticIP.indexOf(':',0) > 0) {
staticIP = staticIP.substring(0, staticIP.indexOf(':',0));
}
if (staticIP.isEmpty()) {
serverCore.useStaticIP = false;
} else {

@ -2,7 +2,6 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.protocol.ClientIdentification;
@ -94,7 +93,7 @@ public class get_metadata {
}
public static int putTags(final String tagString, final String var) {
final String list[] = Pattern.compile(YMarkUtil.TAGS_SEPARATOR).split(tagString, 0);
final String list[] = YMarkUtil.TAGS_SEPARATOR_PATTERN.split(tagString, 0);
int count = 0;
for (final String element : list) {
final String tag = element;

@ -4,7 +4,6 @@ import java.util.Date;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.regex.Pattern;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.encoding.UTF8;
@ -98,11 +97,11 @@ public class get_treeview {
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
int n = Pattern.compile(YMarkUtil.FOLDERS_SEPARATOR).split(root, 0).length;
int n = YMarkUtil.FOLDERS_SEPARATOR_PATTERN.split(root, 0).length;
if (n == 0) n = 1;
while (it.hasNext()) {
final String folder = it.next();
foldername = folder.split(YMarkUtil.FOLDERS_SEPARATOR);
foldername = YMarkUtil.FOLDERS_SEPARATOR_PATTERN.split(folder);
if (foldername.length == n+1) {
prop.put("folders_"+count+"_foldername", foldername[n]);
prop.put("folders_"+count+"_expanded", "false");

@ -52,7 +52,7 @@ public class get_xbel {
root = "";
}
final int root_depth = root.split(YMarkUtil.FOLDERS_SEPARATOR).length - 1;
final int root_depth = YMarkUtil.FOLDERS_SEPARATOR_PATTERN.split(root).length - 1;
// Log.logInfo(YMarkTables.BOOKMARKS_LOG, "root: "+root+" root_depth: "+root_depth);
Iterator<String> fit = null;
Iterator<Tables.Row> bit = null;
@ -67,7 +67,7 @@ public class get_xbel {
while (fit.hasNext()) {
final String folder = fit.next();
foldername = folder.split(YMarkUtil.FOLDERS_SEPARATOR);
foldername = YMarkUtil.FOLDERS_SEPARATOR_PATTERN.split(folder);
final int len = foldername.length -1;
if(n > root_depth) {
for (; len <= n; n--) {

@ -152,6 +152,12 @@ public class select {
}
sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time
// count remote searches if this was part of a p2p search
if (post.containsKey("partitions")) {
final int partitions = post.getInt("partitions", 30);
sb.searchQueriesGlobal += 1.0f / partitions; // increase query counter
}
// get the ranking profile id
int profileNr = post.getInt("profileNr", 0);

@ -36,6 +36,7 @@ import net.yacy.server.serverSwitch;
* or to generate json (or jsonp) with
* http://localhost:8090/yacy/seedlist.json
* http://localhost:8090/yacy/seedlist.json?callback=seedlist
* http://localhost:8090/yacy/seedlist.json?node=true&me=false&address=true
*/
public final class seedlist {
@ -45,7 +46,11 @@ public final class seedlist {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
int maxcount = Math.min(LISTMAX, post == null ? Integer.MAX_VALUE : post.getInt("maxcount", Integer.MAX_VALUE));
final ArrayList<Seed> v = sb.peers.getSeedlist(maxcount, true);
float minversion = Math.min(LISTMAX, post == null ? 0.0f : post.getFloat("minversion", 0.0f));
boolean nodeonly = post == null || !post.containsKey("node") ? false : post.getBoolean("node");
boolean includeme = post == null || !post.containsKey("me") ? true : post.getBoolean("me");
boolean addressonly = post == null || !post.containsKey("address") ? false : post.getBoolean("address");
final ArrayList<Seed> v = sb.peers.getSeedlist(maxcount, includeme, nodeonly, minversion);
final serverObjects prop = new serverObjects();
// write simple-encoded seed lines or json
@ -66,16 +71,21 @@ public final class seedlist {
prop.putJSON("peers_" + i + "_map_0_k", Seed.HASH);
prop.putJSON("peers_" + i + "_map_0_v", v.get(i).hash);
prop.put("peers_" + i + "_map_0_c", 1);
Map<String, String> map = v.get(i).getMap();
Seed seed = v.get(i);
Map<String, String> map = seed.getMap();
int c = 1;
for (Map.Entry<String, String> m: map.entrySet()) {
prop.putJSON("peers_" + i + "_map_" + c + "_k", m.getKey());
prop.putJSON("peers_" + i + "_map_" + c + "_v", m.getValue());
prop.put("peers_" + i + "_map_" + c + "_c", 1);
c++;
if (!addressonly) {
for (Map.Entry<String, String> m: map.entrySet()) {
prop.putJSON("peers_" + i + "_map_" + c + "_k", m.getKey());
prop.putJSON("peers_" + i + "_map_" + c + "_v", m.getValue());
prop.put("peers_" + i + "_map_" + c + "_c", 1);
c++;
}
}
prop.put("peers_" + i + "_map_" + (c - 1) + "_c", 0);
prop.put("peers_" + i + "_map", c);
prop.putJSON("peers_" + i + "_map_" + c + "_k", "Address");
prop.putJSON("peers_" + i + "_map_" + c + "_v", seed.getPublicAddress());
prop.put("peers_" + i + "_map_" + c + "_c", 0);
prop.put("peers_" + i + "_map", c + 1);
prop.put("peers_" + i + "_c", i < v.size() - 1 ? 1 : 0);
}
prop.put("peers", v.size());

@ -73,7 +73,7 @@
<compilation-unit>
<package-root>source</package-root>
<package-root>htroot</package-root>
<classpath mode="compile">lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.7.1.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.1.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.1.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.15.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.7.1.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.2.1.jar;lib/lucene-analyzers-phonetic-4.2.1.jar;lib/lucene-core-4.2.1.jar;lib/lucene-misc-4.2.1.jar;lib/lucene-spatial-4.2.1.jar;lib/metadata-extractor-2.4.0-beta-1.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.7.1.jar;lib/poi-3.6-20091214.jar;lib/poi-scratchpad-3.6-20091214.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.2.1.jar;lib/solr-solrj-4.2.1.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar</classpath>
<classpath mode="compile">lib/activation.jar;lib/apache-mime4j-0.6.jar;lib/arq-2.8.7.jar;lib/bcmail-jdk15-145.jar;lib/bcprov-jdk15-145.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.4.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-httpclient-3.1.jar;lib/commons-io-2.1.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.8.2.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-13.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.1.jar;lib/httpcore-4.3.jar;lib/httpmime-4.3.1.jar;lib/icu4j-core.jar;lib/iri-0.8.jar;lib/J7Zip-modified.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.2.jar;lib/jena-2.6.4.jar;lib/jsch-0.1.42.jar;lib/json-simple-1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-1.2.17.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.5.0.jar;lib/lucene-analyzers-phonetic-4.5.0.jar;lib/lucene-core-4.5.0.jar;lib/lucene-misc-4.5.0.jar;lib/lucene-spatial-4.5.0.jar;lib/metadata-extractor-2.6.2.jar;lib/mysql-connector-java-5.1.12-bin.jar;lib/pdfbox-1.8.2.jar;lib/poi-3.6-20091214.jar;lib/poi-scratchpad-3.6-20091214.jar;lib/sax-2.0.1.jar;lib/servlet-api-2.5-20081211.jar;lib/slf4j-api-1.7.2.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.5.0.jar;lib/solr-solrj-4.5.0.jar;lib/spatial4j-0.3.jar;lib/webcat-0.1-swf.jar;lib/wstx-asl-3.2.7.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.5.jar</classpath>
<source-level>1.6</source-level>
</compilation-unit>
</java-data>

@ -288,7 +288,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).charAt(0);
}
public final Pattern rootPattern = Pattern.compile("/|/index.htm(l?)|/index.php|/home.htm(l?)|/home.php|/default.htm(l?)|/default.php");
public final static Pattern rootPattern = Pattern.compile("/|/index.htm(l?)|/index.php|/home.htm(l?)|/home.php|/default.htm(l?)|/default.php");
public final boolean probablyRootURL() {
return this.path.length() <= 1 || rootPattern.matcher(this.path).matches();

@ -31,7 +31,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
@ -158,17 +157,15 @@ public class SchemaConfiguration extends Configuration implements Serializable {
continue uniquecheck;
}
try {
if (segment.fulltext().getDefaultConnector().existsByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + checkhash.toString() + "\"")) {
final SolrDocument doc = segment.fulltext().getDefaultConnector().getDocumentById(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + checkhash.toString() + "\"");
if (doc != null) {
// switch unique attribute in new document
sid.setField(uniquefield.getSolrFieldName(), false);
// switch attribute also in all existing documents (which should be exactly only one!)
SolrDocumentList docs = segment.fulltext().getDefaultConnector().getDocumentListByQuery(CollectionSchema.host_id_s + ":\"" + hostid + "\" AND " + signaturefield.getSolrFieldName() + ":\"" + checkhash.toString() + "\" AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000);
for (SolrDocument doc: docs) {
SolrInputDocument sidContext = segment.fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
sidContext.setField(uniquefield.getSolrFieldName(), false);
segment.putDocumentInQueue(sidContext);
changed = true;
}
// switch attribute in existing document
SolrInputDocument sidContext = segment.fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
sidContext.setField(uniquefield.getSolrFieldName(), false);
segment.putDocumentInQueue(sidContext);
changed = true;
} else {
sid.setField(uniquefield.getSolrFieldName(), true);
}

@ -46,6 +46,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
public abstract class AbstractSolrConnector implements SolrConnector {
@ -71,16 +72,6 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
protected final static int pagesize = 100;
@Override
public boolean existsByQuery(final String query) throws IOException {
try {
long count = getCountByQuery(query);
return count > 0;
} catch (final Throwable e) {
return false;
}
}
/**
* Get a query result from solr as a stream of documents.
* The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
@ -189,11 +180,16 @@ public abstract class AbstractSolrConnector implements SolrConnector {
params.setIncludeScore(false);
// query the server
QueryResponse rsp = getResponseByParams(params);
final SolrDocumentList docs = rsp.getResults();
final SolrDocumentList docs = getDocumentListByParams(params);
return docs;
}
/**
 * Count the hits for a parameterized query by fetching the document list and reading its total hit count.
 * @param params the solr query parameters
 * @return the number of found documents as reported by the document list, or 0 if no list was returned
 * @throws IOException
 * @throws SolrException
 */
@Override
public long getDocumentCountByParams(ModifiableSolrParams params) throws IOException, SolrException {
    final SolrDocumentList matches = getDocumentListByParams(params);
    if (matches == null) {
        return 0;
    }
    return matches.getNumFound();
}
/**
* check if a given document, identified by url hash as document id exists
* @param id the url hash and document id
@ -215,10 +211,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
params.setIncludeScore(false);
// query the server
QueryResponse rsp = getResponseByParams(params);
final SolrDocumentList docs = rsp.getResults();
boolean exist = docs == null ? false : docs.getNumFound() > 0;
return exist;
return getDocumentCountByParams(params) > 0;
}
/**
@ -247,8 +240,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
params.setIncludeScore(false);
// query the server
QueryResponse rsp = getResponseByParams(params);
final SolrDocumentList docs = rsp.getResults();
final SolrDocumentList docs = getDocumentListByParams(params);
// construct a new id list from that
HashSet<String> idsr = new HashSet<String>();
for (SolrDocument doc : docs) {
@ -276,9 +268,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
params.setIncludeScore(false);
// query the server
QueryResponse rsp = getResponseByParams(params);
final SolrDocumentList docs = rsp.getResults();
return docs == null ? 0 : docs.getNumFound();
return getDocumentCountByParams(params);
}
/**
@ -335,12 +325,12 @@ public abstract class AbstractSolrConnector implements SolrConnector {
// query the server
try {
final QueryResponse rsp = getResponseByParams(query);
final SolrDocumentList docs = rsp.getResults();
if (docs.isEmpty()) return null;
final SolrDocumentList docs = getDocumentListByParams(query);
if (docs == null || docs.isEmpty()) return null;
return docs.get(0);
} catch (final Throwable e) {
throw new IOException(e.getMessage(), e);
}
}
}

@ -122,34 +122,6 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
this.clearCaches();
this.solr.deleteByQuery(querystring);
}
@Override
public boolean existsByQuery(final String query) throws IOException {
if (this.hitCache.containsKey(query)) {
this.hitCache_Hit++;
return true;
}
this.hitCache_Miss++;
if (this.documentCache.containsKey(query)) {
this.documentCache_Hit++;
return true;
}
this.documentCache_Miss++;
if (this.missCache.containsKey(query)) {
this.missCache_Hit++;
return false;
}
this.missCache_Miss++;
if (solr != null && solr.existsByQuery(query)) {
this.missCache.remove(query);
this.hitCache.put(query, EXIST);
this.hitCache_Insert++;
return true;
}
this.missCache.put(query, EXIST);
this.missCache_Insert++;
return false;
}
@Override
public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException {
@ -249,6 +221,12 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
QueryResponse list = this.solr.getResponseByParams(query);
return list;
}
/**
 * Fetch the document list for the given parameters by delegating directly to the wrapped connector.
 * @param params the solr query parameters
 * @return whatever document list the wrapped connector returns
 * @throws IOException
 * @throws SolrException
 */
@Override
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException {
    return this.solr.getDocumentListByParams(params);
}
@Override
public long getCountByQuery(final String querystring) throws IOException {

@ -376,12 +376,6 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
e.addAll(e1);
return e;
}
@Override
public boolean existsByQuery(String solrquery) throws IOException {
// this is actually wrong but to make it right we need to wait until all queues are flushed. But that may take very long when the queues are filled again all the time.
return this.connector.existsByQuery(solrquery);
}
@Override
public void add(SolrInputDocument solrdoc) throws IOException, SolrException {
@ -424,6 +418,18 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
return this.connector.getResponseByParams(query);
}
/**
 * Fetch the document list for the given parameters by delegating directly to the underlying connector.
 * @param params the solr query parameters
 * @return whatever document list the underlying connector returns
 * @throws IOException
 * @throws SolrException
 */
@Override
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException {
    return this.connector.getDocumentListByParams(params);
}
/**
 * Count the hits for a parameterized query by fetching the document list and reading its total hit count.
 * @param params the solr query parameters
 * @return the number of found documents as reported by the document list, or 0 if no list was returned
 * @throws IOException
 * @throws SolrException
 */
@Override
public long getDocumentCountByParams(ModifiableSolrParams params) throws IOException, SolrException {
    final SolrDocumentList found = getDocumentListByParams(params);
    if (found == null) {
        return 0;
    }
    return found.getNumFound();
}
@Override
public SolrDocumentList getDocumentListByQuery(String querystring, int offset, int count, String... fields) throws IOException, SolrException {
return this.connector.getDocumentListByQuery(querystring, offset, count, fields);

@ -22,6 +22,7 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
@ -35,10 +36,14 @@ import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
@ -49,9 +54,12 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.request.UnInvertedField;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
@ -180,7 +188,81 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
// return result
return rsp;
}
/**
 * Convert a SolrQueryResponse (solr-internal data format) into a SolrDocumentList (solrj format).
 * The "response" entry of the response values is read as a ResultContext; its hit count and offset
 * are copied into the result and every listed document is loaded through the searcher of the request
 * and converted with doc2SolrDoc. A document that cannot be loaded is logged and left out.
 * @param req the request that belongs to the response; provides the searcher used to load the documents
 * @param rsp the solr-internal response
 * @return the converted document list; empty if the response carries no result context
 */
public SolrDocumentList SolrQueryResponse2SolrDocumentList(final SolrQueryRequest req, final SolrQueryResponse rsp) {
    final SolrDocumentList result = new SolrDocumentList();
    @SuppressWarnings("rawtypes")
    final NamedList values = rsp.getValues();
    final ResultContext context = (ResultContext) values.get("response");

    // without a result context we continue with an empty slice so the counters below become 0
    final DocList hits;
    if (context == null) {
        hits = new DocSlice(0, 0, new int[0], new float[0], 0, 0.0f);
    } else {
        hits = context.docs;
    }

    if (hits == null) {
        result.setNumFound(0);
        result.setStart(0);
        return result;
    }

    result.setNumFound(hits.matches());
    result.setStart(hits.offset());

    final int responseCount = hits.size();
    final SolrIndexSearcher searcher = req.getSearcher();
    final DocIterator docIds = hits.iterator();
    for (int remaining = responseCount; remaining > 0; remaining--) {
        try {
            result.add(doc2SolrDoc(searcher.doc(docIds.nextDoc(), (Set<String>) null)));
        } catch (IOException e) {
            ConcurrentLog.logException(e);
        }
    }
    return result;
}
/**
 * Convert a lucene Document into a SolrDocument.
 * Each field value is decoded with the field type from the latest schema of the core; fields without
 * a known type are copied as raw bytes or, if they have no binary value, as string.
 * A field whose value cannot be decoded is silently left out.
 * @param doc the lucene document
 * @return the solr document holding the decoded field values
 */
public SolrDocument doc2SolrDoc(Document doc) {
    final SolrDocument converted = new SolrDocument();
    for (final IndexableField field : doc) {
        final String name = field.name();
        final SchemaField schemaField = this.core.getLatestSchema().getFieldOrNull(name);
        Object value;
        try {
            final FieldType type = schemaField == null ? null : schemaField.getType();
            if (type != null) {
                value = type.toObject(field);
            } else {
                final BytesRef raw = field.binaryValue();
                if (raw == null) {
                    value = field.stringValue();
                } else if (raw.offset == 0 && raw.length == raw.bytes.length) {
                    // the reference covers the whole backing array, so it is used without copying
                    value = raw.bytes;
                } else {
                    final byte[] copy = new byte[raw.length];
                    System.arraycopy(raw.bytes, raw.offset, copy, 0, raw.length);
                    value = copy;
                }
            }
        } catch (Throwable e) {
            // value could not be decoded: skip this field
            continue;
        }

        // the first value of a multi-valued field is wrapped in a list
        final boolean firstOfMultiValued = schemaField != null && schemaField.multiValued() && !converted.containsKey(name);
        if (firstOfMultiValued) {
            final ArrayList<Object> values = new ArrayList<Object>();
            values.add(value);
            converted.addField(name, values);
        } else {
            converted.addField(name, value);
        }
    }
    return converted;
}
/**
* the usage of getResponseByParams is discouraged for the embedded Solr connector. Please use request(SolrParams) instead.
* Reason: Solr makes a very complex folding/unfolding including data compression for SolrQueryResponses.
@ -196,7 +278,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
try {
rsp = this.server.query(params);
if (q != null) Thread.currentThread().setName(threadname);
if (rsp != null) log.fine(rsp.getResults().getNumFound() + " results for q=" + q);
if (rsp != null) if (log.isFine()) log.fine(rsp.getResults().getNumFound() + " results for q=" + q);
return rsp;
} catch (final SolrServerException e) {
throw new IOException(e);
@ -205,6 +287,44 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
}
/**
 * Get the solr document list for a query.
 * The query is executed against the embedded core and the solr-internal response is converted
 * with SolrQueryResponse2SolrDocumentList. The request is closed and the request info is cleared
 * in every case, also if the query fails.
 * @param params the solr query parameters
 * @return the document list of the response
 * @throws IOException if the query returns no response
 * @throws SolrException
 */
@Override
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException {
    final SolrQueryRequest req = this.request(params);
    try {
        final SolrQueryResponse response = this.query(req);
        if (response == null) {
            throw new IOException("response == null");
        }
        return SolrQueryResponse2SolrDocumentList(req, response);
    } finally {
        req.close();
        SolrRequestInfo.clearRequestInfo();
    }
}
/**
 * Get the number of results for a query.
 * The query is executed against the embedded core and only the hit count of the "response" entry
 * is read; no documents are loaded. The request is closed and the request info is cleared in every
 * case, also if the query fails.
 * @param params the solr query parameters
 * @return the number of matching documents, or 0 if the response carries no result context or no doc list
 * @throws IOException if the query returns no response
 * @throws SolrException
 */
@Override
public long getDocumentCountByParams(ModifiableSolrParams params) throws IOException, SolrException {
    final SolrQueryRequest req = this.request(params);
    try {
        final SolrQueryResponse response = this.query(req);
        if (response == null) throw new IOException("response == null");
        final NamedList<?> values = response.getValues();
        final ResultContext resultContext = (ResultContext) values.get("response");
        // the doc list may be missing as well; treat that like an empty result instead of failing with a NPE
        if (resultContext == null || resultContext.docs == null) return 0;
        return resultContext.docs.matches();
    } finally {
        req.close();
        SolrRequestInfo.clearRequestInfo();
    }
}
private class DocListSearcher {
public SolrQueryRequest request;
public DocList response;

@ -158,14 +158,6 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
if (this.solr0 != null) this.solr0.deleteByQuery(querystring);
if (this.solr1 != null) this.solr1.deleteByQuery(querystring);
}
@Override
public boolean existsByQuery(final String query) throws IOException {
if ((solr0 != null && solr0.existsByQuery(query)) || (solr1 != null && solr1.existsByQuery(query))) {
return true;
}
return false;
}
@Override
public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException {
@ -283,6 +275,49 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
// TODO: combine both
return rsp1;
}
/**
 * Get the solr document list for a query from the two attached connectors.
 * If only one connector is attached, the query is passed to it unchanged. If both are attached,
 * solr0 is asked first; if it delivers at least the requested number of rows, its list is returned.
 * Otherwise solr1 is asked for the missing rows, using an offset shifted by the total hit count of solr0.
 * The START and ROWS parameters of the given query are temporarily modified for the second query
 * and set back to the effective start/count values afterwards, also if the second query fails.
 * @param query the solr query parameters; ROWS defaults to 10 and START to 0 if not set
 * @return the document list; an empty list if no connector is attached
 * @throws IOException
 * @throws SolrException
 */
@Override
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams query) throws IOException, SolrException {
    final Integer count0 = query.getInt(CommonParams.ROWS);
    final int count = count0 == null ? 10 : count0.intValue();
    final Integer start0 = query.getInt(CommonParams.START);
    final int start = start0 == null ? 0 : start0.intValue();

    if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
    if (this.solr1 == null) return this.solr0.getDocumentListByParams(query);
    if (this.solr0 == null) return this.solr1.getDocumentListByParams(query);

    // both connectors are attached: ask solr0 first
    final SolrDocumentList l = this.solr0.getDocumentListByParams(query);
    final int found0 = l == null ? 0 : l.size();
    if (found0 >= count) return l;

    // the total number of hits in solr0 is already part of the first answer,
    // there is no need to fetch all rows a second time just to count them
    final long numFound0 = l == null ? 0L : l.getNumFound();
    final int size0 = (int) Math.min((long) Integer.MAX_VALUE, numFound0);

    // now use the size of the first query to do a second query
    try {
        query.set(CommonParams.START, Math.max(0, start + found0 - size0));
        query.set(CommonParams.ROWS, count - found0);
        final SolrDocumentList l1 = this.solr1.getDocumentListByParams(query);
        // TODO: combine both
        // NOTE(review): the rows found in solr0 (l) are not part of the returned list - confirm whether they should be merged into l1
        return l1;
    } finally {
        // hand the query back with the start/count values the caller asked for
        query.set(CommonParams.START, start);
        query.set(CommonParams.ROWS, count);
    }
}
@Override
public long getCountByQuery(final String querystring) throws IOException {

@ -98,7 +98,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
public void deleteByQuery(final String querystring) throws IOException;
/**
* check if a given document, identified by url hash as ducument id exists
* check if a given document, identified by url hash as document id exists
* @param id the url hash and document id
* @return true if any entry in solr exists
* @throws IOException
@ -112,14 +112,6 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @throws IOException
*/
public Set<String> existsByIds(Set<String> ids) throws IOException;
/**
* check if a given document exists in solr
* @param solrquery
* @return true if any entry in solr exists
* @throws IOException
*/
public boolean existsByQuery(final String solrquery) throws IOException;
/**
* add a solr input document
@ -147,12 +139,32 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException;
/**
* get a query response from solr
* get a "full" query response from solr. Please compare to getDocumentListByParams which may be much more efficient
* @param query
* @throws IOException
*/
public QueryResponse getResponseByParams(final ModifiableSolrParams query) throws IOException, SolrException;
/**
* get the solr document list from a query response
* This differs from getResponseByParams in such a way that it does only create the fields of the response but
* never search snippets and there are also no facets generated.
* @param params
* @return
* @throws IOException
* @throws SolrException
*/
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException;
/**
* get the number of results for a query response
* @param params
* @return
* @throws IOException
* @throws SolrException
*/
public long getDocumentCountByParams(ModifiableSolrParams params) throws IOException, SolrException;
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -30,8 +30,11 @@ import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
@ -41,6 +44,7 @@ import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
@ -285,6 +289,35 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
}
}
}
/**
 * Get the solr document list for a query.
 * The query is sent to the attached solr server and only the result list of the response is returned.
 * While the query runs, the current thread is renamed to the query string to get more debugging
 * info in thread dumps; the original thread name is restored in every case, also if the query fails.
 * @param params the solr query parameters
 * @return the result list of the query response
 * @throws IOException if the server is disconnected, the server returns no response or the query fails unexpectedly
 * @throws SolrException if the solr server reports an error
 */
@Override
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException, SolrException {
    if (this.server == null) throw new IOException("server disconnected");
    // during the solr query we set the thread name to the query string to get more debugging info in thread dumps
    final String q = params.get("q");
    final String threadname = Thread.currentThread().getName();
    if (q != null) Thread.currentThread().setName("solr query: q = " + q);
    QueryResponse rsp;
    try {
        rsp = this.server.query(params);
    } catch (final SolrServerException e) {
        throw new SolrException(ErrorCode.UNKNOWN, e);
    } catch (final Throwable e) {
        throw new IOException("Error executing query", e);
    } finally {
        // restore the thread name on every path, not only after a successful query
        if (q != null) Thread.currentThread().setName(threadname);
    }
    if (rsp == null) throw new IOException("Error executing query: response == null");
    final SolrDocumentList docs = rsp.getResults();
    if (docs != null && log.isFine()) log.fine(docs.getNumFound() + " results for q=" + q);
    return docs;
}
public Collection<FieldInfo> getFields() throws SolrServerException {
// get all fields contained in index

@ -638,7 +638,7 @@ public class Domains {
host = host.toLowerCase().trim();
// try to simply parse the address
InetAddress ip = parseInetAddress(host);
InetAddress ip = InetAddress.getByName(host);
if (ip != null) return ip;
// trying to resolve host by doing a name cache lookup
@ -716,8 +716,14 @@ public class Domains {
if (host0 == null || host0.isEmpty()) return null;
final String host = host0.toLowerCase().trim();
// try to simply parse the address
InetAddress ip = parseInetAddress(host);
if (ip != null) return ip;
InetAddress ip;
try {
ip = InetAddress.getByName(host);
return ip;
} catch (UnknownHostException e1) {
// we expected that InetAddress.getByName may fail if this is not a raw address.
// We silently ignore this and go on.
}
/*
if (MemoryControl.shortStatus()) {
@ -834,30 +840,6 @@ public class Domains {
NAME_CACHE_MISS.clear();
} catch (final IOException e) {}
}
public static final InetAddress parseInetAddress(String ip) {
if (ip == null || ip.length() < 8) return null;
ip = ip.trim();
if (ip.charAt(0) == '[' && ip.charAt(ip.length() - 1) == ']') ip = ip.substring(1, ip.length() - 1);
if ("localhost".equals(ip)) ip = "127.0.0.1"; // normalize to IPv4 here since that is the way to calculate the InetAddress
final String[] ips = CommonPattern.DOT.split(ip);
if (ips.length != 4) return null; // TODO: parse IPv6 addresses
final byte[] ipb = new byte[4];
try {
ipb[0] = (byte) Integer.parseInt(ips[0]);
ipb[1] = (byte) Integer.parseInt(ips[1]);
ipb[2] = (byte) Integer.parseInt(ips[2]);
ipb[3] = (byte) Integer.parseInt(ips[3]);
} catch (final NumberFormatException e) {
return null;
}
try {
return InetAddress.getByAddress(ipb);
} catch (final UnknownHostException e) {
return null;
}
}
/**
* Returns the number of entries in the nameCacheHit map

@ -58,6 +58,8 @@ public class RequestHeader extends HeaderFramework {
HTML, JSON, XML
}
private Date date_cache_IfModifiedSince = null;
public RequestHeader() {
super();
}
@ -86,8 +88,11 @@ public class RequestHeader extends HeaderFramework {
return url.getHost();
}
public Date ifModifiedSince() {
return headerDate(IF_MODIFIED_SINCE);
if (this.date_cache_IfModifiedSince != null) return date_cache_IfModifiedSince;
this.date_cache_IfModifiedSince = headerDate(RequestHeader.IF_MODIFIED_SINCE);
return this.date_cache_IfModifiedSince;
}
public Object ifRange() {

@ -38,6 +38,10 @@ public class ResponseHeader extends HeaderFramework {
private static final long serialVersionUID = 0L;
private static final ConcurrentLog log = new ConcurrentLog(ResponseHeader.class.getName());
private Date date_cache_Date = null;
private Date date_cache_Expires = null;
private Date date_cache_LastModified = null;
public ResponseHeader(final int statusCode) {
super();
this.put(HeaderFramework.STATUS_CODE, Integer.toString(statusCode));
@ -69,21 +73,27 @@ public class ResponseHeader extends HeaderFramework {
return 200;
}
}
public Date date() {
if (this.date_cache_Date != null) return this.date_cache_Date;
final Date d = headerDate(HeaderFramework.DATE);
final Date now = new Date();
return (d == null) ? now : d.after(now) ? now : d;
this.date_cache_Date = (d == null) ? now : d.after(now) ? now : d;
return this.date_cache_Date;
}
public Date expires() {
return headerDate(EXPIRES);
if (this.date_cache_Expires != null) return this.date_cache_Expires;
this.date_cache_Expires = headerDate(HeaderFramework.EXPIRES);
return this.date_cache_Expires;
}
public Date lastModified() {
final Date d = headerDate(LAST_MODIFIED);
if (this.date_cache_LastModified != null) return this.date_cache_LastModified;
final Date d = headerDate(HeaderFramework.LAST_MODIFIED);
final Date now = new Date();
return (d == null) ? date() : d.after(now) ? now : d;
this.date_cache_LastModified = (d == null) ? date() : d.after(now) ? now : d;
return this.date_cache_LastModified;
}
public long age() {
@ -94,8 +104,8 @@ public class ResponseHeader extends HeaderFramework {
}
public boolean gzip() {
return ((containsKey(CONTENT_ENCODING)) &&
((get(CONTENT_ENCODING)).toUpperCase().startsWith("GZIP")));
return ((containsKey(HeaderFramework.CONTENT_ENCODING)) &&
((get(HeaderFramework.CONTENT_ENCODING)).toUpperCase().startsWith("GZIP")));
}
public static Object[] parseResponseLine(final String respLine) {

@ -392,13 +392,14 @@ public final class CrawlStacker {
final String urlstring = url.toString();
// check if the url is double registered
String urlhash = ASCII.String(url.hash());
final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash()));
final Date oldDate = this.indexSegment.fulltext().getLoadDate(urlhash); // TODO: combine the exists-query with this one
if (oldDate == null) {
if (dbocc != null) {
// do double-check
if (dbocc == HarvestProcess.ERRORS) {
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash);
return "double in: errors (" + errorEntry.getFailReason() + ")";
}
return "double in: " + dbocc.toString();
@ -414,7 +415,7 @@ public final class CrawlStacker {
return "double in: LURL-DB, oldDate = " + oldDate.toString();
}
if (dbocc == HarvestProcess.ERRORS) {
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash);
if (CrawlStacker.log.isInfo()) CrawlStacker.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason());
return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString();
}

@ -60,7 +60,7 @@ public class YMarkDMOZImporter extends YMarkImporter {
}
public void setDepth(int d) {
this.depth = d + this.targetFolder.split(YMarkUtil.FOLDERS_SEPARATOR).length-1;
this.depth = d + YMarkUtil.FOLDERS_SEPARATOR_PATTERN.split(this.targetFolder).length-1;
}
public class DMOZParser extends DefaultHandler {

@ -259,7 +259,7 @@ public class YMarkTables {
while(bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row.containsKey(YMarkEntry.BOOKMARK.FOLDERS.key())) {
final String[] folderArray = (new String(bmk_row.get(YMarkEntry.BOOKMARK.FOLDERS.key()),"UTF8")).split(YMarkUtil.TAGS_SEPARATOR);
final String[] folderArray = YMarkUtil.TAGS_SEPARATOR_PATTERN.split(new String(bmk_row.get(YMarkEntry.BOOKMARK.FOLDERS.key()),"UTF8"));
for (final String folder : folderArray) {
if(folder.length() > root.length() && folder.substring(0, root.length()+1).equals(r)) {
if(!folders.contains(folder)) {

@ -29,6 +29,7 @@ package net.yacy.data.ymark;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
@ -40,6 +41,9 @@ public class YMarkUtil {
public final static String SPACE = " ";
public final static String EMPTY_STRING = new String();
public final static Pattern TAGS_SEPARATOR_PATTERN = Pattern.compile(TAGS_SEPARATOR);
public final static Pattern FOLDERS_SEPARATOR_PATTERN = Pattern.compile(FOLDERS_SEPARATOR);
/**
* convenience function to generate url hashes for YMark bookmarks
* @param url a string representation of a valid url

@ -1006,13 +1006,14 @@ public final class Protocol {
final int offset,
final int count,
Seed target,
final int partitions,
final Blacklist blacklist) {
if (event.query.getQueryGoal().getOriginalQueryString(false) == null || event.query.getQueryGoal().getOriginalQueryString(false).length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string
}
event.addExpectedRemoteReferences(count);
if (partitions > 0) solrQuery.set("partitions", partitions);
solrQuery.setStart(offset);
solrQuery.setRows(count);

@ -165,29 +165,30 @@ public class RemoteSearch extends Thread {
nodePeers.add(s);
}
}
// start solr searches
if (Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_DHT_TESTLOCAL, false)) {
dhtPeers.clear();
dhtPeers.add(event.peers.mySeed());
}
if (Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL, false)) {
nodePeers.clear();
nodePeers.add(event.peers.mySeed());
}
// start solr searches
final int targets = dhtPeers.size() + nodePeers.size();
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_OFF, false)) {
final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, start == 0, event.excludeintext_image);
for (Seed s: nodePeers) {
Thread t = solrRemoteSearch(event, solrQuery, start, count, s, blacklist);
Thread t = solrRemoteSearch(event, solrQuery, start, count, s, targets, blacklist);
event.nodeSearchThreads.add(t);
}
}
if (Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_DHT_TESTLOCAL, false)) {
dhtPeers.clear();
dhtPeers.add(event.peers.mySeed());
}
// start search to YaCy DHT peers
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_DHT_OFF, false)) {
final int targets = dhtPeers.size();
if (targets == 0) return;
for (int i = 0; i < targets; i++) {
for (int i = 0; i < dhtPeers.size(); i++) {
if (dhtPeers.get(i) == null || dhtPeers.get(i).hash == null) continue;
try {
RemoteSearch rs = new RemoteSearch(
@ -271,6 +272,7 @@ public class RemoteSearch extends Thread {
final int start,
final int count,
final Seed targetPeer,
final int partitions,
final Blacklist blacklist) {
assert solrQuery != null;
@ -290,6 +292,7 @@ public class RemoteSearch extends Thread {
start,
count,
targetPeer,
partitions,
blacklist);
if (urls >= 0) {
// urls is an array of url hashes. this is only used for log output

@ -303,12 +303,17 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
if ( ipport == null ) {
return;
}
final int p = ipport.indexOf(':');
final int p = ipport.lastIndexOf(':');
if ( p < 0 ) {
this.alternativeIP = ipport;
} else {
this.alternativeIP = ipport.substring(0, p);
}
if (this.alternativeIP.charAt(0) == '[' && this.alternativeIP.charAt(this.alternativeIP.length() - 1) == ']') {
// IPv6 patch
this.alternativeIP = this.alternativeIP.substring(1, this.alternativeIP.length() - 1);
}
}
/**
@ -574,19 +579,26 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
*/
public final String getPublicAddress() {
String ip = getIP();
if ( ip == null || ip.length() < 8 || ip.length() > 60 ) {
ip = Domains.LOCALHOST;
}
if (ip == null) ip = Domains.LOCALHOST; // that should not happen
final String port = this.dna.get(Seed.PORT);
if ( port == null || port.length() < 2 || port.length() > 5 ) {
return null;
}
final StringBuilder sb = new StringBuilder(ip.length() + port.length() + 1);
sb.append(ip);
sb.append(':');
sb.append(port);
final StringBuilder sb = new StringBuilder(ip.length() + port.length() + 3);
if (ip.indexOf(':') >= 0) {
// IPv6 Address!, see: http://en.wikipedia.org/wiki/IPv6_address#Literal_IPv6_addresses_in_network_resource_identifiers
sb.append('[');
sb.append(ip);
sb.append(']');
sb.append(':');
sb.append(port);
} else {
sb.append(ip);
sb.append(':');
sb.append(port);
}
return sb.toString();
}
@ -604,11 +616,24 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
}
final String port = this.dna.get(Seed.PORT);
if ( (port == null) || (port.length() < 2) ) {
if ( port == null || port.length() < 2 || port.length() > 5 ) {
return null;
}
return this.alternativeIP + ":" + port;
final StringBuilder sb = new StringBuilder(this.alternativeIP.length() + port.length() + 3);
if (this.alternativeIP.indexOf(':') >= 0) {
// IPv6 Address!, see: http://en.wikipedia.org/wiki/IPv6_address#Literal_IPv6_addresses_in_network_resource_identifiers
sb.append('[');
sb.append(this.alternativeIP);
sb.append(']');
sb.append(':');
sb.append(port);
} else {
sb.append(this.alternativeIP);
sb.append(':');
sb.append(port);
}
return sb.toString();
}
/**
@ -618,7 +643,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
return Domains.dnsResolve(getIP());
}
/** @return the portnumber of this seed or <code>-1</code> if not present */
/** @return the port number of this seed or <code>-1</code> if not present */
public final int getPort() {
final String port = this.dna.get(Seed.PORT);
if ( port == null ) {

@ -717,7 +717,7 @@ public final class SeedDB implements AlternativeDomainNames {
try {
pw = new PrintWriter(new BufferedWriter(new FileWriter(seedFile)));
List<Seed> seedlist = getSeedlist(Integer.MAX_VALUE, addMySeed);
List<Seed> seedlist = getSeedlist(Integer.MAX_VALUE, addMySeed, false, 0.0f);
String line;
for (Seed seed: seedlist) {
line = seed.genSeedStr(null);
@ -731,7 +731,7 @@ public final class SeedDB implements AlternativeDomainNames {
return v;
}
public ArrayList<Seed> getSeedlist(int maxcount, boolean addMySeed) {
public ArrayList<Seed> getSeedlist(int maxcount, boolean addMySeed, boolean nodeonly, float minversion) {
final ArrayList<Seed> v = new ArrayList<Seed>(this.seedActiveDB.size() + 1000);
// store own peer seed
@ -739,10 +739,10 @@ public final class SeedDB implements AlternativeDomainNames {
// store active peer seeds
Seed ys;
Iterator<Seed> se = this.seedsConnected(true, false, null, (float) 0.0);
Iterator<Seed> se = this.seedsConnected(true, false, null, minversion);
while (se.hasNext() && v.size() < maxcount) {
ys = se.next();
if (ys != null) v.add(ys);
if (ys != null && (!nodeonly || ys.getFlagRootNode())) v.add(ys);
}
// store some of the not-so-old passive peer seeds (limit: 1 day)
@ -750,7 +750,7 @@ public final class SeedDB implements AlternativeDomainNames {
final long timeout = System.currentTimeMillis() - (1000L * 60L * 60L * 24L);
while (se.hasNext() && v.size() < maxcount) {
ys = se.next();
if (ys != null && ys.getLastSeenUTC() >= timeout) v.add(ys);
if (ys != null && ys.getLastSeenUTC() >= timeout && (!nodeonly || ys.getFlagRootNode())) v.add(ys);
}
final StringBuilder encoded = new StringBuilder(1024);

@ -30,10 +30,10 @@ import java.util.Set;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
@ -50,12 +50,12 @@ public class ErrorCache {
private static final int maxStackSize = 1000;
// the class object
private final Map<String, CollectionConfiguration.FailDoc> stack;
private final Map<String, CollectionConfiguration.FailDoc> cache;
private final Fulltext fulltext;
public ErrorCache(final Fulltext fulltext) {
this.fulltext = fulltext;
this.stack = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
this.cache = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
try {
// fill stack with latest values
final SolrQuery params = new SolrQuery();
@ -64,28 +64,29 @@ public class ErrorCache {
params.setRows(100);
params.setFacet(false);
params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
params.setFacet(false);
params.setFields(CollectionSchema.id.getSolrFieldName());
params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
QueryResponse rsp = fulltext.getDefaultConnector().getResponseByParams(params);
SolrDocumentList docList = rsp == null ? null : rsp.getResults();
params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given
SolrDocumentList docList = fulltext.getDefaultConnector().getDocumentListByParams(params);
if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) {
CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(docList.get(i));
this.stack.put(ASCII.String(failDoc.getDigestURL().hash()), failDoc);
SolrDocument doc = docList.get(i);
String hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
this.cache.put(hash, null);
}
} catch (final Throwable e) {
}
}
public void clear() throws IOException {
if (this.stack != null) synchronized (this.stack) {this.stack.clear();}
if (this.cache != null) synchronized (this.cache) {this.cache.clear();}
this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
}
public void removeHosts(final Set<String> hosthashes) {
if (hosthashes == null || hosthashes.size() == 0) return;
this.fulltext.deleteDomainErrors(hosthashes);
synchronized (this.stack) {
Iterator<String> i = ErrorCache.this.stack.keySet().iterator();
synchronized (this.cache) {
Iterator<String> i = ErrorCache.this.cache.keySet().iterator();
while (i.hasNext()) {
String b = i.next();
if (hosthashes.contains(b)) i.remove();
@ -105,9 +106,6 @@ public class ErrorCache {
url, profile == null ? null : profile.collections(),
failCategory.name() + " " + reason, failCategory.failType,
httpcode);
synchronized (this.stack) {
this.stack.put(ASCII.String(url.hash()), failDoc);
}
if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
// send the error to solr
try {
@ -116,38 +114,57 @@ public class ErrorCache {
} catch (final IOException e) {
ConcurrentLog.warn("SOLR", "failed to send error " + url.toNormalform(true) + " to solr: " + e.getMessage());
}
synchronized (this.cache) {
this.cache.put(ASCII.String(url.hash()), null);
}
} else {
synchronized (this.cache) {
this.cache.put(ASCII.String(url.hash()), failDoc);
}
}
checkStackSize();
}
private void checkStackSize() {
synchronized (this.stack) {
int dc = this.stack.size() - maxStackSize;
synchronized (this.cache) {
int dc = this.cache.size() - maxStackSize;
if (dc > 0) {
Collection<String> d = new ArrayList<String>();
Iterator<String> i = this.stack.keySet().iterator();
Iterator<String> i = this.cache.keySet().iterator();
while (dc-- > 0 && i.hasNext()) d.add(i.next());
for (String s: d) this.stack.remove(s);
for (String s: d) this.cache.remove(s);
}
}
}
public ArrayList<CollectionConfiguration.FailDoc> list(int max) {
final ArrayList<CollectionConfiguration.FailDoc> l = new ArrayList<CollectionConfiguration.FailDoc>();
synchronized (this.stack) {
Iterator<CollectionConfiguration.FailDoc> fdi = this.stack.values().iterator();
for (int i = 0; i < this.stack.size() - max; i++) fdi.next();
while (fdi.hasNext()) l.add(fdi.next());
synchronized (this.cache) {
Iterator<Map.Entry<String, CollectionConfiguration.FailDoc>> hi = this.cache.entrySet().iterator();
for (int i = 0; i < this.cache.size() - max; i++) hi.next();
while (hi.hasNext()) {
try {
Map.Entry<String, CollectionConfiguration.FailDoc> entry = hi.next();
String hash = entry.getKey();
CollectionConfiguration.FailDoc failDoc = entry.getValue();
if (failDoc == null) {
SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(hash);
if (doc != null) failDoc = new CollectionConfiguration.FailDoc(doc);
}
if (failDoc != null) l.add(failDoc);
} catch (IOException e) {
}
}
}
return l;
}
public CollectionConfiguration.FailDoc get(final String urlhash) {
CollectionConfiguration.FailDoc fd;
synchronized (this.stack) {
fd = this.stack.get(urlhash);
CollectionConfiguration.FailDoc failDoc = null;
synchronized (this.cache) {
failDoc = this.cache.get(urlhash);
}
if (fd != null) return fd;
if (failDoc != null) return failDoc;
try {
SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(urlhash);
if (doc == null) return null;
@ -160,23 +177,28 @@ public class ErrorCache {
public boolean exists(final byte[] urlHash) {
try {
return this.fulltext.getDefaultConnector().existsByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
final SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(ASCII.String(urlHash), CollectionSchema.failreason_s.getSolrFieldName());
if (doc == null) return false;
// check if the document contains a value in the field CollectionSchema.failreason_s
Object failreason = doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
return failreason != null && failreason.toString().length() > 0;
} catch (IOException e) {
return false;
}
}
public void clearStack() {
synchronized (this.stack) {
this.stack.clear();
synchronized (this.cache) {
this.cache.clear();
}
}
public int stackSize() {
synchronized (this.stack) {
return this.stack.size();
synchronized (this.cache) {
return this.cache.size();
}
}
}

@ -287,7 +287,7 @@ public final class SearchEvent {
// start a local solr search
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, true, this.excludeintext_image), 0, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, true, this.excludeintext_image), 0, this.query.itemsPerPage, null /*this peer*/, 0, Switchboard.urlBlacklist);
}
this.localsolroffset = this.query.itemsPerPage;
@ -1375,7 +1375,7 @@ public final class SearchEvent {
int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}}
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, nextitems, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, nextitems, null /*this peer*/, 0, Switchboard.urlBlacklist);
}
this.localsolroffset += nextitems;
}
@ -1396,7 +1396,7 @@ public final class SearchEvent {
if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) {
// at the end of a list, trigger a next solr search
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, 0, Switchboard.urlBlacklist);
}
this.localsolroffset += this.query.itemsPerPage;
}

@ -1365,7 +1365,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
}
this.collections = new HashMap<String, Pattern>();
Collection<Object> c = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName());
for (Object cn: c) this.collections.put((String) cn, QueryParams.catchall_pattern);
if (c != null) for (Object cn: c) if (cn != null) this.collections.put((String) cn, QueryParams.catchall_pattern);
this.failReason = (String) doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
this.failType = FailType.valueOf((String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()));
this.httpstatus = (Integer) doc.getFieldValue(CollectionSchema.httpstatus_i.getSolrFieldName());

Loading…
Cancel
Save