Merge origin/master into jetty

pull/1/head
reger 12 years ago
commit b38de92a16

@ -3,7 +3,7 @@ javacSource=1.6
javacTarget=1.6
# Release Configuration
releaseVersion=1.64
releaseVersion=1.65
stdReleaseFile=yacy${branch}_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

@ -399,7 +399,7 @@ public class HostBrowser {
} else {
// this is a folder
prop.put("files_list_" + c + "_type", 1);
prop.put("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
int linked = ((int[]) entry.getValue())[0];
int stored = ((int[]) entry.getValue())[1];
int crawler = ((int[]) entry.getValue())[2];
@ -441,7 +441,9 @@ public class HostBrowser {
// maybe this is only in the errorURL
prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())).getFailReason() : "unknown error");
} else {
prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail");
String ids = ASCII.String(uri.hash());
InfoCacheEntry ice = infoCache.get(ids);
prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail; " + ice.toString());
}
}
if (loadRight) {

@ -115,7 +115,7 @@ public class GitRevTask extends org.apache.tools.ant.Task {
} else {
gitRevTask.setRepoPath(args[0]);
}
gitRevTask.setBranchprop("brnach");
gitRevTask.setBranchprop("branch");
gitRevTask.setRevprop("baseRevisionNr");
gitRevTask.setDateprop("DSTAMP");

@ -70,7 +70,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
catchSuccessQuery.setRows(0);
catchSuccessQuery.setStart(0);
}
private final static int pagesize = 100;
protected final static int pagesize = 100;
@Override
public boolean existsByQuery(final String query) throws IOException {
@ -83,10 +83,10 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
@Override
public Object getFieldById(final String key, final String field) throws IOException {
public String getFieldById(final String key, final String field) throws IOException {
SolrDocument doc = getDocumentById(key, field);
if (doc == null) return null;
return doc.getFieldValue(field);
return doc.getFieldValue(field).toString();
}
/**
@ -329,11 +329,11 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
@Override
public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException {
public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException {
final SolrQuery query = new SolrQuery();
assert key.length() == 12;
assert id.length() == 12;
// construct query
query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + key);
query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id);
query.clearSorts();
query.setRows(1);
query.setStart(0);

@ -377,11 +377,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
@Override
public Object getFieldById(String id, String field) throws IOException {
public String getFieldById(String id, String field) throws IOException {
if (existIdFromDeleteQueue(id)) return null;
SolrInputDocument doc = getFromUpdateQueue(id);
if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field);}
Object val = this.connector.getFieldById(id, field);
if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field).toString();}
String val = this.connector.getFieldById(id, field);
if (val != null) updateIdCache(id);
return val;
}

@ -22,12 +22,20 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
@ -40,13 +48,20 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector {
static Set<String> SOLR_ID_FIELDS = new HashSet<String>();
static {
SOLR_ID_FIELDS.add(CollectionSchema.id.getSolrFieldName());
}
public static final String SELECT = "/select";
public static final String CONTEXT = "/solr";
@ -128,7 +143,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
NamedList<Object> responseHeader = new SimpleOrderedMap<Object>();
responseHeader.add("params", req.getOriginalParams().toNamedList());
rsp.add("responseHeader", responseHeader);
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
//SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
// send request to solr and create a result
this.requestHandler.handleRequest(req, rsp);
@ -144,6 +159,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
return rsp;
}
/**
* The usage of getResponseByParams is discouraged for the embedded Solr connector. Please use request(SolrParams) instead.
* Reason: Solr makes a very complex folding/unfolding including data compression for SolrQueryResponses.
*/
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected");
@ -164,4 +183,125 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
}
/**
 * Runs one query through the enclosing connector and keeps the resulting
 * {@link DocList} together with the request that produced it.
 * Callers read document numbers from {@link #response} and resolve them with
 * request.getSearcher(); they must call {@link #close()} when they are done.
 */
private class DocListSearcher {
    public SolrQueryRequest request;
    public DocList response;

    /**
     * @param querystring the Solr query
     * @param offset index of the first result to return
     * @param count maximum number of results to return
     * @param fields the fields to select; if empty, no field list is set on the query
     */
    public DocListSearcher(final String querystring, final int offset, final int count, final String ... fields) {
        // construct query
        final SolrQuery params = new SolrQuery();
        params.setQuery(querystring);
        params.setRows(count);
        params.setStart(offset);
        params.setFacet(false);
        params.clearSorts();
        if (fields.length > 0) params.setFields(fields);
        params.setIncludeScore(false);

        // query the server
        this.request = request(params);
        try {
            SolrQueryResponse rsp = query(this.request);
            this.response = ((ResultContext) rsp.getValues().get("response")).docs;
        } catch (final RuntimeException e) {
            // the caller never receives this instance when the constructor fails,
            // so the request must be released here or it would stay open
            close();
            throw e;
        }
    }

    /**
     * Close the request (if still open) and drop the references to request and response.
     */
    public void close() {
        if (this.request != null) this.request.close();
        this.request = null;
        this.response = null;
    }
}
/**
 * Count the documents matching a query without fetching any of them (rows = 0).
 *
 * @param querystring the Solr query
 * @return the number of matches reported by the search
 */
@Override
public long getCountByQuery(String querystring) {
    DocListSearcher docListSearcher = new DocListSearcher(querystring, 0, 0, CollectionSchema.id.getSolrFieldName());
    try {
        return docListSearcher.response.matches();
    } finally {
        // release the request even if reading the result fails
        docListSearcher.close();
    }
}
/**
 * Check whether a document with the given id is present.
 *
 * @param id the document id
 * @return true if a raw term query on the id field has at least one match
 */
@Override
public boolean existsById(String id) {
    final String rawIdQuery = "{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id;
    final long hits = getCountByQuery(rawIdQuery);
    return hits > 0;
}
/**
 * Find out which of the given ids are present in the index.
 *
 * @param ids the document ids to check; may be null or empty
 * @return the subset of ids that was found. For a one-element Set the given set
 *         itself is returned when its id exists. If reading a result document
 *         fails, the failure is logged and the ids collected so far are returned.
 */
@Override
public Set<String> existsByIds(Collection<String> ids) {
    if (ids == null || ids.size() == 0) return new HashSet<String>();
    if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set<String>) ids : new HashSet<String>();

    // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
    final String idField = CollectionSchema.id.getSolrFieldName();
    StringBuilder sb = new StringBuilder();
    for (String id: ids) {
        sb.append("({!raw f=").append(idField).append('}').append(id).append(") OR ");
    }
    if (sb.length() > 0) sb.setLength(sb.length() - 4); // cut off the last " OR "

    DocListSearcher docListSearcher = new DocListSearcher(sb.toString(), 0, ids.size(), idField);
    HashSet<String> idsr = new HashSet<String>();
    try {
        int responseCount = docListSearcher.response.size();
        SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
        DocIterator iterator = docListSearcher.response.iterator();
        for (int i = 0; i < responseCount; i++) {
            Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS);
            idsr.add(doc.get(idField));
        }
    } catch (IOException e) {
        // do not hide read failures: the returned set is incomplete in this case
        ConcurrentLog.logException(e);
    } finally {
        docListSearcher.close();
    }
    // the ids that have been found
    return idsr;
}
/**
 * Get the stored value of a single field of the document with the given id.
 *
 * @param id the document id
 * @param field the name of the field to read
 * @return the string value of the field, or null if no document matches, the
 *         field is not present, or reading the document failed (the failure is logged)
 * @throws IOException declared by the signature; a read failure inside this
 *         method is logged and answered with null instead
 */
@Override
public String getFieldById(final String id, final String field) throws IOException {
    DocListSearcher docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName());
    try {
        // this check is inside the try block so that the request is closed on the "not found" path as well
        if (docListSearcher.response.matches() == 0) return null;
        Set<String> solrFields = new HashSet<String>();
        solrFields.add(field);
        Document doc = docListSearcher.request.getSearcher().doc(docListSearcher.response.iterator().nextDoc(), solrFields);
        return doc.get(field);
    } catch (IOException e) {
        ConcurrentLog.logException(e);
        return null;
    } finally {
        docListSearcher.close();
    }
}
/**
 * Collect the ids of all documents matching a query in a background thread.
 * The ids are written page by page into the returned queue; the end of the
 * stream is always marked with AbstractSolrConnector.POISON_ID, also when the
 * producer stops early because of an error, an interrupt or the time limit.
 *
 * @param querystring the Solr query
 * @param offset index of the first result to deliver
 * @param maxcount maximum number of ids to deliver
 * @param maxtime maximum running time in milliseconds; Long.MAX_VALUE means no limit
 * @return a queue which is filled concurrently and terminated with POISON_ID
 */
@Override
public BlockingQueue<String> concurrentIDsByQuery(final String querystring, final int offset, final int maxcount, final long maxtime) {
    final BlockingQueue<String> queue = new LinkedBlockingQueue<String>();
    final long endtime = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity!
    final Thread t = new Thread() {
        @Override
        public void run() {
            boolean interrupted = false;
            try {
                int o = offset;
                int delivered = 0;
                while (!interrupted && delivered < maxcount && System.currentTimeMillis() < endtime) {
                    // never ask for more documents than the caller still wants
                    final int rows = Math.min(pagesize, maxcount - delivered);
                    try {
                        DocListSearcher docListSearcher = new DocListSearcher(querystring, o, rows, CollectionSchema.id.getSolrFieldName());
                        int responseCount = docListSearcher.response.size();
                        SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
                        DocIterator iterator = docListSearcher.response.iterator();
                        try {
                            for (int i = 0; i < responseCount; i++) {
                                Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS);
                                try {
                                    queue.put(doc.get(CollectionSchema.id.getSolrFieldName()));
                                    delivered++;
                                } catch (final InterruptedException e) {
                                    // stop producing completely, not only for the current page
                                    interrupted = true;
                                    break;
                                }
                            }
                        } catch (IOException e) {
                            // the index could not be read; further paging would only repeat the failure
                            ConcurrentLog.logException(e);
                            break;
                        } finally {
                            docListSearcher.close();
                        }
                        if (responseCount < rows) break; // last page reached
                        o += rows;
                    } catch (final SolrException e) {
                        break;
                    }
                }
            } finally {
                // the queue is unbounded, so offer() cannot fail and cannot be interrupted;
                // this guarantees that a consumer blocked in take() is always released
                queue.offer(AbstractSolrConnector.POISON_ID);
                if (interrupted) Thread.currentThread().interrupt();
            }
        }
    };
    t.start();
    return queue;
}
}

@ -23,7 +23,10 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.sorting.ReversibleScoreMap;
@ -338,4 +341,41 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return s;
}
/**
 * Check whether one of the mirrored connectors knows the given id.
 * solr0 is asked first; solr1 is only asked if solr0 is missing or does not have the id.
 *
 * @param id the document id
 * @return true if a connected connector reports the id as existing
 * @throws IOException if a connector fails
 */
@Override
public boolean existsById(String id) throws IOException {
    if (this.solr0 != null && this.solr0.existsById(id)) {
        return true;
    }
    return this.solr1 != null && this.solr1.existsById(id);
}
/**
 * Find out which of the given ids exist in at least one of the mirrored connectors.
 *
 * @param ids the document ids to check
 * @return the result of the only connected connector, or the union of the results
 *         of both connectors; an empty set if no connector is connected
 * @throws IOException if a connector fails
 */
@Override
public Set<String> existsByIds(Collection<String> ids) throws IOException {
    // without any connector nothing can exist; this mirrors existsById, which answers false in that case
    if (this.solr0 == null && this.solr1 == null) return new HashSet<String>();
    if (this.solr1 == null) return this.solr0.existsByIds(ids);
    if (this.solr0 == null) return this.solr1.existsByIds(ids);
    Set<String> s = new HashSet<String>();
    s.addAll(this.solr0.existsByIds(ids));
    s.addAll(this.solr1.existsByIds(ids));
    return s;
}
/**
 * Get the value of a single field of the document with the given id.
 * If both connectors are connected, solr0 is asked first and solr1 only if
 * solr0 has no value.
 *
 * @param key the document id
 * @param field the name of the field to read
 * @return the field value, or null if no connector is connected or none has a value
 * @throws IOException if a connector fails
 */
@Override
public String getFieldById(String key, String field) throws IOException {
    // without any connector there is nothing to read; avoid a NullPointerException
    if (this.solr0 == null && this.solr1 == null) return null;
    if (this.solr1 == null) return this.solr0.getFieldById(key, field);
    if (this.solr0 == null) return this.solr1.getFieldById(key, field);
    String value = this.solr0.getFieldById(key, field);
    if (value != null) return value;
    return this.solr1.getFieldById(key, field);
}
/*
@Override
public BlockingQueue<SolrDocument> concurrentDocumentsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, String... fields) {
return null;
}
*/
/**
 * Collect the ids matching a query concurrently.
 * If exactly one connector is connected the call is delegated to it;
 * otherwise the implementation of the super class is used.
 *
 * @param querystring the Solr query
 * @param offset index of the first result
 * @param maxcount maximum number of ids
 * @param maxtime maximum running time in milliseconds
 * @return the queue produced by the chosen implementation
 */
@Override
public BlockingQueue<String> concurrentIDsByQuery(String querystring, int offset, int maxcount, long maxtime) {
    final boolean has0 = this.solr0 != null;
    final boolean has1 = this.solr1 != null;
    if (has0 && !has1) {
        return this.solr0.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
    }
    if (has1 && !has0) {
        return this.solr1.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
    }
    return super.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
}
}

@ -139,7 +139,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @return one result or null if no result exists
* @throws IOException
*/
public Object getFieldById(final String key, final String field) throws IOException;
public String getFieldById(final String key, final String field) throws IOException;
/**
* get a document from solr by given key for the id-field

@ -166,6 +166,10 @@ public class EmbeddedInstance implements SolrInstance {
return this.containerPath;
}
/**
 * @return the core container held by this instance
 */
public CoreContainer getCoreContainer() {
    final CoreContainer container = this.coreContainer;
    return container;
}
@Override
public String getDefaultCoreName() {
return this.defaultCoreName;

@ -310,7 +310,9 @@ public final class Fulltext {
if (urlHash == null) return null;
Date x;
try {
x = (Date) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName());
String d = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName());
if (d == null) return null;
x = new Date(Long.parseLong(d));
} catch (final IOException e) {
return null;
}
@ -322,7 +324,7 @@ public final class Fulltext {
String x;
try {
x = (String) this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName());
x = this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName());
} catch (final IOException e) {
return null;
}
@ -642,7 +644,7 @@ public final class Fulltext {
public String failReason(final String urlHash) throws IOException {
if (urlHash == null) return null;
String reason = (String) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName());
String reason = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName());
if (reason == null) return null;
return reason.length() == 0 ? null : reason;
}

@ -327,7 +327,8 @@ public class Segment {
this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
this.internalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0);
this.externalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0);
if (Segment.this.fulltext.writeToWebgraph()) {
boolean useWebgraph = Segment.this.fulltext.writeToWebgraph();
if (useWebgraph) {
// read the references from the webgraph
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
webgraph.commit(true);
@ -354,7 +355,8 @@ public class Segment {
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
}
} else if (connectedCitation()) {
}
if ((!useWebgraph || (internalIDs.size() == 0 && externalIDs.size() == 0)) && connectedCitation()) {
// read the references from the citation index
ReferenceContainer<CitationReference> references;
references = urlCitation().get(id, null);

@ -1092,7 +1092,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
this.crt = new TreeMap<byte[], double[]>(Base64Order.enhancedCoder);
try {
// select all documents for each host
BlockingQueue<String> ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000);
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, 0, 1000000, 600000);
String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}

Loading…
Cancel
Save