fixed bug in solr error document

pull/1/head
Michael Peter Christen 13 years ago
parent cb54c1737b
commit f150bc218b

@ -82,8 +82,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@ -94,8 +94,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public synchronized void close() {

@ -37,6 +37,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.cora.services.federated.solr.SolrShardingConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
@ -49,6 +50,7 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.index.SolrConfiguration;
import de.anomic.crawler.retrieval.Request;
public class ZURL implements Iterable<ZURL.Entry> {
@ -79,15 +81,18 @@ public class ZURL implements Iterable<ZURL.Entry> {
private Index urlIndex;
private final Queue<byte[]> stack;
private final SolrConnector solrConnector;
private final SolrConfiguration solrConfiguration;
public ZURL(
final SolrConnector solrConnector,
final SolrConfiguration solrConfiguration,
final File cachePath,
final String tablename,
final boolean startWithEmptyFile,
final boolean useTailCache,
final boolean exceed134217727) {
this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in a file
cachePath.mkdirs();
final File f = new File(cachePath, tablename);
@ -109,8 +114,10 @@ public class ZURL implements Iterable<ZURL.Entry> {
this.stack = new LinkedBlockingQueue<byte[]>();
}
public ZURL(final SolrShardingConnector solrConnector) {
public ZURL(final SolrShardingConnector solrConnector,
final SolrConfiguration solrConfiguration) {
this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in RAM
this.urlIndex = new RowSet(rowdef);
this.stack = new LinkedBlockingQueue<byte[]>();
@ -156,7 +163,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
if (this.solrConnector != null && (failCategory == FailCategory.TEMPORARY_NETWORK_FAILURE || failCategory == FailCategory.FINAL_ROBOTS_RULE)) {
// send the error to solr
try {
this.solrConnector.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
SolrDoc errorDoc = this.solrConfiguration.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
this.solrConnector.add(errorDoc);
} catch (final IOException e) {
Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
}

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -74,16 +72,6 @@ public interface SolrConnector {
public void add(final SolrDoc solrdoc) throws IOException, SolrException;
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException;
/**
 * Register an entry as error document.
 *
 * @param digestURI the URL the error document is created for
 * @param failReason text describing why the URL failed; the caller visible in ZURL
 *                   passes the fail category name followed by the reason
 * @param httpstatus the HTTP status code to record with the error
 * @throws IOException if the error document cannot be handed over to solr
 */
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException;
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -5,8 +5,6 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -105,11 +103,6 @@ public class SolrMultipleConnector implements SolrConnector {
}
}
/**
 * Register an entry as error document by forwarding the call, with unchanged
 * arguments, to the wrapped connector held in {@code this.solr}.
 *
 * @param digestURI the URL the error document is created for
 * @param failReason text describing why the URL failed
 * @param httpstatus the HTTP status code to record with the error
 * @throws IOException propagated from the wrapped connector
 */
@Override
public void err(DigestURI digestURI, String failReason, int httpstatus) throws IOException {
// NOTE(review): unlike add(), this call is made directly on the calling thread — confirm this is intended
this.solr.err(digestURI, failReason, httpstatus);
}
@Override
public SolrDocumentList get(String querystring, int offset, int count) throws IOException {
return this.solr.get(querystring, offset, count);

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -128,21 +126,6 @@ public class SolrRetryConnector implements SolrConnector {
for (SolrDoc d: solrdocs) add(d);
}
/**
 * Register an entry as error document, retrying when the wrapped connector fails.
 * The call is repeated, with a 10 ms pause between attempts, until it succeeds or
 * {@code retryMaxTime} milliseconds have elapsed since this method was entered.
 * If the calling thread is interrupted while pausing, the interrupt flag is restored
 * and retrying stops immediately.
 *
 * @param digestURI the URL the error document is created for
 * @param failReason text describing why the URL failed
 * @param httpstatus the HTTP status code to record with the error
 * @throws IOException the last failure seen; a non-IO failure is wrapped in an
 *                     IOException that carries the original as its cause
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final long deadline = System.currentTimeMillis() + this.retryMaxTime;
    Throwable lastFailure = null;
    while (System.currentTimeMillis() < deadline) {
        try {
            this.solrConnector.err(digestURI, failReason, httpstatus);
            return;
        } catch (final Throwable e) {
            // remember the failure; it is reported only if no later attempt succeeds
            lastFailure = e;
        }
        try {
            Thread.sleep(10);
        } catch (final InterruptedException interrupted) {
            // do not swallow the interrupt: restore the flag for the caller and give up
            Thread.currentThread().interrupt();
            break;
        }
    }
    if (lastFailure == null) {
        return;
    }
    if (lastFailure instanceof IOException) {
        throw (IOException) lastFailure;
    }
    // keep the original throwable as cause so the stack trace is not lost
    throw new IOException(lastFailure.getMessage(), lastFailure);
}
@Override
public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;

@ -31,7 +31,6 @@ import java.util.Collection;
import java.util.List;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@ -128,19 +127,6 @@ public class SolrShardingConnector implements SolrConnector {
for (final SolrDoc doc: docs) add(doc);
}
/**
 * Register an entry as error document on the shard responsible for the URL.
 * The connector is chosen by passing the normal form of the URL to
 * {@code sharding.selectURL}; the call is then delegated to that connector.
 *
 * @param digestURI the URL the error document is created for; also used to pick the shard
 * @param failReason text describing why the URL failed
 * @param httpstatus the HTTP status code to record with the error
 * @throws IOException propagated from the selected connector
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
// select the connector from the normalized URL, then forward the unchanged arguments
this.connectors.get(this.sharding.selectURL(digestURI.toNormalform(true, false))).err(digestURI, failReason, httpstatus);
}
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -31,10 +31,8 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import org.apache.http.HttpHost;
@ -218,27 +216,6 @@ public class SolrSingleConnector implements SolrConnector {
}
}
/**
 * Register an entry as error document: a new document is filled with the URL hash,
 * the normalized URL, the IP address and host where available, the path elements,
 * the fail reason and the HTTP status, and is then passed to {@code add}.
 *
 * @param digestURI the URL the error document is created for
 * @param failReason text stored in the {@code failreason_t} field
 * @param httpstatus value stored in the {@code httpstatus_i} field
 * @throws IOException if adding the document fails
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity of the failed document
    errorDoc.addField("id", ASCII.String(digestURI.hash()));
    errorDoc.addField("sku", digestURI.toNormalform(true, false));

    // address and host are optional and only written when present
    final InetAddress inetAddress = digestURI.getInetAddress();
    if (inetAddress != null) {
        errorDoc.addField("ip_s", inetAddress.getHostAddress());
    }
    final String host = digestURI.getHost();
    if (host != null) {
        errorDoc.addField("host_s", host);
    }

    // path elements of link; a missing path is treated like a path without elements
    final String path = digestURI.getPath();
    final String[] pathElements = (path == null) ? new String[0] : path.split("/");
    if (pathElements.length > 0) {
        errorDoc.addField("attr_paths", pathElements);
    }

    // the failure itself
    errorDoc.addField("failreason_t", failReason);
    errorDoc.addField("httpstatus_i", httpstatus);

    add(errorDoc);
}
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -26,6 +26,7 @@ package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList;
@ -36,6 +37,7 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework;
@ -491,6 +493,33 @@ public class SolrConfiguration extends ConfigurationSet {
return a;
}
/**
 * Create an error document for a URL. The document is only built here and
 * returned; this method does not send it anywhere itself.
 * All fields are written through {@code addSolr}, using the {@code SolrField}
 * constants id, sku, ip_s, host_s, paths_txt, failreason_t and httpstatus_i.
 *
 * @param digestURI the URL the error document is created for
 * @param failReason text written to the failreason_t field
 * @param httpstatus value written to the httpstatus_i field
 * @return the newly created error document
 * @throws IOException declared by the signature; no statement visible in this method raises it directly
 */
public SolrDoc err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity of the failed document
    addSolr(errorDoc, SolrField.id, ASCII.String(digestURI.hash()));
    addSolr(errorDoc, SolrField.sku, digestURI.toNormalform(true, false));

    // address and host are optional and only written when present
    final InetAddress inetAddress = digestURI.getInetAddress();
    if (inetAddress != null) {
        addSolr(errorDoc, SolrField.ip_s, inetAddress.getHostAddress());
    }
    final String host = digestURI.getHost();
    if (host != null) {
        addSolr(errorDoc, SolrField.host_s, host);
    }

    // path elements of link; a missing path is treated like a path without elements
    final String path = digestURI.getPath();
    final String[] pathElements = (path == null) ? new String[0] : path.split("/");
    if (pathElements.length > 0) {
        addSolr(errorDoc, SolrField.paths_txt, pathElements);
    }

    // the failure itself
    addSolr(errorDoc, SolrField.failreason_t, failReason);
    addSolr(errorDoc, SolrField.httpstatus_i, httpstatus);

    return errorDoc;
}
/*
standard solr schema

Loading…
Cancel
Save