fixed bug in solr error document

pull/1/head
Michael Peter Christen 13 years ago
parent cb54c1737b
commit f150bc218b

@ -82,8 +82,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management"); this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727); this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME)); FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
} }
public void relocate(final File newQueuePath) { public void relocate(final File newQueuePath) {
@ -94,8 +94,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727); this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME)); FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
} }
public synchronized void close() { public synchronized void close() {

@ -37,6 +37,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.solr.SolrConnector; import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.cora.services.federated.solr.SolrShardingConnector; import net.yacy.cora.services.federated.solr.SolrShardingConnector;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
@ -49,6 +50,7 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.table.Table; import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.index.SolrConfiguration;
import de.anomic.crawler.retrieval.Request; import de.anomic.crawler.retrieval.Request;
public class ZURL implements Iterable<ZURL.Entry> { public class ZURL implements Iterable<ZURL.Entry> {
@ -79,15 +81,18 @@ public class ZURL implements Iterable<ZURL.Entry> {
private Index urlIndex; private Index urlIndex;
private final Queue<byte[]> stack; private final Queue<byte[]> stack;
private final SolrConnector solrConnector; private final SolrConnector solrConnector;
private final SolrConfiguration solrConfiguration;
public ZURL( public ZURL(
final SolrConnector solrConnector, final SolrConnector solrConnector,
final SolrConfiguration solrConfiguration,
final File cachePath, final File cachePath,
final String tablename, final String tablename,
final boolean startWithEmptyFile, final boolean startWithEmptyFile,
final boolean useTailCache, final boolean useTailCache,
final boolean exceed134217727) { final boolean exceed134217727) {
this.solrConnector = solrConnector; this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in a file // creates a new ZURL in a file
cachePath.mkdirs(); cachePath.mkdirs();
final File f = new File(cachePath, tablename); final File f = new File(cachePath, tablename);
@ -109,8 +114,10 @@ public class ZURL implements Iterable<ZURL.Entry> {
this.stack = new LinkedBlockingQueue<byte[]>(); this.stack = new LinkedBlockingQueue<byte[]>();
} }
public ZURL(final SolrShardingConnector solrConnector) { public ZURL(final SolrShardingConnector solrConnector,
final SolrConfiguration solrConfiguration) {
this.solrConnector = solrConnector; this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in RAM // creates a new ZURL in RAM
this.urlIndex = new RowSet(rowdef); this.urlIndex = new RowSet(rowdef);
this.stack = new LinkedBlockingQueue<byte[]>(); this.stack = new LinkedBlockingQueue<byte[]>();
@ -156,7 +163,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
if (this.solrConnector != null && (failCategory == FailCategory.TEMPORARY_NETWORK_FAILURE || failCategory == FailCategory.FINAL_ROBOTS_RULE)) { if (this.solrConnector != null && (failCategory == FailCategory.TEMPORARY_NETWORK_FAILURE || failCategory == FailCategory.FINAL_ROBOTS_RULE)) {
// send the error to solr // send the error to solr
try { try {
this.solrConnector.err(bentry.url(), failCategory.name() + " " + reason, httpcode); SolrDoc errorDoc = this.solrConfiguration.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
this.solrConnector.add(errorDoc);
} catch (final IOException e) { } catch (final IOException e) {
Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage()); Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
} }

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
@ -74,16 +72,6 @@ public interface SolrConnector {
public void add(final SolrDoc solrdoc) throws IOException, SolrException; public void add(final SolrDoc solrdoc) throws IOException, SolrException;
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException; public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException;
/**
 * Register an entry as an error document.
 * @param digestURI the URL the error document is recorded for
 * @param failReason text describing why the URL failed
 * @param httpstatus HTTP status code associated with the failure
 * @throws IOException if an implementation fails to store the error document
 */
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException;
/** /**
* get a query result from solr * get a query result from solr
* to get all results set the query String to "*:*" * to get all results set the query String to "*:*"

@ -5,8 +5,6 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
@ -105,11 +103,6 @@ public class SolrMultipleConnector implements SolrConnector {
} }
} }
/**
 * Register an entry as an error document by forwarding the call,
 * with unchanged arguments, to {@code this.solr}.
 */
@Override
public void err(DigestURI digestURI, String failReason, int httpstatus) throws IOException {
this.solr.err(digestURI, failReason, httpstatus);
}
@Override @Override
public SolrDocumentList get(String querystring, int offset, int count) throws IOException { public SolrDocumentList get(String querystring, int offset, int count) throws IOException {
return this.solr.get(querystring, offset, count); return this.solr.get(querystring, offset, count);

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
@ -128,21 +126,6 @@ public class SolrRetryConnector implements SolrConnector {
for (SolrDoc d: solrdocs) add(d); for (SolrDoc d: solrdocs) add(d);
} }
/**
 * Register an entry as an error document, retrying on failure.
 * The call is forwarded to {@code this.solrConnector}; when it fails, the
 * attempt is repeated after a 10 ms pause until {@code retryMaxTime}
 * milliseconds have passed since the method was entered.
 *
 * @param digestURI the URL the error document is recorded for
 * @param failReason text describing why the URL failed
 * @param httpstatus HTTP status code associated with the failure
 * @throws IOException the last failure once the retry window is used up
 *         (non-IO failures are wrapped with their cause attached), or if the
 *         thread is interrupted while waiting between attempts
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final long deadline = System.currentTimeMillis() + this.retryMaxTime;
    Throwable lastFailure = null;
    while (System.currentTimeMillis() < deadline) {
        try {
            this.solrConnector.err(digestURI, failReason, httpstatus);
            return;
        } catch (final Throwable e) {
            // remember the failure; it is reported only if no later attempt succeeds
            lastFailure = e;
        }
        try {
            Thread.sleep(10);
        } catch (final InterruptedException interrupted) {
            // restore the interrupt flag and stop retrying instead of spinning until the deadline
            Thread.currentThread().interrupt();
            throw new IOException("interrupted while retrying to send error document to solr", interrupted);
        }
    }
    if (lastFailure == null) return;
    if (lastFailure instanceof IOException) throw (IOException) lastFailure;
    // keep the original throwable as cause so the stack trace is not lost
    throw new IOException(lastFailure.getMessage(), lastFailure);
}
@Override @Override
public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException { public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime; final long t = System.currentTimeMillis() + this.retryMaxTime;

@ -31,7 +31,6 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
@ -128,19 +127,6 @@ public class SolrShardingConnector implements SolrConnector {
for (final SolrDoc doc: docs) add(doc); for (final SolrDoc doc: docs) add(doc);
} }
/**
 * Register an entry as an error document on one of the sharded connectors.
 * The normal form of the URL is passed to {@code this.sharding.selectURL};
 * the connector that {@code this.connectors.get} yields for that selection
 * receives the call with unchanged arguments.
 * @param digestURI the URL the error document is recorded for
 * @param failReason text describing why the URL failed
 * @param httpstatus HTTP status code associated with the failure
 * @throws IOException if the selected connector fails to register the document
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
this.connectors.get(this.sharding.selectURL(digestURI.toNormalform(true, false))).err(digestURI, failReason, httpstatus);
}
/** /**
* get a query result from solr * get a query result from solr
* to get all results set the query String to "*:*" * to get all results set the query String to "*:*"

@ -31,10 +31,8 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
@ -218,27 +216,6 @@ public class SolrSingleConnector implements SolrConnector {
} }
} }
/**
 * Builds an error document for a URL and stores it through {@code add}.
 * The document always carries the fields {@code id}, {@code sku},
 * {@code failreason_t} and {@code httpstatus_i}; {@code ip_s},
 * {@code host_s} and {@code attr_paths} are added only when the
 * corresponding value is available.
 *
 * @param digestURI the URL the error document is recorded for
 * @param failReason text stored in the {@code failreason_t} field
 * @param httpstatus value stored in the {@code httpstatus_i} field
 * @throws IOException if storing the document fails
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity of the failed URL
    errorDoc.addField("id", ASCII.String(digestURI.hash()));
    errorDoc.addField("sku", digestURI.toNormalform(true, false));

    // network location, only where it is known
    final InetAddress resolved = digestURI.getInetAddress();
    if (resolved != null) {
        errorDoc.addField("ip_s", resolved.getHostAddress());
    }
    final String host = digestURI.getHost();
    if (host != null) {
        errorDoc.addField("host_s", host);
    }

    // path elements of link; String.split may return an empty array (e.g. for "/"),
    // in which case the field is left out
    final String urlPath = digestURI.getPath();
    final String[] segments = (urlPath == null) ? null : urlPath.split("/");
    if (segments != null && segments.length > 0) {
        errorDoc.addField("attr_paths", segments);
    }

    errorDoc.addField("failreason_t", failReason);
    errorDoc.addField("httpstatus_i", httpstatus);
    add(errorDoc);
}
/** /**
* get a query result from solr * get a query result from solr
* to get all results set the query String to "*:*" * to get all results set the query String to "*:*"

@ -26,6 +26,7 @@ package net.yacy.search.index;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
@ -36,6 +37,7 @@ import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
@ -491,6 +493,33 @@ public class SolrConfiguration extends ConfigurationSet {
return a; return a;
} }
/**
 * Builds the document that describes a failed URL.
 * The document is only created and returned here; it is not sent anywhere
 * by this method. Fields are written through {@code addSolr} in this order:
 * id, sku, ip_s (if an address is available), host_s (if a host is
 * available), paths_txt (if the path splits into at least one element),
 * failreason_t, httpstatus_i.
 *
 * @param digestURI the URL the error document is built for
 * @param failReason text written to the failreason_t field
 * @param httpstatus value written to the httpstatus_i field
 * @return the newly created error document
 * @throws IOException declared by the signature; NOTE(review): which call
 *         can raise it is not visible from this method - confirm
 */
public SolrDoc err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity of the failed URL
    addSolr(errorDoc, SolrField.id, ASCII.String(digestURI.hash()));
    addSolr(errorDoc, SolrField.sku, digestURI.toNormalform(true, false));

    // network location, only where it is known
    final InetAddress resolved = digestURI.getInetAddress();
    if (resolved != null) {
        addSolr(errorDoc, SolrField.ip_s, resolved.getHostAddress());
    }
    final String host = digestURI.getHost();
    if (host != null) {
        addSolr(errorDoc, SolrField.host_s, host);
    }

    // path elements of link; String.split may return an empty array (e.g. for "/"),
    // in which case the field is left out
    final String urlPath = digestURI.getPath();
    final String[] segments = (urlPath == null) ? null : urlPath.split("/");
    if (segments != null && segments.length > 0) {
        addSolr(errorDoc, SolrField.paths_txt, segments);
    }

    addSolr(errorDoc, SolrField.failreason_t, failReason);
    addSolr(errorDoc, SolrField.httpstatus_i, httpstatus);
    return errorDoc;
}
/* /*
standard solr schema standard solr schema

Loading…
Cancel
Save