From 96f6a5869f15dce8d901bb49dd5cce184d0cb4a6 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 16 Jun 2012 22:30:31 +0200 Subject: [PATCH] more robust OAI-PMH client (large time-out, three re-tries). OAI-PMH server appears to be very slow sometimes --- defaults/yacy.init | 2 +- .../anomic/crawler/retrieval/HTTPLoader.java | 2 +- .../yacy/document/importer/OAIPMHLoader.java | 29 ++++++++++--------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index e8f2b78c5..d94671b43 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -691,7 +691,7 @@ crawlPause.localsearch=50 crawlPause.remotesearch=10 # Some configuration values for the crawler -crawler.clientTimeout=9000 +crawler.clientTimeout=30000 # http crawler specific settings; size in bytes crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index 2a349a601..6d8726fbe 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -67,7 +67,7 @@ public final class HTTPLoader { this.log = theLog; // refreshing timeout value - this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000); + this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000); } public Response load(final Request entry, final int maxFileSize, final boolean checkBlacklist) throws IOException { diff --git a/source/net/yacy/document/importer/OAIPMHLoader.java b/source/net/yacy/document/importer/OAIPMHLoader.java index b1069cf14..2f90009c4 100644 --- a/source/net/yacy/document/importer/OAIPMHLoader.java +++ b/source/net/yacy/document/importer/OAIPMHLoader.java @@ -27,6 +27,7 @@ import java.io.IOException; import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.kelondro.data.meta.DigestURI; +import 
net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.LoaderDispatcher; import de.anomic.crawler.retrieval.Response; @@ -47,7 +48,20 @@ public class OAIPMHLoader { this.source = source; // load the file from the net - final Response response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true); + Response response = null; + IOException ee = null; + for (int i = 0; i < 3; i++) { + // make some retries if first attempt fails + try { + response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true); + break; + } catch (IOException e) { + Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true, false)); + ee = e; + continue; + } + } + if (response == null) throw ee; final byte[] b = response.getContent(); this.resumptionToken = new ResumptionToken(source, b); //System.out.println("*** ResumptionToken = " + this.resumptionToken.toString()); @@ -57,19 +71,6 @@ public class OAIPMHLoader { // transaction-safe writing FileUtils.copy(b, f0); f0.renameTo(f1); - - /* - SurrogateReader sr = new SurrogateReader(new ByteArrayInputStream(b), 100); - Thread srt = new Thread(sr); - srt.start(); - DCEntry dce; - while ((dce = sr.take()) != DCEntry.poison) { - System.out.println(dce.toString()); - } - try { - srt.join(); - } catch (InterruptedException e) {} - */ } public ResumptionToken getResumptionToken() {