diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 2044f1fba..19b802708 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -345,6 +345,20 @@ public final class Switchboard extends serverSwitch { // set the default segment names setDefaultSegments(); + // load domainList + try { + this.domainList = null; + if(!getConfig("network.unit.domainlist", "").equals("")) { + Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt")); + this.domainList = new FilterEngine(); + this.domainList.loadList(new BufferedReader(r), null); + } + } catch (FileNotFoundException e) { + log.logSevere("CONFIG: domainlist not found: " + e.getMessage()); + } catch (IOException e) { + log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage()); + } + // create a crawler crawler = new CrawlSwitchboard( networkName, @@ -824,15 +838,7 @@ public final class Switchboard extends serverSwitch { } */ MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig("network.unit.domain", "global")); - - try { - this.domainList = null; - Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath()); - this.domainList = new FilterEngine(); - this.domainList.loadList(new BufferedReader(r), null); - } catch (FileNotFoundException e) { - } catch (IOException e) { - } + } public void switchNetwork(final String networkDefinition) { @@ -917,13 +923,18 @@ public final class Switchboard extends serverSwitch { this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map")); + // load domainList try { this.domainList = null; - Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainList", ""), getAppPath().getAbsolutePath()); - this.domainList = new FilterEngine(); - this.domainList.loadList(new BufferedReader(r), null); + if(!getConfig("network.unit.domainlist", "").equals("")) { + Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt")); + this.domainList = new FilterEngine(); + this.domainList.loadList(new BufferedReader(r), null); + } } catch (FileNotFoundException e) { + log.logSevere("CONFIG: domainlist not found: " + e.getMessage()); } catch (IOException e) { + log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage()); } this.crawlStacker = new CrawlStacker( diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index d7c6f3fa0..812f624c3 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -22,10 +22,10 @@ package de.anomic.server; import java.io.BufferedInputStream; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; @@ -43,13 +43,11 @@ import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.http.HTTPClient; -import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.workflow.BusyThread; import net.yacy.kelondro.workflow.WorkflowThread; -import de.anomic.search.Switchboard; import de.anomic.server.serverAccessTracker.Track; import de.anomic.server.serverCore.Session; @@ -573,8 +571,9 @@ public class serverSwitch { * file may be an url or a filename with path relative to rootPath parameter * @param file url or filename * @param rootPath searchpath for file + * @param file file to use when remote fetching fails (null if unused) */ - public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws IOException, FileNotFoundException { + public Reader getConfigFileFromWebOrLocally(String uri, String rootPath, File file) throws IOException, FileNotFoundException { if(uri.startsWith("http://") || uri.startsWith("https://")) { String[] uris = uri.split(","); for (String netdef: uris) { @@ -586,12 +585,22 @@ public class serverSwitch { client.setHeader(reqHeader.entrySet()); byte[] data = client.GETbytes(uri); if (data == null || data.length == 0) continue; + // save locally in case next fetch fails + if (file != null) { + FileOutputStream f = new FileOutputStream(file); + f.write(data); + f.close(); + } return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data))); } catch (final Exception e) { continue; } } - throw new FileNotFoundException(); + if (file != null && file.exists()) { + return new FileReader(file); + } else { + throw new FileNotFoundException(); + } } else { final File f = (uri.length() > 0 && uri.charAt(0) == '/') ? new File(uri) : new File(rootPath, uri); if (f.exists()) { diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index 03e25754c..ec38df626 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -447,7 +447,7 @@ public class HTTPClient { byte[] content = null; try { execute(httpUriRequest); - if (httpResponse == null) return null; + if (httpResponse == null || httpResponse.getStatusLine().getStatusCode() != 200) return null; // get the response body final HttpEntity httpEntity = httpResponse.getEntity(); if (httpEntity != null) {