From 02d9af1a707e55163a80c58224b0ac44833a5080 Mon Sep 17 00:00:00 2001 From: theli Date: Sat, 22 Oct 2005 13:28:04 +0000 Subject: [PATCH] *) Restructuring and extending of Remote Proxy Support - remote proxy configuration can now be "really" changed on the fly and takes effect immediately - adding possibility to disable remote proxy usage for yacy->yacy communication - adding possibility to disable remote proxy usage for ssl - restructuring proxy configuration so that it is stored in a single place now *) Adding possibility to import a foreign word DB (or even more of them in parallel) at runtime into the peers DB - this can be done by calling IndexImport_p.html - ATTENTION: please note that at the moment this thread must be aborted via gui before a normal server shutdown is done. - TODO: integrating IndexImport Thread into normal server shutdown - TODO: Adding possibility to import crawl-queues, etc. from foreign peers - TODO: removing old import function from yacy.java and calling the new routines instead git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@968 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexImport_p.html | 105 +++++ htroot/IndexImport_p.java | 196 +++++++++ htroot/Language_p.java | 2 +- htroot/Network.java | 4 +- htroot/SettingsAck_p.java | 64 ++- htroot/Settings_p.html | 53 ++- htroot/Settings_p.java | 9 +- htroot/Skins_p.java | 2 +- htroot/Status.java | 1 + htroot/Status_p.inc | 4 +- htroot/sharedBlacklist_p.java | 4 +- source/de/anomic/data/robotsParser.java | 4 +- .../de/anomic/http/httpRemoteProxyConfig.java | 181 +++++++++ source/de/anomic/http/httpc.java | 192 +++++++-- source/de/anomic/http/httpdProxyHandler.java | 91 +++-- source/de/anomic/net/natLib.java | 8 +- .../de/anomic/plasma/plasmaCrawlLoader.java | 54 +-- .../de/anomic/plasma/plasmaCrawlWorker.java | 120 +++--- source/de/anomic/plasma/plasmaDbImporter.java | 255 ++++++++++++ .../de/anomic/plasma/plasmaSnippetCache.java | 25 +- 
.../de/anomic/plasma/plasmaSwitchboard.java | 79 ++-- source/de/anomic/tools/loaderThreads.java | 26 +- source/de/anomic/yacy/yacyClient.java | 381 +++++++++++++----- source/de/anomic/yacy/yacyPeerActions.java | 6 +- source/de/anomic/yacy/yacySeedDB.java | 9 +- source/yacy.java | 2 +- yacy.init | 9 +- 27 files changed, 1535 insertions(+), 351 deletions(-) create mode 100644 htroot/IndexImport_p.html create mode 100644 htroot/IndexImport_p.java create mode 100644 source/de/anomic/http/httpRemoteProxyConfig.java create mode 100644 source/de/anomic/plasma/plasmaDbImporter.java diff --git a/htroot/IndexImport_p.html b/htroot/IndexImport_p.html new file mode 100644 index 000000000..7889a6fef --- /dev/null +++ b/htroot/IndexImport_p.html @@ -0,0 +1,105 @@ + + + +YaCy '#[clientname]#': Index Import +#[metas]# + + + +#[header]# +

+

Index DB Import

+ +

The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references

+
+#(error)# + :: +

#[error_msg]#

+ :: +

Import Job with the same path already started

+#(/error)# +

Starting new Job

+
+ + + + + + +
Import Path:
+
+ +
+
+

Currently running jobs

+

+ + + + + + + + + + + + + +#{running.jobs}# + + + + + + + + + + + + +#{/running.jobs}# +
PathStatus%Elapsed
Time
Estimated
Time
Word Hash# URLs# Word
Entities
# Word
Entries
Stop Import
#[path]##(stopped)#Finished::Running#(/stopped)##[percent]##[elapsed]##[estimated]##[wordHash]##[url_num]##[word_entity_num]##[word_entry_num]# + #(stopped)#:: + + + #(/stopped)# +
+

+ +
+
+

Finished jobs

+

+ + + + + + + + + + + +#{finished.jobs}# + + + + + + + + + + +#{/finished.jobs}# +
PathStatus%Elapsed
Time
Word Hash# URLs# Word
Entities
# Word
Entries
#[path]##(stopped)#Finished::Error: #[errorMsg]##(/stopped)##[percent]##[elapsed]##[wordHash]##[url_num]##[word_entity_num]##[word_entry_num]#
+ +

+

Last Refresh: #[date]#

+ +#[footer]# + + diff --git a/htroot/IndexImport_p.java b/htroot/IndexImport_p.java new file mode 100644 index 000000000..8f182b65c --- /dev/null +++ b/htroot/IndexImport_p.java @@ -0,0 +1,196 @@ +//IndexTransfer_p.java +//----------------------- +//part of the AnomicHTTPD caching proxy +//(C) by Michael Peter Christen; mc@anomic.de +//first published on http://www.anomic.de +//Frankfurt, Germany, 2005 +// +//This file is contributed by Martin Thelian +// +// $LastChangedDate: 2005-10-17 17:46:12 +0200 (Mo, 17 Okt 2005) $ +// $LastChangedRevision: 947 $ +// $LastChangedBy: borg-0300 $ +// +//This program is free software; you can redistribute it and/or modify +//it under the terms of the GNU General Public License as published by +//the Free Software Foundation; either version 2 of the License, or +//(at your option) any later version. +// +//This program is distributed in the hope that it will be useful, +//but WITHOUT ANY WARRANTY; without even the implied warranty of +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//GNU General Public License for more details. +// +//You should have received a copy of the GNU General Public License +//along with this program; if not, write to the Free Software +//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//Using this software in any meaning (reading, learning, copying, compiling, +//running) means that you agree that the Author(s) is (are) not responsible +//for cost, loss of data or any harm that may be caused directly or indirectly +//by usage of this softare or this documentation. The usage of this software +//is on your own risk. 
The installation and usage (starting/running) of this +//software may allow other people or application to access your computer and +//any attached devices and is highly dependent on the configuration of the +//software which must be done by the user of the software; the author(s) is +//(are) also not responsible for proper configuration and usage of the +//software, even if provoked by documentation provided together with +//the software. +// +//Any changes to this file according to the GPL as documented in the file +//gpl.txt aside this file in the shipment you received can be done to the +//lines that follows this copyright notice here, but changes must not be +//done inside the copyright notive above. A re-distribution must contain +//the intact and unchanged copyright notice. +//Contributions and changes to the program code must be marked as such. + +//You must compile this file with +//javac -classpath .:../Classes IndexControl_p.java +//if the shell's current path is HTROOT + +import java.io.File; +import java.util.Date; +import java.util.Vector; + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaDbImporter; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverDate; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; + +public final class IndexImport_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + int activeCount = 0; + + if (post != null) { + if (post.containsKey("startIndexDbImport")) { + try { + // getting the import path + String importPath = (String) post.get("importPath"); + boolean startImport = true; + + // check if there is an already running thread with the same import path + Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2]; + activeCount = 
plasmaDbImporter.runningJobs.enumerate(importThreads); + + for (int i=0; i < activeCount; i++) { + plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i]; + if (currThread.getImportRoot().equals(new File(importPath))) { + prop.put("error",2); + startImport = false; + } + } + + if (startImport) { + plasmaDbImporter newImporter = new plasmaDbImporter(switchboard.wordIndex,switchboard.urlPool.loadedURL,importPath); + newImporter.start(); + + prop.put("LOCATION",""); + return prop; + } + } catch (Exception e) { + prop.put("error",1); + prop.put("error_error_msg",e.toString()); + } + } else if (post.containsKey("clearFinishedJobList")) { + plasmaDbImporter.finishedJobs.clear(); + prop.put("LOCATION",""); + return prop; + } else if (post.containsKey("stopIndexDbImport")) { + // getting the job nr of the thread that should be stopped + String jobNr = (String) post.get("jobNr"); + + Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2]; + activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads); + + for (int i=0; i < activeCount; i++) { + plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i]; + if (currThread.getJobNr() == Integer.valueOf(jobNr).intValue()) { + currThread.stoppIt(); + try { + currThread.join(); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + break; + } + } + prop.put("LOCATION",""); + return prop; + } + } + + prop.put("wcount", Integer.toString(switchboard.wordIndex.size())); + prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size())); + + /* + * Loop over all currently running jobs + */ + Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2]; + activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads); + + for (int i=0; i < activeCount; i++) { + plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i]; + + File importPath = currThread.getImportRoot(); + String currWordHash = 
currThread.getCurrentWordhash(); + long currWordEntryCount = currThread.getWordEntryCounter(); + long currWordEntityCounter = currThread.getWordEntityCounter(); + long currUrlCounter = currThread.getUrlCounter(); + long currImportDbSize = currThread.getImportWordDbSize(); + long estimatedTime = currThread.getEstimatedTime(); + long elapsedTime = currThread.getElapsedTime(); + int jobNr = currThread.getJobNr(); + int percent = currThread.getProcessingStatus(); + + boolean isRunning = currThread.isAlive(); + + prop.put("running.jobs_" + i + "_path", importPath.toString()); + prop.put("running.jobs_" + i + "_stopped", isRunning ? 1:0); + prop.put("running.jobs_" + i + "_percent", Integer.toString(percent)); + prop.put("running.jobs_" + i + "_elapsed", serverDate.intervalToString(elapsedTime)); + prop.put("running.jobs_" + i + "_estimated", serverDate.intervalToString(estimatedTime)); + prop.put("running.jobs_" + i + "_wordHash", currWordHash); + prop.put("running.jobs_" + i + "_url_num", Long.toString(currUrlCounter)); + prop.put("running.jobs_" + i + "_word_entity_num", Long.toString(currWordEntityCounter)); + prop.put("running.jobs_" + i + "_word_entry_num", Long.toString(currWordEntryCount)); + prop.put("running.jobs_" + i + "_stopped_job_nr", Integer.toString(jobNr)); + } + prop.put("running.jobs",activeCount); + + /* + * Loop over all finished jobs + */ + Vector finishedJobs = (Vector) plasmaDbImporter.finishedJobs.clone(); + for (int i=0; i
Remote Proxy (optional)

YaCy can use another proxy to connect to the internet. You can enter the address for the remote proxy here:

+ + + + + + + + + + + + + + + + + + + + + - + + - + + + + - - + + + + + + + + + + + + - - + + + + + - +
Use remote proxy: Enables the usage of the remote proxy by yacy
Use remote proxy for yacy <-> yacy communicationSpecifies if the remote proxy should be used for the communication of this peer to other yacy peers.
+ Hint: Enabling this option could cause this peer to remain in junior status.
Use remote proxy for httpsSpecifies if YaCy should forward ssl connections to the remote proxy.

Remote proxy host:The ip address or domain name of the remote proxy
Remote proxy port:the port of the remote proxy
no-proxy adresses:Remote proxy user: 
Remote proxy pwd: 

Use remote proxy:no-proxy adresses:IP addresses for which the remote proxy should not be used
 Changes will take effect immediately. Changes will take effect immediately.

diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index f25516118..6811a927f 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -109,10 +109,17 @@ public final class Settings_p { } // remote proxy + prop.put("remoteProxyUseChecked", env.getConfig("remoteProxyUse", "false").equals("true") ? 1 : 0); + prop.put("remoteProxyUse4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equals("true") ? 1 : 0); + prop.put("remoteProxyUse4SSL", env.getConfig("remoteProxyUse4SSL", "true").equals("true") ? 1 : 0); + prop.put("remoteProxyHost", env.getConfig("remoteProxyHost", "")); prop.put("remoteProxyPort", env.getConfig("remoteProxyPort", "")); + + prop.put("remoteProxyUser", env.getConfig("remoteProxyUser", "")); + prop.put("remoteProxyPwd", env.getConfig("remoteProxyPwd", "")); + prop.put("remoteProxyNoProxy", env.getConfig("remoteProxyNoProxy", "")); - prop.put("remoteProxyUseChecked", ((String) env.getConfig("remoteProxyUse", "false")).equals("true") ? 1 : 0); // proxy access filter prop.put("proxyfilter", env.getConfig("proxyClient", "*")); diff --git a/htroot/Skins_p.java b/htroot/Skins_p.java index 6ae0a0e90..9b24ea565 100644 --- a/htroot/Skins_p.java +++ b/htroot/Skins_p.java @@ -124,7 +124,7 @@ public class Skins_p { String url = (String)post.get("url"); ArrayList skinVector; try{ - skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); + skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyConfig); }catch(IOException e){ prop.put("status", 1);//unable to get url prop.put("status_url", url); diff --git a/htroot/Status.java b/htroot/Status.java index 0a73738d9..8dc3fe65f 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -132,6 +132,7 @@ public class Status { prop.put("remoteProxy", 1); prop.put("remoteProxy_host", env.getConfig("remoteProxyHost", "")); prop.put("remoteProxy_port", env.getConfig("remoteProxyPort", "")); + 
prop.put("remoteProxy_4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true")?0:1); } else { prop.put("remoteProxy", 0); // not used } diff --git a/htroot/Status_p.inc b/htroot/Status_p.inc index d516f709e..fc57d1bf7 100644 --- a/htroot/Status_p.inc +++ b/htroot/Status_p.inc @@ -19,7 +19,7 @@   - Proxy host + Peer host #[host]#:#[port]#   @@ -30,7 +30,7 @@ Remote proxy - #(remoteProxy)#not used::#[host]#:#[port]##(/remoteProxy)# + #(remoteProxy)#not used::#[host]#:#[port]# | Used for YaCy -> YaCy communication: #(4Yacy)#Yes::No #(/4Yacy)# #(/remoteProxy)#   diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 4c6f67659..89e1ddfdc 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -142,7 +142,7 @@ public class sharedBlacklist_p { //Make Adresse address = "http://" + IP + ":" + Port + "/yacy/list.html?col=black"; try { - otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List + otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List } catch (Exception e) {} //Make HTML-Optionlist with retrieved items @@ -173,7 +173,7 @@ public class sharedBlacklist_p { Name = address; try { - otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List + otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List } catch (Exception e) {} prop.put("status", 0); //TODO: check if the wget failed... 
diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java index 22a19e4a8..523f3d5a8 100644 --- a/source/de/anomic/data/robotsParser.java +++ b/source/de/anomic/data/robotsParser.java @@ -244,10 +244,10 @@ public final class robotsParser{ try { downloadStart = System.currentTimeMillis(); plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard(); - if (!sb.remoteProxyUse) { + if ((sb.remoteProxyConfig == null) || (!sb.remoteProxyConfig.useProxy())) { con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false); } else { - con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyHost, sb.remoteProxyPort); + con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyConfig); } // if we previously have downloaded this robots.txt then we can set the if-modified-since header diff --git a/source/de/anomic/http/httpRemoteProxyConfig.java b/source/de/anomic/http/httpRemoteProxyConfig.java new file mode 100644 index 000000000..b94689e7c --- /dev/null +++ b/source/de/anomic/http/httpRemoteProxyConfig.java @@ -0,0 +1,181 @@ +//httpRemoteProxyConfig.java +//----------------------- +//part of the AnomicHTTPD caching proxy +//(C) by Michael Peter Christen; mc@anomic.de +//first published on http://www.anomic.de +//Frankfurt, Germany, 2004 +// +//this file was contributed by Martin Thelian +//$LastChangedDate$ +//$LastChangedBy$ +//$LastChangedRevision$ +// +//This program is free software; you can redistribute it and/or modify +//it under the terms of the GNU General Public License as published by +//the Free Software Foundation; either version 2 of the License, or +//(at your option) any later version. +// +//This program is distributed in the hope that it will be useful, +//but WITHOUT ANY WARRANTY; without even the implied warranty of +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//GNU General Public License for more details. 
+// +//You should have received a copy of the GNU General Public License +//along with this program; if not, write to the Free Software +//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//Using this software in any meaning (reading, learning, copying, compiling, +//running) means that you agree that the Author(s) is (are) not responsible +//for cost, loss of data or any harm that may be caused directly or indirectly +//by usage of this softare or this documentation. The usage of this software +//is on your own risk. The installation and usage (starting/running) of this +//software may allow other people or application to access your computer and +//any attached devices and is highly dependent on the configuration of the +//software which must be done by the user of the software; the author(s) is +//(are) also not responsible for proper configuration and usage of the +//software, even if provoked by documentation provided together with +//the software. +// +//Any changes to this file according to the GPL as documented in the file +//gpl.txt aside this file in the shipment you received can be done to the +//lines that follows this copyright notice here, but changes must not be +//done inside the copyright notive above. A re-distribution must contain +//the intact and unchanged copyright notice. +//Contributions and changes to the program code must be marked as such. 
+ +//You must compile this file with +//javac -classpath .:../Classes Settings_p.java +//if the shell's current path is HTROOT + +package de.anomic.http; + +import java.util.HashSet; + +import de.anomic.plasma.plasmaSwitchboard; + +public final class httpRemoteProxyConfig { + + /* + * Remote Proxy configuration + */ + private boolean remoteProxyUse; + private boolean remoteProxyUse4Yacy; + private boolean remoteProxyUse4SSL; + + private String remoteProxyHost; + private int remoteProxyPort; + private String remoteProxyUser; + private String remoteProxyPwd; + + private String remoteProxyNoProxy = ""; + private String[] remoteProxyNoProxyPatterns = null; + + public final HashSet remoteProxyAllowProxySet = new HashSet(); + public final HashSet remoteProxyDisallowProxySet = new HashSet(); + + public boolean useProxy() { + return this.remoteProxyUse; + } + + public boolean useProxy4Yacy() { + return this.remoteProxyUse4Yacy; + } + + public boolean useProxy4SSL() { + return this.remoteProxyUse4SSL; + } + + public String getProxyHost() { + return this.remoteProxyHost; + } + + public int getProxyPort() { + return this.remoteProxyPort; + } + + public String getProxyUser() { + return this.remoteProxyUser; + } + + public String getProxyPwd() { + return this.remoteProxyPwd; + } + + public String getProxyNoProxy() { + return this.remoteProxyNoProxy; + } + + public String[] getProxyNoProxyPatterns() { + return this.remoteProxyNoProxyPatterns; + } + + public String toString() { + StringBuffer toStrBuf = new StringBuffer(); + + toStrBuf + .append("Status: ").append(this.remoteProxyUse?"ON":"OFF").append(" | ") + .append("Host: "); + if ((this.remoteProxyUser != null) && (this.remoteProxyUser.length() > 0)) { + toStrBuf.append(this.remoteProxyUser) + .append("@"); + } + toStrBuf + .append((this.remoteProxyHost==null)?"unknown":this.remoteProxyHost).append(":").append(this.remoteProxyPort).append(" | ") + .append("Usage: HTTP"); + if (this.remoteProxyUse4Yacy) toStrBuf.append(" 
YACY"); + if (this.remoteProxyUse4SSL) toStrBuf.append(" SSL"); + toStrBuf.append(" | ") + .append("No Proxy for: ") + .append(this.remoteProxyNoProxy); + + + return toStrBuf.toString(); + } + + public static httpRemoteProxyConfig init( + String proxyHostName, + int proxyHostPort + ) { + httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig(); + + newConfig.remoteProxyUse = true; + newConfig.remoteProxyUse4SSL = true; + newConfig.remoteProxyUse4Yacy = true; + newConfig.remoteProxyHost = proxyHostName; + newConfig.remoteProxyPort = proxyHostPort; + + return newConfig; + } + + public static httpRemoteProxyConfig init(plasmaSwitchboard sb) { + httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig(); + + // determining if remote proxy usage is enabled + newConfig.remoteProxyUse = sb.getConfig("remoteProxyUse", "false").equalsIgnoreCase("true"); + + // determining if remote proxy should be used for yacy -> yacy communication + newConfig.remoteProxyUse4Yacy = sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true"); + + // determining if remote proxy should be used for ssl connections + newConfig.remoteProxyUse4SSL = sb.getConfig("remoteProxyUse4SSL", "true").equalsIgnoreCase("true"); + + // reading the proxy host name + newConfig.remoteProxyHost = sb.getConfig("remoteProxyHost", "").trim(); + + // reading the proxy host port + try { + newConfig.remoteProxyPort = Integer.parseInt(sb.getConfig("remoteProxyPort", "3128")); + } catch (NumberFormatException e) { + newConfig.remoteProxyPort = 3128; + } + + newConfig.remoteProxyUser = sb.getConfig("remoteProxyUser", "").trim(); + newConfig.remoteProxyPwd = sb.getConfig("remoteProxyPwd", "").trim(); + + // determining addresses for which the remote proxy should not be used + newConfig.remoteProxyNoProxy = sb.getConfig("remoteProxyNoProxy","").trim(); + newConfig.remoteProxyNoProxyPatterns = newConfig.remoteProxyNoProxy.split(","); + + return newConfig; + } +} diff --git a/source/de/anomic/http/httpc.java 
b/source/de/anomic/http/httpc.java index 9e752c88d..4e71fe33e 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -134,6 +134,8 @@ public final class httpc { private boolean remoteProxyUse = false; private String savedRemoteHost = null; + private httpRemoteProxyConfig remoteProxyConfig = null; + String requestPath = null; private boolean allowContentEncoding = true; static boolean useYacyReferer = true; @@ -206,8 +208,7 @@ public final class httpc { int port, int timeout, boolean ssl, - String remoteProxyHost, - int remoteProxyPort + httpRemoteProxyConfig remoteProxyConfig ) throws IOException { httpc newHttpc; @@ -220,7 +221,13 @@ public final class httpc { // initialize it try { - newHttpc.init(server,port,timeout,ssl,remoteProxyHost, remoteProxyPort); + newHttpc.init( + server, + port, + timeout, + ssl, + remoteProxyConfig + ); } catch (IOException e) { try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {} throw e; @@ -386,16 +393,25 @@ public final class httpc { * @param remoteProxyPort * @throws IOException */ - void init(String server, int port, int timeout, boolean ssl, - String remoteProxyHost, int remoteProxyPort) throws IOException { + void init( + String server, + int port, + int timeout, + boolean ssl, + httpRemoteProxyConfig theRemoteProxyConfig) throws IOException { if (port == -1) { port = (ssl)? 443 : 80; } + String remoteProxyHost = theRemoteProxyConfig.getProxyHost(); + int remoteProxyPort = theRemoteProxyConfig.getProxyPort(); + this.init(remoteProxyHost, remoteProxyPort, timeout, ssl); + this.remoteProxyUse = true; this.savedRemoteHost = server + ((port == 80) ? 
"" : (":" + port)); + this.remoteProxyConfig = theRemoteProxyConfig; } /** @@ -491,6 +507,7 @@ public final class httpc { this.handle = 0; this.remoteProxyUse = false; + this.remoteProxyConfig = null; this.savedRemoteHost = null; this.requestPath = null; @@ -566,6 +583,14 @@ public final class httpc { else header.put(httpHeader.HOST, this.host); } + + if (this.remoteProxyUse) { + String remoteProxyUser = this.remoteProxyConfig.getProxyUser(); + String remoteProxyPwd = this.remoteProxyConfig.getProxyPwd(); + if ((remoteProxyUser!=null)&&(remoteProxyUser.length()>0)) { + header.put(httpHeader.PROXY_AUTHORIZATION,serverCodings.standardCoder.encodeBase64String(remoteProxyUser + ":" + remoteProxyPwd)); + } + } if (!(header.containsKey(httpHeader.CONNECTION))) { header.put(httpHeader.CONNECTION, "close"); @@ -702,6 +727,8 @@ public final class httpc { this.clientOutput.write(buffer, 0, c); len += c; } + + // TODO: we can not set the header here. This ist too late requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(len)); } this.clientOutput.flush(); @@ -806,7 +833,6 @@ public final class httpc { // finish with a boundary out.write(boundary.getBytes()); out.write(serverCore.crlf); - //buf.write("" + serverCore.crlfString); } // create body array out.close(); @@ -816,15 +842,17 @@ public final class httpc { //System.out.println("DEBUG: PUT BODY=" + new String(body)); if (zipContent) { requestHeader.put(httpHeader.CONTENT_ENCODING, "gzip"); + + //TODO: should we also set the content length here? 
} else { // size of that body requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(body.length)); } + // send the header - //System.out.println("header=" + requestHeader); send(httpHeader.METHOD_POST, path, requestHeader, false); + // send the body - //System.out.println("body=" + buf.toString()); serverCore.send(this.clientOutput, body); return new response(false); @@ -884,11 +912,20 @@ do upload ###### End OfList ###### */ - public static byte[] singleGET(String host, int port, String path, int timeout, - String user, String password, boolean ssl, - String proxyHost, int proxyPort, - httpHeader requestHeader) throws IOException { + public static byte[] singleGET( + String host, + int port, + String path, + int timeout, + String user, + String password, + boolean ssl, + httpRemoteProxyConfig theRemoteProxyConfig, + httpHeader requestHeader + ) throws IOException { if (requestHeader == null) requestHeader = new httpHeader(); + + // setting host authorization header if ((user != null) && (password != null) && (user.length() != 0)) { requestHeader.put(httpHeader.AUTHORIZATION, serverCodings.standardCoder.encodeBase64String(user + ":" + password)); } @@ -896,10 +933,10 @@ do upload httpc con = null; try { - if ((proxyHost == null) || (proxyPort == 0)) { + if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) { con = httpc.getInstance(host, port, timeout, ssl); } else { - con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort); + con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); } httpc.response res = con.GET(path, null); @@ -915,16 +952,20 @@ do upload } - public static byte[] singleGET(URL u, int timeout, - String user, String password, - String proxyHost, int proxyPort) throws IOException { + public static byte[] singleGET( + URL u, + int timeout, + String user, + String password, + httpRemoteProxyConfig theRemoteProxyConfig + ) throws IOException { int port = u.getPort(); boolean ssl = 
u.getProtocol().equals("https"); if (port < 0) port = (ssl) ? 443: 80; String path = u.getPath(); String query = u.getQuery(); if ((query != null) && (query.length() > 0)) path = path + "?" + query; - return singleGET(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null); + return singleGET(u.getHost(), port, path, timeout, user, password, ssl, theRemoteProxyConfig, null); } /* @@ -937,10 +978,18 @@ do upload } */ - public static byte[] singlePOST(String host, int port, String path, int timeout, - String user, String password, boolean ssl, - String proxyHost, int proxyPort, - httpHeader requestHeader, serverObjects props) throws IOException { + public static byte[] singlePOST( + String host, + int port, + String path, + int timeout, + String user, + String password, + boolean ssl, + httpRemoteProxyConfig theRemoteProxyConfig, + httpHeader requestHeader, + serverObjects props + ) throws IOException { if (requestHeader == null) requestHeader = new httpHeader(); if ((user != null) && (password != null) && (user.length() != 0)) { @@ -949,10 +998,11 @@ do upload httpc con = null; try { - if ((proxyHost == null) || (proxyPort == 0)) + if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) { con = httpc.getInstance(host, port, timeout, ssl); - else - con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort); + } else { + con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); + } httpc.response res = con.POST(path, requestHeader, props, null); //System.out.println("response=" + res.toString()); @@ -968,30 +1018,69 @@ do upload } - public static byte[] singlePOST(URL u, int timeout, - String user, String password, - String proxyHost, int proxyPort, - serverObjects props) throws IOException { + public static byte[] singlePOST( + URL u, + int timeout, + String user, + String password, + httpRemoteProxyConfig theRemoteProxyConfig, + serverObjects props + ) throws IOException { int port = u.getPort(); 
boolean ssl = u.getProtocol().equals("https"); if (port < 0) port = (ssl) ? 443 : 80; String path = u.getPath(); String query = u.getQuery(); if ((query != null) && (query.length() > 0)) path = path + "?" + query; - return singlePOST(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null, props); + return singlePOST( + u.getHost(), + port, + path, + timeout, + user, + password, + ssl, + theRemoteProxyConfig, + null, + props + ); } - public static byte[] singlePOST(String url, int timeout, serverObjects props) throws IOException { + public static byte[] singlePOST( + String url, + int timeout, + serverObjects props + ) throws IOException { try { - return singlePOST(new URL(url), timeout, null, null, null, 0, props); + return singlePOST( + new URL(url), + timeout, + null, + null, + null, + props + ); } catch (MalformedURLException e) { throw new IOException("Malformed URL: " + e.getMessage()); } } - public static ArrayList wget(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException { + public static ArrayList wget( + URL url, + int timeout, + String user, + String password, + httpRemoteProxyConfig theRemoteProxyConfig + ) throws IOException { // splitting of the byte array into lines - byte[] a = singleGET(url, timeout, user, password, proxyHost, proxyPort); + byte[] a = singleGET( + url, + timeout, + user, + password, + theRemoteProxyConfig + ); + if (a == null) return null; int s = 0; int e; @@ -1004,7 +1093,13 @@ do upload return v; } - public static httpHeader whead(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException { + public static httpHeader whead( + URL url, + int timeout, + String user, + String password, + httpRemoteProxyConfig theRemoteProxyConfig + ) throws IOException { // generate request header httpHeader requestHeader = new httpHeader(); if ((user != null) && (password != null) && (user.length() != 0)) { @@ -1023,9 +1118,9 
@@ do upload // start connection httpc con = null; try { - if ((proxyHost == null) || (proxyPort == 0)) + if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) con = httpc.getInstance(host, port, timeout, ssl); - else con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort); + else con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); httpc.response res = con.HEAD(path, requestHeader); if (res.status.startsWith("2")) { @@ -1053,9 +1148,24 @@ do upload } */ - public static ArrayList wput(URL url, int timeout, String user, String password, String proxyHost, int proxyPort, serverObjects props) throws IOException { + public static ArrayList wput( + URL url, + int timeout, + String user, + String password, + httpRemoteProxyConfig theRemoteProxyConfig, + serverObjects props + ) throws IOException { // splitting of the byte array into lines - byte[] a = singlePOST(url, timeout, user, password, proxyHost, proxyPort, props); + byte[] a = singlePOST( + url, + timeout, + user, + password, + theRemoteProxyConfig, + props + ); + //System.out.println("wput-out=" + new String(a)); int s = 0; int e; @@ -1090,8 +1200,10 @@ do upload int timeout = Integer.parseInt(args[1]); String proxyHost = args[2]; int proxyPort = Integer.parseInt(args[3]); + + httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort); try { - text = wget(new URL(url), timeout, null, null, proxyHost, proxyPort); + text = wget(new URL(url), timeout, null, null, theRemoteProxyConfig); } catch (MalformedURLException e) { System.out.println("The url '" + url + "' is wrong."); } catch (IOException e) { diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index e5b753332..649ea6f0c 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -105,14 +105,14 @@ public final class httpdProxyHandler extends httpdAbstractHandler 
implements htt private static int timeout = 30000; private static boolean yacyTrigger = true; public static boolean isTransparentProxy = false; - public static boolean remoteProxyUse = false; - public static String remoteProxyHost = ""; - public static int remoteProxyPort = -1; - public static String remoteProxyNoProxy = ""; - public static String[] remoteProxyNoProxyPatterns = null; +// public static boolean remoteProxyUse = false; +// public static String remoteProxyHost = ""; +// public static int remoteProxyPort = -1; +// public static String remoteProxyNoProxy = ""; +// public static String[] remoteProxyNoProxyPatterns = null; - private static final HashSet remoteProxyAllowProxySet = new HashSet(); - private static final HashSet remoteProxyDisallowProxySet = new HashSet(); +// private static final HashSet remoteProxyAllowProxySet = new HashSet(); +// private static final HashSet remoteProxyDisallowProxySet = new HashSet(); private static htmlFilterTransformer transformer = null; public static final String userAgent = "yacy (" + httpc.systemOST +") yacy.net"; @@ -203,16 +203,16 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt // doing httpc init httpc.useYacyReferer = sb.getConfig("useYacyReferer", "true").equals("true"); - // load remote proxy data - remoteProxyHost = switchboard.getConfig("remoteProxyHost",""); - try { - remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")); - } catch (NumberFormatException e) { - remoteProxyPort = 3128; - } - remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true"); - remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy",""); - remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(","); +// // load remote proxy data +// remoteProxyHost = switchboard.getConfig("remoteProxyHost",""); +// try { +// remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")); +// } catch (NumberFormatException e) { +// 
remoteProxyPort = 3128; +// } +// remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true"); +// remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy",""); +// remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(","); // set timeout timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000")); @@ -1020,7 +1020,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); - int timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000")); int port, pos; if ((pos = host.indexOf(":")) < 0) { @@ -1043,16 +1042,23 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt } // possibly branch into PROXY-PROXY connection - if (remoteProxyUse) { + if ((switchboard.remoteProxyConfig != null) && (switchboard.remoteProxyConfig.useProxy4SSL())) { httpc remoteProxy = null; try { - remoteProxy = httpc.getInstance(host, port, timeout, false, remoteProxyHost, remoteProxyPort); + remoteProxy = httpc.getInstance( + host, + port, + timeout, + false, + switchboard.remoteProxyConfig + ); + httpc.response response = remoteProxy.CONNECT(host, port, requestHeader); response.print(); if (response.success()) { // replace connection details - host = remoteProxyHost; - port = remoteProxyPort; + host = switchboard.remoteProxyConfig.getProxyHost(); + port = switchboard.remoteProxyConfig.getProxyPort(); // go on (see below) } else { // pass error response back to client @@ -1147,37 +1153,56 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt } private httpc newhttpc(String server, int port, int timeout) throws IOException { + + // getting the remote proxy configuration + httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig; + // a new httpc connection, combined with possible remote proxy - 
boolean useProxy = remoteProxyUse; + boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy()); + // check no-proxy rule - if ((useProxy) && (!(remoteProxyAllowProxySet.contains(server)))) { - if (remoteProxyDisallowProxySet.contains(server)) { + if ( + (switchboard.remoteProxyConfig != null) && + (switchboard.remoteProxyConfig.useProxy()) && + (!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) { + if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) { useProxy = false; } else { // analyse remoteProxyNoProxy; // set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly int i = 0; - while (i < remoteProxyNoProxyPatterns.length) { - if (server.matches(remoteProxyNoProxyPatterns[i])) { + while (i < remProxyConfig.getProxyNoProxyPatterns().length) { + if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) { // disallow proxy for this server - remoteProxyDisallowProxySet.add(server); + switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server); useProxy = false; break; } i++; } - if (i == remoteProxyNoProxyPatterns.length) { + if (i == remProxyConfig.getProxyNoProxyPatterns().length) { // no pattern matches: allow server - remoteProxyAllowProxySet.add(server); + switchboard.remoteProxyConfig.remoteProxyAllowProxySet.add(server); } } } + // branch to server/proxy if (useProxy) { - return httpc.getInstance(server, port, timeout, false, remoteProxyHost, remoteProxyPort); - } else { - return httpc.getInstance(server, port, timeout, false); + return httpc.getInstance( + server, + port, + timeout, + false, + remProxyConfig + ); } + return httpc.getInstance( + server, + port, + timeout, + false + ); } private httpc newhttpc(String address, int timeout) throws IOException { diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 1251fc551..82e0ebfb0 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ 
-60,7 +60,7 @@ public class natLib { rm status.htm */ try { - ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null, 0); + ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null); x = nxTools.grep(x, 1, "IP Address"); if ((x == null) || (x.size() == 0)) return null; String line = nxTools.tail1(x); @@ -72,7 +72,7 @@ public class natLib { private static String getWhatIsMyIP() { try { - ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null, 0); + ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null); x = nxTools.grep(x, 0, "Your IP is"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 4); @@ -83,7 +83,7 @@ public class natLib { private static String getStanford() { try { - ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null, 0); + ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null); x = nxTools.grep(x, 0, "firewall protecting your browser"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 7); @@ -94,7 +94,7 @@ public class natLib { private static String getIPID() { try { - ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null, 0); + ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null); x = nxTools.grep(x, 2, "Your IP address"); String line = nxTools.tail1(x); return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1); diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index e73ce9606..fe112c527 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -56,7 +56,6 @@ public final class plasmaCrawlLoader extends Thread { static plasmaSwitchboard switchboard; private final 
plasmaHTCache cacheManager; - private final int socketTimeout; private final serverLog log; private final CrawlerMessageQueue theQueue; @@ -66,15 +65,13 @@ public final class plasmaCrawlLoader extends Thread { private boolean stopped = false; public plasmaCrawlLoader( - plasmaHTCache cacheManager, - serverLog log) { + plasmaHTCache theCacheManager, + serverLog theLog) { this.setName("plasmaCrawlLoader"); - this.cacheManager = cacheManager; - this.log = log; - - this.socketTimeout = Integer.parseInt(switchboard.getConfig("crawler.clientTimeout", "10000")); + this.cacheManager = theCacheManager; + this.log = theLog; // configuring the crawler messagequeue this.theQueue = new CrawlerMessageQueue(); @@ -103,11 +100,8 @@ public final class plasmaCrawlLoader extends Thread { CrawlerFactory theFactory = new CrawlerFactory( this.theThreadGroup, + switchboard, cacheManager, - socketTimeout, - switchboard.getConfig("remoteProxyUse","false").equals("true"), - switchboard.getConfig("remoteProxyHost",""), - Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")), log); this.crawlwerPool = new CrawlerPool(theFactory,this.cralwerPoolConfig,this.theThreadGroup); @@ -363,37 +357,28 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact private CrawlerPool thePool; private final ThreadGroup theThreadGroup; private final plasmaHTCache cacheManager; - private final int socketTimeout; - private final boolean remoteProxyUse; - private final String remoteProxyHost; - private final int remoteProxyPort; private final serverLog theLog; + private final plasmaSwitchboard sb; public CrawlerFactory( - ThreadGroup theThreadGroup, - plasmaHTCache cacheManager, - int socketTimeout, - boolean remoteProxyUse, - String remoteProxyHost, - int remoteProxyPort, - serverLog theLog) { + ThreadGroup threadGroup, + plasmaSwitchboard theSb, + plasmaHTCache theCacheManager, + serverLog log) { super(); - if (theThreadGroup == null) + if (threadGroup == null) throw new 
IllegalArgumentException("The threadgroup object must not be null."); - this.theThreadGroup = theThreadGroup; - this.cacheManager = cacheManager; - this.socketTimeout = socketTimeout; - this.remoteProxyUse = remoteProxyUse; - this.remoteProxyHost = remoteProxyHost; - this.remoteProxyPort = remoteProxyPort; - this.theLog = theLog; + this.theThreadGroup = threadGroup; + this.cacheManager = theCacheManager; + this.sb = theSb; + this.theLog = log; } - public void setPool(CrawlerPool thePool) { - this.thePool = thePool; + public void setPool(CrawlerPool pool) { + this.thePool = pool; } /** @@ -403,11 +388,8 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact return new plasmaCrawlWorker( this.theThreadGroup, this.thePool, + this.sb, this.cacheManager, - this.socketTimeout, - this.remoteProxyUse, - this.remoteProxyHost, - this.remoteProxyPort, this.theLog); } diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index a8572cf00..9e3d3c783 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -57,6 +57,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import de.anomic.http.httpHeader; +import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; import de.anomic.http.httpdProxyHandler; import de.anomic.server.serverCore; @@ -72,12 +73,10 @@ public final class plasmaCrawlWorker extends Thread { private static final String threadBaseName = "CrawlerWorker"; private final CrawlerPool myPool; + private final plasmaSwitchboard sb; private final plasmaHTCache cacheManager; - private final int socketTimeout; - private final boolean remoteProxyUse; - private final String remoteProxyHost; - private final int remoteProxyPort; private final serverLog log; + private int socketTimeout; public plasmaCrawlLoaderMessage theMsg; private URL url; @@ -114,33 +113,35 @@ public final class plasmaCrawlWorker extends 
Thread { public plasmaCrawlWorker( ThreadGroup theTG, - CrawlerPool thePool, - plasmaHTCache cacheManager, - int socketTimeout, - boolean remoteProxyUse, - String remoteProxyHost, - int remoteProxyPort, - serverLog log) { + CrawlerPool thePool, + plasmaSwitchboard theSb, + plasmaHTCache theCacheManager, + serverLog theLog) { super(theTG,threadBaseName + "_inPool"); this.myPool = thePool; - this.cacheManager = cacheManager; - this.socketTimeout = socketTimeout; - this.remoteProxyUse = remoteProxyUse; - this.remoteProxyHost = remoteProxyHost; - this.remoteProxyPort = remoteProxyPort; - this.log = log; + this.sb = theSb; + this.cacheManager = theCacheManager; + this.log = theLog; + + // setting the crawler timeout properly + this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000); + } + + public long getDuration() { + long startDate = this.startdate; + return (startDate != 0) ? System.currentTimeMillis() - startDate : 0; } - public synchronized void execute(plasmaCrawlLoaderMessage theMsg) { - this.theMsg = theMsg; + public synchronized void execute(plasmaCrawlLoaderMessage theNewMsg) { + this.theMsg = theNewMsg; - this.url = theMsg.url; - this.name = theMsg.name; - this.referer = theMsg.referer; - this.initiator = theMsg.initiator; - this.depth = theMsg.depth; - this.profile = theMsg.profile; + this.url = theNewMsg.url; + this.name = theNewMsg.name; + this.referer = theNewMsg.referer; + this.initiator = theNewMsg.initiator; + this.depth = theNewMsg.depth; + this.profile = theNewMsg.profile; this.startdate = System.currentTimeMillis(); //this.error = null; @@ -197,7 +198,7 @@ public final class plasmaCrawlWorker extends Thread { if (!this.stopped && !this.isInterrupted()) { try { this.myPool.returnObject(this); - this.setName(this.threadBaseName + "_inPool"); + this.setName(plasmaCrawlWorker.threadBaseName + "_inPool"); } catch (Exception e1) { log.logSevere("pool error", e1); @@ -210,10 +211,25 @@ public final class plasmaCrawlWorker extends 
Thread { public void execute() throws IOException { try { - this.setName(this.threadBaseName + "_" + this.url); - load(this.url, this.name, this.referer, this.initiator, this.depth, this.profile, - this.socketTimeout, this.remoteProxyHost, this.remoteProxyPort, this.remoteProxyUse, - this.cacheManager, this.log); + // setting threadname + this.setName(plasmaCrawlWorker.threadBaseName + "_" + this.url); + + // refreshing timeout value + this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000); + + // loading resource + load( + this.url, + this.name, + this.referer, + this.initiator, + this.depth, + this.profile, + this.socketTimeout, + this.sb.remoteProxyConfig, + this.cacheManager, + this.log + ); } catch (IOException e) { //throw e; @@ -223,8 +239,8 @@ public final class plasmaCrawlWorker extends Thread { } } - public void setStopped(boolean stopped) { - this.stopped = stopped; + public void setStopped(boolean isStopped) { + this.stopped = isStopped; } public boolean isRunning() { @@ -251,9 +267,7 @@ public final class plasmaCrawlWorker extends Thread { int depth, plasmaCrawlProfile.entry profile, int socketTimeout, - String remoteProxyHost, - int remoteProxyPort, - boolean remoteProxyUse, + httpRemoteProxyConfig theRemoteProxyConfig, plasmaHTCache cacheManager, serverLog log ) throws IOException { @@ -264,9 +278,7 @@ public final class plasmaCrawlWorker extends Thread { depth, profile, socketTimeout, - remoteProxyHost, - remoteProxyPort, - remoteProxyUse, + theRemoteProxyConfig, cacheManager, log, DEFAULT_CRAWLING_RETRY_COUNT, @@ -282,9 +294,7 @@ public final class plasmaCrawlWorker extends Thread { int depth, plasmaCrawlProfile.entry profile, int socketTimeout, - String remoteProxyHost, - int remoteProxyPort, - boolean remoteProxyUse, + httpRemoteProxyConfig theRemoteProxyConfig, plasmaHTCache cacheManager, serverLog log, int crawlingRetryCount, @@ -309,8 +319,16 @@ public final class plasmaCrawlWorker extends Thread { String hostlow = 
host.toLowerCase(); if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) { log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist."); - sb.urlPool.errorURL.newEntry(url, referer,initiator, yacyCore.seedDB.mySeed.hash, - name, "denied_(url_in_blacklist)", new bitfield(plasmaURL.urlFlagLength), true); + sb.urlPool.errorURL.newEntry( + url, + referer, + initiator, + yacyCore.seedDB.mySeed.hash, + name, + "denied_(url_in_blacklist)", + new bitfield(plasmaURL.urlFlagLength), + true + ); return; } @@ -335,8 +353,9 @@ public final class plasmaCrawlWorker extends Thread { //System.out.println("CRAWLER_REQUEST_HEADER=" + requestHeader.toString()); // DEBUG // open the connection - remote = (remoteProxyUse) ? httpc.getInstance(host, port, socketTimeout, ssl, remoteProxyHost, remoteProxyPort) - : httpc.getInstance(host, port, socketTimeout, ssl); + remote = ((theRemoteProxyConfig != null) && (theRemoteProxyConfig.useProxy())) + ? httpc.getInstance(host, port, socketTimeout, ssl, theRemoteProxyConfig) + : httpc.getInstance(host, port, socketTimeout, ssl); // specifying if content encoding is allowed remote.setAllowContentEncoding(useContentEncodingGzip); @@ -346,6 +365,8 @@ public final class plasmaCrawlWorker extends Thread { if (res.status.startsWith("200") || res.status.startsWith("203")) { // the transfer is ok + + // TODO: aborting download if content is to long ... long contentLength = res.responseHeader.contentLength(); // reserve cache entry @@ -354,7 +375,6 @@ public final class plasmaCrawlWorker extends Thread { // request has been placed and result has been returned. 
work off response File cacheFile = cacheManager.getCachePath(url); try { - String error = null; if (plasmaParser.supportedContent(url,res.responseHeader.mime())) { if (cacheFile.isFile()) { cacheManager.deleteFile(url); @@ -430,9 +450,7 @@ public final class plasmaCrawlWorker extends Thread { depth, profile, socketTimeout, - remoteProxyHost, - remoteProxyPort, - remoteProxyUse, + theRemoteProxyConfig, cacheManager, log, --crawlingRetryCount, @@ -517,9 +535,7 @@ public final class plasmaCrawlWorker extends Thread { depth, profile, socketTimeout, - remoteProxyHost, - remoteProxyPort, - remoteProxyUse, + theRemoteProxyConfig, cacheManager, log, --crawlingRetryCount, diff --git a/source/de/anomic/plasma/plasmaDbImporter.java b/source/de/anomic/plasma/plasmaDbImporter.java new file mode 100644 index 000000000..d69bcba40 --- /dev/null +++ b/source/de/anomic/plasma/plasmaDbImporter.java @@ -0,0 +1,255 @@ +package de.anomic.plasma; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.Vector; + +import de.anomic.server.serverDate; +import de.anomic.server.logging.serverLog; + +public class plasmaDbImporter extends Thread { + + public static final Vector finishedJobs = new Vector(); + public static final ThreadGroup runningJobs = new ThreadGroup("DbImport"); + public static int currMaxJobNr = 0; + + private final int jobNr; + private final plasmaCrawlLURL homeUrlDB; + private final plasmaWordIndex homeWordIndex; + + private final plasmaCrawlLURL importUrlDB; + private final plasmaWordIndex importWordIndex; + private final String importPath; + private final File importRoot; + private final int importStartSize; + + private final serverLog log; + private boolean stopped = false; + private boolean paused = false; + private String wordHash = "------------"; + + long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = wordChunkStart; + String wordChunkStartHash = "------------", wordChunkEndHash; + private long urlCounter = 0, 
wordCounter = 0, entryCounter = 0; + + private long globalStart = System.currentTimeMillis(); + private long globalEnd; + + private String error; + + public void stoppIt() { + this.stopped = true; + } + + public String getError() { + return this.error; + } + + public int getJobNr() { + return this.jobNr; + } + + public String getCurrentWordhash() { + return this.wordHash; + } + + public long getUrlCounter() { + return this.urlCounter; + } + + public long getWordEntityCounter() { + return this.wordCounter; + } + + public long getWordEntryCounter() { + return this.entryCounter; + } + + public File getImportRoot() { + return this.importRoot; + } + + public int getImportWordDbSize() { + return this.importWordIndex.size(); + } + + public plasmaDbImporter(plasmaWordIndex theHomeIndexDB, plasmaCrawlLURL theHomeUrlDB, String theImportPath) throws IOException { + super(runningJobs,"DB-Import_" + theImportPath); + + this.log = new serverLog("DB-IMPORT"); + + synchronized(runningJobs) { + this.jobNr = currMaxJobNr; + currMaxJobNr++; + } + + if (theImportPath == null) throw new NullPointerException(); + this.importPath = theImportPath; + this.importRoot = new File(theImportPath); + + if (theHomeIndexDB == null) throw new NullPointerException(); + this.homeWordIndex = theHomeIndexDB; + + if (theHomeUrlDB == null) throw new NullPointerException(); + this.homeUrlDB = theHomeUrlDB; + + if (this.homeWordIndex.getRoot().equals(importRoot)) { + throw new IllegalArgumentException("Import and home DB directory must not be equal"); + } + + // configure import DB + String errorMsg = null; + if (!this.importRoot.exists()) errorMsg = "Import directory does not exist."; + if (!this.importRoot.canRead()) errorMsg = "Import directory is not readable."; + if (!this.importRoot.canWrite()) errorMsg = "Import directory is not writeable"; + if (!this.importRoot.isDirectory()) errorMsg = "ImportDirectory is not a directory."; + if (errorMsg != null) { + this.log.logSevere(errorMsg + "\nName: " + 
this.importRoot.getAbsolutePath()); + throw new IllegalArgumentException(errorMsg); + } + + this.log.logFine("Initializing source word index db."); + this.importWordIndex = new plasmaWordIndex(this.importRoot, 8*1024*1024, this.log); + this.log.logFine("Initializing import URL db."); + this.importUrlDB = new plasmaCrawlLURL(new File(this.importRoot, "urlHash.db"), 4*1024*1024); + this.importStartSize = this.importWordIndex.size(); + } + + public void run() { + try { + importWordsDB(); + } finally { + globalEnd = System.currentTimeMillis(); + finishedJobs.add(this); + } + } + + public long getTotalRuntime() { + return (this.globalEnd == 0)?System.currentTimeMillis()-this.globalStart:this.globalEnd-this.globalStart; + } + + public int getProcessingStatus() { + return (this.importStartSize-this.importWordIndex.size())/(this.importStartSize/100); + } + + public long getElapsedTime() { + return System.currentTimeMillis()-this.globalStart; + } + + public long getEstimatedTime() { + return (this.wordCounter==0)?0:this.importWordIndex.size()*((System.currentTimeMillis()-this.globalStart)/this.wordCounter); + } + + public void importWordsDB() { + this.log.logInfo("STARTING DB-IMPORT"); + + try { + this.log.logInfo("Importing DB from '" + this.importRoot.getAbsolutePath() + "' to '" + this.homeWordIndex.getRoot().getAbsolutePath() + "'."); + this.log.logInfo("Home word index contains " + this.homeWordIndex.size() + " words and " + this.homeUrlDB.size() + " URLs."); + this.log.logInfo("Import word index contains " + this.importWordIndex.size() + " words and " + this.importUrlDB.size() + " URLs."); + + // iterate over all words from import db + + Iterator importWordHashIterator = this.importWordIndex.wordHashes(wordChunkStartHash, true, true); + while (!isAborted() && importWordHashIterator.hasNext()) { + + plasmaWordIndexEntity importWordIdxEntity = null; + try { + wordCounter++; + wordHash = (String) importWordHashIterator.next(); + importWordIdxEntity = 
importWordIndex.getEntity(wordHash, true); + + if (importWordIdxEntity.size() == 0) { + importWordIdxEntity.deleteComplete(); + continue; + } + + // creating a container used to hold the imported entries + plasmaWordIndexEntryContainer newContainer = new plasmaWordIndexEntryContainer(wordHash,importWordIdxEntity.size()); + + // the combined container will fit, read the container + Iterator importWordIdxEntries = importWordIdxEntity.elements(true); + plasmaWordIndexEntry importWordIdxEntry; + while (importWordIdxEntries.hasNext()) { + + // testing if import process was aborted + if (isAborted()) break; + + // getting next word index entry + entryCounter++; + importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next(); + String urlHash = importWordIdxEntry.getUrlHash(); + if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) { + urlCounter++; + + // importing the new url + plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.getEntry(urlHash); + this.homeUrlDB.newEntry(urlEntry); + + if (urlCounter % 500 == 0) { + this.log.logFine(urlCounter + " URLs processed so far."); + } + } + + // adding word index entity to container + newContainer.add(importWordIdxEntry,System.currentTimeMillis()); + + if (entryCounter % 500 == 0) { + this.log.logFine(entryCounter + " word entries and " + wordCounter + " word entries processed so far."); + } + } + + // testing if import process was aborted + if (isAborted()) break; + + // importing entity container to home db + homeWordIndex.addEntries(newContainer, true); + + // delete complete index entity file + importWordIdxEntity.close(); + importWordIndex.deleteIndex(wordHash); + + // print out some statistical information + if (wordCounter%500 == 0) { + wordChunkEndHash = wordHash; + wordChunkEnd = System.currentTimeMillis(); + long duration = wordChunkEnd - wordChunkStart; + log.logInfo(wordCounter + " word entities imported " + + "[" + wordChunkStartHash + " .. 
" + wordChunkEndHash + "] " + + this.getProcessingStatus() + "%\n" + + "Speed: "+ 500*1000/duration + " word entities/s" + + " | Elapsed time: " + serverDate.intervalToString(getElapsedTime()) + + " | Estimated time: " + serverDate.intervalToString(getEstimatedTime()) + "\n" + + "Home Words = " + homeWordIndex.size() + + " | Import Words = " + importWordIndex.size()); + wordChunkStart = wordChunkEnd; + wordChunkStartHash = wordChunkEndHash; + } + + } catch (Exception e) { + log.logSevere("Import of word entity '" + wordHash + "' failed.",e); + } finally { + if (importWordIdxEntity != null) try { importWordIdxEntity.close(); } catch (Exception e) {} + } + } + + this.log.logInfo("Home word index contains " + homeWordIndex.size() + " words and " + homeUrlDB.size() + " URLs."); + this.log.logInfo("Import word index contains " + importWordIndex.size() + " words and " + importUrlDB.size() + " URLs."); + + this.log.logInfo("DB-IMPORT FINISHED"); + } catch (Exception e) { + this.log.logSevere("Database import failed.",e); + e.printStackTrace(); + this.error = e.toString(); + } finally { + if (importUrlDB != null) try { importUrlDB.close(); } catch (Exception e){} + if (importWordIndex != null) try { importWordIndex.close(5000); } catch (Exception e){} + } + } + + private boolean isAborted() { + return (this.stopped) || Thread.currentThread().isInterrupted(); + } + +} diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 4f2e98159..b31cef964 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -77,19 +77,18 @@ public class plasmaSnippetCache { private plasmaHTCache cacheManager; private plasmaParser parser; private serverLog log; - private String remoteProxyHost; - private int remoteProxyPort; - private boolean remoteProxyUse; + private plasmaSwitchboard sb; - public plasmaSnippetCache(plasmaHTCache cacheManager, plasmaParser parser, - String 
remoteProxyHost, int remoteProxyPort, boolean remoteProxyUse, - serverLog log) { + public plasmaSnippetCache( + plasmaSwitchboard theSb, + plasmaHTCache cacheManager, + plasmaParser parser, + serverLog log + ) { this.cacheManager = cacheManager; this.parser = parser; this.log = log; - this.remoteProxyHost = remoteProxyHost; - this.remoteProxyPort = remoteProxyPort; - this.remoteProxyUse = remoteProxyUse; + this.sb = theSb; this.snippetsScoreCounter = 0; this.snippetsScore = new kelondroMScoreCluster(); this.snippetsCache = new HashMap(); @@ -367,11 +366,9 @@ public class plasmaSnippetCache { 0, null, socketTimeout, - remoteProxyHost, - remoteProxyPort, - remoteProxyUse, - cacheManager, - log); + this.sb.remoteProxyConfig, + this.cacheManager, + this.log); } public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 6eb0099be..ba14cce61 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -121,6 +121,7 @@ import de.anomic.data.wikiBoard; import de.anomic.data.userDB; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.http.httpHeader; +import de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMSetTools; @@ -166,9 +167,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public plasmaCrawlStacker sbStackCrawlThread; public messageBoard messageDB; public wikiBoard wikiDB; - public String remoteProxyHost; - public int remoteProxyPort; - public boolean remoteProxyUse; public static plasmaCrawlRobotsTxt robots; public plasmaCrawlProfile profiles; public plasmaCrawlProfile.entry defaultProxyProfile; @@ -182,7 +180,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public yacyCore yc; 
public HashMap indexingTasksInProcess; public userDB userDB; + + /* + * Remote Proxy configuration + */ +// public boolean remoteProxyUse; +// public boolean remoteProxyUse4Yacy; +// public String remoteProxyHost; +// public int remoteProxyPort; +// public String remoteProxyNoProxy = ""; +// public String[] remoteProxyNoProxyPatterns = null; + public httpRemoteProxyConfig remoteProxyConfig = null; + + /* + * Some constants + */ private static final String STR_PROXYPROFILE = "defaultProxyProfile"; private static final String STR_REMOTEPROFILE = "defaultRemoteProfile"; private static final String STR_REMOTECRAWLTRIGGER = "REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER "; @@ -206,25 +219,45 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser this.listsPath = new File(rootPath, getConfig("listsPath", "LISTS")); this.log.logConfig("Lists Path: " + this.listsPath.toString()); - // remote proxy configuration - remoteProxyHost = getConfig("remoteProxyHost", ""); - try { - remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128")); - } catch (NumberFormatException e) { - remoteProxyPort = 3128; - } - if (getConfig("remoteProxyUse", "false").equals("true")) { - remoteProxyUse = true; - log.logConfig("Using remote proxy:" + - "\n\tHost: " + remoteProxyHost + - "\n\tPort: " + remoteProxyPort); - } else { - remoteProxyUse = false; - remoteProxyHost = null; - remoteProxyPort = 0; - } - proxyLastAccess = System.currentTimeMillis() - 60000; + /* ============================================================================ + * Remote Proxy configuration + * ============================================================================ */ + this.remoteProxyConfig = httpRemoteProxyConfig.init(this); + this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString()); + +// // reading the proxy host name +// this.remoteProxyHost = getConfig("remoteProxyHost", ""); +// +// // reading the proxy host port +// try { +// 
this.remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128")); +// } catch (NumberFormatException e) { +// this.remoteProxyPort = 3128; +// } +// +// // determining if remote proxy should be used for yacy -> yacy communication +// this.remoteProxyUse4Yacy = getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true"); +// +// // determining addresses for which the remote proxy should not be used +// this.remoteProxyNoProxy = getConfig("remoteProxyNoProxy",""); +// this.remoteProxyNoProxyPatterns = this.remoteProxyNoProxy.split(","); +// +// // determining if remote Proxy should be used +// if (getConfig("remoteProxyUse", "false").equalsIgnoreCase("true")) { +// this.remoteProxyUse = true; +// this.log.logConfig("Using remote proxy:" + +// "\n\tHost: " + this.remoteProxyHost + +// "\n\tPort: " + this.remoteProxyPort + +// "\n\tUseProxy4Yacy: " + Boolean.toString(this.remoteProxyUse4Yacy) +// ); +// } else { +// this.remoteProxyUse = false; +// this.remoteProxyHost = null; +// this.remoteProxyPort = 0; +// } + this.proxyLastAccess = System.currentTimeMillis() - 60000; + // configuring list path if (!(listsPath.exists())) listsPath.mkdirs(); // load coloured lists @@ -420,9 +453,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // generate snippets cache log.logConfig("Initializing Snippet Cache"); - snippetCache = new plasmaSnippetCache(cacheManager, parser, - remoteProxyHost, remoteProxyPort, remoteProxyUse, - log); + snippetCache = new plasmaSnippetCache(this,cacheManager, parser,log); // start yacy core log.logConfig("Starting YaCy Protocol Core"); diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index e13fb8d94..0042cdd96 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -45,6 +45,7 @@ import java.net.URL; import java.util.ArrayList; import java.util.Hashtable; +import 
de.anomic.http.httpRemoteProxyConfig; import de.anomic.http.httpc; public class loaderThreads { @@ -53,28 +54,30 @@ public class loaderThreads { private int timeout; private String user; private String password; - private String remoteProxyHost; - private int remoteProxyPort; + private httpRemoteProxyConfig remoteProxyConfig; // management objects for collection of threads Hashtable threads; int completed, failed; public loaderThreads() { - this(null, 0); + this(null); } - public loaderThreads(String remoteProxyHost, int remoteProxyPort) { - this(10000, null, null, remoteProxyHost, remoteProxyPort); + public loaderThreads(httpRemoteProxyConfig theremoteProxyConfig) { + this(10000, null, null, theremoteProxyConfig); } - public loaderThreads(int timeout, String user, String password, - String remoteProxyHost, int remoteProxyPort) { + public loaderThreads( + int timeout, + String user, + String password, + httpRemoteProxyConfig theremoteProxyConfig + ) { this.timeout = timeout; this.user = user; this.password = password; - this.remoteProxyHost = remoteProxyHost; - this.remoteProxyPort = remoteProxyPort; + this.remoteProxyConfig = theremoteProxyConfig; this.threads = new Hashtable(); this.completed = 0; this.failed = 0; @@ -145,7 +148,7 @@ public class loaderThreads { public void run() { try { - page = httpc.wget(url, timeout, user, password, remoteProxyHost, remoteProxyPort); + page = httpc.wget(url, timeout, user, password, remoteProxyConfig); loaded = true; process.feed(page); if (process.status() == loaderCore.STATUS_FAILED) { @@ -227,7 +230,8 @@ public class loaderThreads { } public static void main(String[] args) { - loaderThreads loader = new loaderThreads("192.168.1.122", 3128); + httpRemoteProxyConfig proxyConfig = httpRemoteProxyConfig.init("192.168.1.122", 3128); + loaderThreads loader = new loaderThreads(proxyConfig); try { loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt")); } catch (MalformedURLException e) { diff --git 
a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index bf7e03e4c..a9cf29522 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -99,20 +99,32 @@ public final class yacyClient { 10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort)); */ + // building URL final URL url = new URL("http://" + address + "/yacy/hello.html"); + + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // adding all needed parameters final serverObjects obj = new serverObjects(6); - obj.put("iam", yacyCore.seedDB.mySeed.hash); - obj.put("pattern", ""); - obj.put("count", "20"); - obj.put("key", key); - obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); - obj.put("myUTC", System.currentTimeMillis()); - obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key)); - result = nxTools.table(httpc.wput(url, - 105000, null, null, - yacyCore.seedDB.sb.remoteProxyHost, - yacyCore.seedDB.sb.remoteProxyPort, - obj)); + obj.put("iam", yacyCore.seedDB.mySeed.hash); + obj.put("pattern", ""); + obj.put("count", "20"); + obj.put("key", key); + obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + obj.put("myUTC", System.currentTimeMillis()); + obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key)); + + // sending request + result = nxTools.table( + httpc.wput(url, + 105000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + obj + ) + ); } catch (Exception e) { if (Thread.currentThread().isInterrupted()) { yacyCore.log.logFine("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' interrupted."); @@ -215,12 +227,27 @@ public final class yacyClient { public static yacySeed querySeed(yacySeed target, String seedHash) { final String key = crypt.randomSalt(); try { - final HashMap result = 
nxTools.table(httpc.wget( - new URL("http://" + target.getAddress() + - "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + - "&youare=" + target.hash + "&key=" + key + - "&object=seed&env=" + seedHash), - 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // sending request + final HashMap result = nxTools.table( + httpc.wget( + new URL("http://" + target.getAddress() + + "/yacy/query.html" + + "?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + + "&key=" + key + + "&object=seed" + + "&env=" + seedHash + ), + 10000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null + ) + ); + if (result == null || result.size() == 0) { return null; } //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time return yacySeed.genRemoteSeed((String) result.get("response"), key); @@ -232,13 +259,28 @@ public final class yacyClient { public static int queryRWICount(yacySeed target, String wordHash) { try { - final HashMap result = nxTools.table(httpc.wget( - new URL("http://" + target.getAddress() + - "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + - "&youare=" + target.hash + "&key=" + - "&object=rwicount&env=" + wordHash + - "&ttl=0"), - 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + // should we use the proxy? 
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // sending request + final HashMap result = nxTools.table( + httpc.wget( + new URL("http://" + target.getAddress() + + "/yacy/query.html" + + "?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + + "&key=" + + "&object=rwicount" + + "&env=" + wordHash + + "&ttl=0" + ), + 10000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null + ) + ); + if (result == null || result.size() == 0) { return -1; } return Integer.parseInt((String) result.get("response")); } catch (Exception e) { @@ -247,19 +289,36 @@ public final class yacyClient { } } - public static int queryUrlCount(yacySeed target) { + public static int queryUrlCount(yacySeed target) { if (target == null) { return -1; } if (yacyCore.seedDB.mySeed == null) return -1; + + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // building url final String querystr = - "http://" + target.getAddress() + - "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + - "&youare=" + target.hash + - "&key=" + - "&object=lurlcount&env=&ttl=0"; + "http://" + target.getAddress() + + "/yacy/query.html" + + "?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + + "&key=" + + "&object=lurlcount" + + "&env=" + + "&ttl=0"; + + // sending request try { - final HashMap result = nxTools.table(httpc.wget( - new URL(querystr), 6000, null, null, - yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + final HashMap result = nxTools.table( + httpc.wget( + new URL(querystr), + 6000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null + ) + ); + // yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString()); if ((result == null) || (result.size() == 0)) return -1; final String resp = (String) 
result.get("response"); @@ -290,8 +349,14 @@ public final class yacyClient { // request result final String key = crypt.randomSalt(); - try { + try { + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // building url final String url = "http://" + targetPeer.getAddress() + "/yacy/search.html"; + + // adding all needed parameters /* String url = "http://" + targetPeer.getAddress() + "/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) + @@ -301,22 +366,30 @@ public final class yacyClient { "&query=" + wordhashes; */ final serverObjects obj = new serverObjects(9); - obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); - obj.put("youare", targetPeer.hash); - obj.put("key", key); - obj.put("count", count); - obj.put("resource", ((global) ? "global" : "local")); - obj.put("query", wordhashes); - obj.put("ttl", "0"); - obj.put("duetime", Long.toString(duetime)); - obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); + obj.put("youare", targetPeer.hash); + obj.put("key", key); + obj.put("count", count); + obj.put("resource", ((global) ? 
"global" : "local")); + obj.put("query", wordhashes); + obj.put("ttl", "0"); + obj.put("duetime", Long.toString(duetime)); + obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + //yacyCore.log.logDebug("yacyClient.search url=" + url); final long timestamp = System.currentTimeMillis(); - final HashMap result = nxTools.table(httpc.wput(new URL(url), - 300000, null, null, - yacyCore.seedDB.sb.remoteProxyHost, - yacyCore.seedDB.sb.remoteProxyPort, - obj)); + + // sending request + final HashMap result = nxTools.table( + httpc.wput( + new URL(url), + 300000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + obj + ) + ); final long totalrequesttime = System.currentTimeMillis() - timestamp; /* @@ -394,13 +467,20 @@ public final class yacyClient { // ask for allowed message size and attachement size // if this replies null, the peer does not answer if (yacyCore.seedDB == null || yacyCore.seedDB.mySeed == null) { return null; } - final serverObjects post = new serverObjects(5); + + // should we use the proxy? 
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // adding all needed parameters final String key = crypt.randomSalt(); - post.put("key", key); - post.put("process", "permission"); - post.put("iam", yacyCore.seedDB.mySeed.hash); - post.put("youare", targetHash); - post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + final serverObjects post = new serverObjects(5); + post.put("key", key); + post.put("process", "permission"); + post.put("iam", yacyCore.seedDB.mySeed.hash); + post.put("youare", targetHash); + post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + + // getting target address String address; if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) { address = yacyCore.seedDB.mySeed.getAddress(); @@ -412,10 +492,19 @@ public final class yacyClient { //System.out.println("remote address: " + address); } if (address == null) { address = "localhost:8080"; } + + // sending request try { - return nxTools.table(httpc.wput( - new URL("http://" + address + "/yacy/message.html"), - 8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post)); + return nxTools.table( + httpc.wput( + new URL("http://" + address + "/yacy/message.html"), + 8000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ) + ); } catch (Exception e) { // most probably a network time-out exception yacyCore.log.logSevere("yacyClient.permissionMessage error:" + e.getMessage()); @@ -425,15 +514,22 @@ public final class yacyClient { public static HashMap postMessage(String targetHash, String subject, byte[] message) { // this post a message to the remote message board - final serverObjects post = new serverObjects(7); + + // should we use the proxy? 
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + // adding all needed parameters final String key = crypt.randomSalt(); - post.put("key", key); - post.put("process", "post"); - post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); - post.put("youare", targetHash); - post.put("subject", subject); - post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); - post.put("message", new String(message)); + final serverObjects post = new serverObjects(7); + post.put("key", key); + post.put("process", "post"); + post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); + post.put("youare", targetHash); + post.put("subject", subject); + post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); + post.put("message", new String(message)); + + // getting target address String address; if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) { address = yacyCore.seedDB.mySeed.getAddress(); @@ -441,11 +537,18 @@ public final class yacyClient { address = yacyCore.seedDB.getConnected(targetHash).getAddress(); } if (address == null) { address = "localhost:8080"; } - //System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString()); + + // sending request try { - final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/message.html"), 20000, null, null, - yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); - //System.out.println("V=" + v.toString()); + final ArrayList v = httpc.wput( + new URL("http://" + address + "/yacy/message.html"), + 20000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ); + return nxTools.table(v); } catch (Exception e) { yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage()); @@ -459,9 +562,12 @@ public final class yacyClient { if (yacyCore.seedDB.mySeed == null) { return null; } if (yacyCore.seedDB.mySeed == targetSeed) { return null; 
} + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + // construct request - final serverObjects post = new serverObjects(9); final String key = crypt.randomSalt(); + final serverObjects post = new serverObjects(9); post.put("key", key); post.put("process", "crawl"); post.put("iam", yacyCore.seedDB.mySeed.hash); @@ -472,12 +578,22 @@ public final class yacyClient { post.put("depth", "0"); post.put("ttl", "0"); + // determining target address final String address = targetSeed.getAddress(); if (address == null) { return null; } + + // sending request try { - return nxTools.table(httpc.wput( - new URL("http://" + address + "/yacy/crawlOrder.html"), - 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post)); + return nxTools.table( + httpc.wput( + new URL("http://" + address + "/yacy/crawlOrder.html"), + 10000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ) + ); } catch (Exception e) { // most probably a network time-out exception yacyCore.log.logSevere("yacyClient.crawlOrder error: peer=" + targetSeed.getName() + ", error=" + e.getMessage()); @@ -516,26 +632,38 @@ public final class yacyClient { stale - the resource was reloaded but not processed because source had no changes */ + + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); // construct request final String key = crypt.randomSalt(); + // determining target address String address = targetSeed.getAddress(); if (address == null) { return null; } + + // sending request try { - return nxTools.table(httpc.wget( - new URL("http://" + address + "/yacy/crawlReceipt.html?" + - "iam=" + yacyCore.seedDB.mySeed.hash + - "&youare=" + targetSeed.hash + - "&process=" + process + - "&key=" + key + - "&urlhash=" + ((entry == null) ? 
"" : entry.hash()) + - "&result=" + result + - "&reason=" + reason + - "&wordh=" + wordhashes + - "&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key)) - ), - 60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + return nxTools.table( + httpc.wget( + new URL("http://" + address + "/yacy/crawlReceipt.html" + + "?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + targetSeed.hash + + "&process=" + process + + "&key=" + key + + "&urlhash=" + ((entry == null) ? "" : entry.hash()) + + "&result=" + result + + "&reason=" + reason + + "&wordh=" + wordhashes + + "&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key)) + ), + 60000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null + ) + ); } catch (Exception e) { // most probably a network time-out exception yacyCore.log.logSevere("yacyClient.crawlReceipt error:" + e.getMessage()); @@ -549,24 +677,28 @@ public final class yacyClient { */ public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, HashMap urlCache, boolean gzipBody, int timeout) { + HashMap in = transferRWI(targetSeed, indexes, gzipBody, timeout); if (in == null) { return "no_connection_1"; } String result = (String) in.get("result"); if (result == null) { return "no_result_1"; } if (!(result.equals("ok"))) return result; + // in now contains a list of unknown hashes final String uhss = (String) in.get("unknownURL"); if (uhss == null) { return "no_unknownURL_tag_in_response"; } if (uhss.length() == 0) { return null; } // all url's known, we are ready here + final String[] uhs = uhss.split(","); -// System.out.println("DEBUG yacyClient.transferIndex: " + uhs.length + " urls unknown"); if (uhs.length == 0) { return null; } // all url's known + // extract the urlCache from the result plasmaCrawlLURL.Entry[] urls = new plasmaCrawlLURL.Entry[uhs.length]; for (int i = 0; i < uhs.length; i++) { urls[i] = 
(plasmaCrawlLURL.Entry) urlCache.get(uhs[i]); if (urls[i] == null) System.out.println("DEBUG transferIndex: error with requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'"); } + in = transferURL(targetSeed, urls, gzipBody, timeout); if (in == null) { return "no_connection_2"; } result = (String) in.get("result"); @@ -574,12 +706,17 @@ public final class yacyClient { if (!(result.equals("ok"))) { return result; } // int doubleentries = Integer.parseInt((String) in.get("double")); // System.out.println("DEBUG tansferIndex: transferred " + uhs.length + " URL's, double=" + doubleentries); + return null; } private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, boolean gzipBody, int timeout) { final String address = targetSeed.getAddress(); if (address == null) { return null; } + + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + // prepare post values final serverObjects post = new serverObjects(7); final String key = crypt.randomSalt(); @@ -592,6 +729,8 @@ public final class yacyClient { post.put("iam", yacyCore.seedDB.mySeed.hash); post.put("youare", targetSeed.hash); post.put("wordc", Integer.toString(indexes.length)); + + int indexcount = 0; final StringBuffer entrypost = new StringBuffer(indexes.length*73); Iterator eenum; @@ -618,8 +757,14 @@ public final class yacyClient { post.put("entryc", Integer.toString(indexcount)); post.put("indexes", entrypost.toString()); try { - final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferRWI.html"), timeout, null, null, - yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + final ArrayList v = httpc.wput( + new URL("http://" + address + "/yacy/transferRWI.html"), + timeout, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ); // this should return a list of urlhashes that are unknwon if (v != 
null) { yacyCore.seedDB.mySeed.incSI(indexcount); @@ -637,6 +782,10 @@ public final class yacyClient { // this post a message to the remote message board final String address = targetSeed.getAddress(); if (address == null) { return null; } + + // should we use the proxy? + boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + // prepare post values final serverObjects post = new serverObjects(5+urls.length); final String key = crypt.randomSalt(); @@ -662,8 +811,15 @@ public final class yacyClient { } post.put("urlc", Integer.toString(urlc)); try { - final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferURL.html"), timeout, null, null, - yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + final ArrayList v = httpc.wput( + new URL("http://" + address + "/yacy/transferURL.html"), + timeout, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ); + if (v != null) { yacyCore.seedDB.mySeed.incSU(urlc); } @@ -675,6 +831,10 @@ public final class yacyClient { } public static HashMap getProfile(yacySeed targetSeed) { + + // should we use the proxy? 
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + // this post a message to the remote message board final serverObjects post = new serverObjects(2); post.put("iam", yacyCore.seedDB.mySeed.hash); @@ -682,8 +842,15 @@ public final class yacyClient { String address = targetSeed.getAddress(); if (address == null) { address = "localhost:8080"; } try { - final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/profile.html"), 20000, null, null, - yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + final ArrayList v = httpc.wput( + new URL("http://" + address + "/yacy/profile.html"), + 20000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null, + post + ); + return nxTools.table(v); } catch (Exception e) { yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage()); @@ -701,14 +868,24 @@ public final class yacyClient { final String wordhashe = plasmaWordIndexEntry.word2hash("test"); //System.out.println("permission=" + permissionMessage(args[1])); - final HashMap result = nxTools.table(httpc.wget( - new URL("http://" + target.getAddress() + - "/yacy/search.html?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + - "&youare=" + target.hash + "&key=" + - "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + - "&count=10&resource=global" + - "&query=" + wordhashe), - 5000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + // should we use the proxy? 
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy()); + + final HashMap result = nxTools.table( + httpc.wget( + new URL("http://" + target.getAddress() + "/yacy/search.html" + + "?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + + "&youare=" + target.hash + "&key=" + + "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) + + "&count=10" + + "&resource=global" + + "&query=" + wordhashe), + 5000, + null, + null, + (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null + ) + ); System.out.println("Result=" + result.toString()); } catch (Exception e) { e.printStackTrace(); diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index b9bad111e..541eef6f6 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -161,14 +161,14 @@ public class yacyPeerActions { // load the seed list try { url = new URL(seedListFileURL); - header = httpc.whead(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + header = httpc.whead(url, 5000, null, null, this.sb.remoteProxyConfig); if ((header == null) || (header.lastModified() == null)) { yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available"); } else if ((header.age() > 86400000) && (ssc > 0)) { yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)"); } else { ssc++; - seedList = httpc.wget(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + seedList = httpc.wget(url, 5000, null, null, this.sb.remoteProxyConfig); enu = seedList.iterator(); lc = 0; while (enu.hasNext()) { @@ -221,7 +221,7 @@ public class yacyPeerActions { // read in remote file from url try { - ArrayList remote = httpc.wget(new URL(url), 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + ArrayList remote = httpc.wget(new URL(url), 5000, null, null, 
this.sb.remoteProxyConfig); if ((remote != null) && (remote.size() > 0)) { Iterator e = remote.iterator(); while (e.hasNext()) { diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index f54b31796..06e964f57 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -708,7 +708,14 @@ public final class yacySeedDB { private boolean checkCache(ArrayList uv, URL seedURL) throws IOException { // check if the result can be retrieved again - ArrayList check = httpc.wget(seedURL, 10000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + // TODO: should we check the useProxy4Yacy option here??? + ArrayList check = httpc.wget( + seedURL, + 10000, + null, + null, + sb.remoteProxyConfig + ); if (check == null) { serverLog.logFine("YACY","SaveSeedList: Testing download failed ..."); diff --git a/source/yacy.java b/source/yacy.java index 3cd8fe9eb..de7cfb3dd 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -456,7 +456,7 @@ public final class yacy { server.terminate(false); server.interrupt(); if (server.isAlive()) try { - httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null, 0); // kick server + httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null); // kick server serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket"); } catch (IOException ee) { serverLog.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)"); diff --git a/yacy.init b/yacy.init index 877e08f93..961a707be 100644 --- a/yacy.init +++ b/yacy.init @@ -142,11 +142,16 @@ plasmaBlueList=yacy.blue # if you wish to do that, specify it here # if you want to switch on the proxy use, set remoteProxyUse=true # remoteProxyNoProxy is a no-proxy pattern list for the remote proxy +remoteProxyUse=false +remoteProxyUse4Yacy=true +remoteProxyUse4SSL=true + remoteProxyHost=192.168.2.2 remoteProxyPort=4239 +remoteProxyUser= +remoteProxyPwd= + 
remoteProxyNoProxy=192.*,10.*,127.*,localhost -remoteProxyUse=false -#remoteProxyUse=true # the proxy may filter the content of transferred web pages # the bluelist removes specific keywords from web pages