diff --git a/htroot/IndexImport_p.html b/htroot/IndexImport_p.html
new file mode 100644
index 000000000..7889a6fef
--- /dev/null
+++ b/htroot/IndexImport_p.html
@@ -0,0 +1,105 @@
+
+
+
+YaCy '#[clientname]#': Index Import
+#[metas]#
+
+
+
+#[header]#
+
+Index DB Import
+
+The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references
+
+#(error)#
+ ::
+ #[error_msg]#
+ ::
+ Import Job with the same path already started
+#(/error)#
+Starting new Job
+
+
+
+
+
+
+
+Last Refresh: #[date]#
+
+#[footer]#
+
+
diff --git a/htroot/IndexImport_p.java b/htroot/IndexImport_p.java
new file mode 100644
index 000000000..8f182b65c
--- /dev/null
+++ b/htroot/IndexImport_p.java
@@ -0,0 +1,196 @@
+//IndexTransfer_p.java
+//-----------------------
+//part of the AnomicHTTPD caching proxy
+//(C) by Michael Peter Christen; mc@anomic.de
+//first published on http://www.anomic.de
+//Frankfurt, Germany, 2005
+//
+//This file is contributed by Martin Thelian
+//
+// $LastChangedDate: 2005-10-17 17:46:12 +0200 (Mo, 17 Okt 2005) $
+// $LastChangedRevision: 947 $
+// $LastChangedBy: borg-0300 $
+//
+//This program is free software; you can redistribute it and/or modify
+//it under the terms of the GNU General Public License as published by
+//the Free Software Foundation; either version 2 of the License, or
+//(at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+//along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+//Using this software in any meaning (reading, learning, copying, compiling,
+//running) means that you agree that the Author(s) is (are) not responsible
+//for cost, loss of data or any harm that may be caused directly or indirectly
+//by usage of this softare or this documentation. The usage of this software
+//is on your own risk. The installation and usage (starting/running) of this
+//software may allow other people or application to access your computer and
+//any attached devices and is highly dependent on the configuration of the
+//software which must be done by the user of the software; the author(s) is
+//(are) also not responsible for proper configuration and usage of the
+//software, even if provoked by documentation provided together with
+//the software.
+//
+//Any changes to this file according to the GPL as documented in the file
+//gpl.txt aside this file in the shipment you received can be done to the
+//lines that follows this copyright notice here, but changes must not be
+//done inside the copyright notive above. A re-distribution must contain
+//the intact and unchanged copyright notice.
+//Contributions and changes to the program code must be marked as such.
+
+//You must compile this file with
+//javac -classpath .:../Classes IndexImport_p.java
+//if the shell's current path is HTROOT
+
+import java.io.File;
+import java.util.Date;
+import java.util.Vector;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaDbImporter;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverDate;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public final class IndexImport_p {
+
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+ // return variable that accumulates replacements
+ plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+ serverObjects prop = new serverObjects();
+
+ int activeCount = 0;
+
+ if (post != null) {
+ if (post.containsKey("startIndexDbImport")) {
+ try {
+ // getting the import path
+ String importPath = (String) post.get("importPath");
+ boolean startImport = true;
+
+ // check if there is an already running thread with the same import path
+ Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2];
+ activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads);
+
+ for (int i=0; i < activeCount; i++) {
+ plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i];
+ if (currThread.getImportRoot().equals(new File(importPath))) {
+ prop.put("error",2);
+ startImport = false;
+ }
+ }
+
+ if (startImport) {
+ plasmaDbImporter newImporter = new plasmaDbImporter(switchboard.wordIndex,switchboard.urlPool.loadedURL,importPath);
+ newImporter.start();
+
+ prop.put("LOCATION","");
+ return prop;
+ }
+ } catch (Exception e) {
+ prop.put("error",1);
+ prop.put("error_error_msg",e.toString());
+ }
+ } else if (post.containsKey("clearFinishedJobList")) {
+ plasmaDbImporter.finishedJobs.clear();
+ prop.put("LOCATION","");
+ return prop;
+ } else if (post.containsKey("stopIndexDbImport")) {
+ // getting the job nr of the thread that should be stopped
+ String jobNr = (String) post.get("jobNr");
+
+ Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2];
+ activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads);
+
+ for (int i=0; i < activeCount; i++) {
+ plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i];
+ if (currThread.getJobNr() == Integer.valueOf(jobNr).intValue()) {
+ currThread.stoppIt();
+ try {
+ currThread.join();
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ break;
+ }
+ }
+ prop.put("LOCATION","");
+ return prop;
+ }
+ }
+
+ prop.put("wcount", Integer.toString(switchboard.wordIndex.size()));
+ prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size()));
+
+ /*
+ * Loop over all currently running jobs
+ */
+ Thread[] importThreads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2];
+ activeCount = plasmaDbImporter.runningJobs.enumerate(importThreads);
+
+ for (int i=0; i < activeCount; i++) {
+ plasmaDbImporter currThread = (plasmaDbImporter) importThreads[i];
+
+ File importPath = currThread.getImportRoot();
+ String currWordHash = currThread.getCurrentWordhash();
+ long currWordEntryCount = currThread.getWordEntryCounter();
+ long currWordEntityCounter = currThread.getWordEntityCounter();
+ long currUrlCounter = currThread.getUrlCounter();
+ long currImportDbSize = currThread.getImportWordDbSize();
+ long estimatedTime = currThread.getEstimatedTime();
+ long elapsedTime = currThread.getElapsedTime();
+ int jobNr = currThread.getJobNr();
+ int percent = currThread.getProcessingStatus();
+
+ boolean isRunning = currThread.isAlive();
+
+ prop.put("running.jobs_" + i + "_path", importPath.toString());
+ prop.put("running.jobs_" + i + "_stopped", isRunning ? 1:0);
+ prop.put("running.jobs_" + i + "_percent", Integer.toString(percent));
+ prop.put("running.jobs_" + i + "_elapsed", serverDate.intervalToString(elapsedTime));
+ prop.put("running.jobs_" + i + "_estimated", serverDate.intervalToString(estimatedTime));
+ prop.put("running.jobs_" + i + "_wordHash", currWordHash);
+ prop.put("running.jobs_" + i + "_url_num", Long.toString(currUrlCounter));
+ prop.put("running.jobs_" + i + "_word_entity_num", Long.toString(currWordEntityCounter));
+ prop.put("running.jobs_" + i + "_word_entry_num", Long.toString(currWordEntryCount));
+ prop.put("running.jobs_" + i + "_stopped_job_nr", Integer.toString(jobNr));
+ }
+ prop.put("running.jobs",activeCount);
+
+ /*
+ * Loop over all finished jobs
+ */
+ Vector finishedJobs = (Vector) plasmaDbImporter.finishedJobs.clone();
+ for (int i=0; i
Remote Proxy (optional)
YaCy can use another proxy to connect to the internet. You can enter the address for the remote proxy here:
diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java
index f25516118..6811a927f 100644
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@@ -109,10 +109,17 @@ public final class Settings_p {
}
// remote proxy
+ prop.put("remoteProxyUseChecked", env.getConfig("remoteProxyUse", "false").equals("true") ? 1 : 0);
+ prop.put("remoteProxyUse4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equals("true") ? 1 : 0);
+ prop.put("remoteProxyUse4SSL", env.getConfig("remoteProxyUse4SSL", "true").equals("true") ? 1 : 0);
+
prop.put("remoteProxyHost", env.getConfig("remoteProxyHost", ""));
prop.put("remoteProxyPort", env.getConfig("remoteProxyPort", ""));
+
+ prop.put("remoteProxyUser", env.getConfig("remoteProxyUser", ""));
+ prop.put("remoteProxyPwd", env.getConfig("remoteProxyPwd", ""));
+
prop.put("remoteProxyNoProxy", env.getConfig("remoteProxyNoProxy", ""));
- prop.put("remoteProxyUseChecked", ((String) env.getConfig("remoteProxyUse", "false")).equals("true") ? 1 : 0);
// proxy access filter
prop.put("proxyfilter", env.getConfig("proxyClient", "*"));
diff --git a/htroot/Skins_p.java b/htroot/Skins_p.java
index 6ae0a0e90..9b24ea565 100644
--- a/htroot/Skins_p.java
+++ b/htroot/Skins_p.java
@@ -124,7 +124,7 @@ public class Skins_p {
String url = (String)post.get("url");
ArrayList skinVector;
try{
- skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort);
+ skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyConfig);
}catch(IOException e){
prop.put("status", 1);//unable to get url
prop.put("status_url", url);
diff --git a/htroot/Status.java b/htroot/Status.java
index 0a73738d9..8dc3fe65f 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -132,6 +132,7 @@ public class Status {
prop.put("remoteProxy", 1);
prop.put("remoteProxy_host", env.getConfig("remoteProxyHost", ""));
prop.put("remoteProxy_port", env.getConfig("remoteProxyPort", ""));
+ prop.put("remoteProxy_4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true")?0:1);
} else {
prop.put("remoteProxy", 0); // not used
}
diff --git a/htroot/Status_p.inc b/htroot/Status_p.inc
index d516f709e..fc57d1bf7 100644
--- a/htroot/Status_p.inc
+++ b/htroot/Status_p.inc
@@ -19,7 +19,7 @@
- Proxy host
+ Peer host
#[host]#:#[port]#
@@ -30,7 +30,7 @@
Remote proxy
- #(remoteProxy)#not used::#[host]#:#[port]##(/remoteProxy)#
+ #(remoteProxy)#not used::#[host]#:#[port]# | Used for YaCy -> YaCy communication: #(4Yacy)#Yes::No #(/4Yacy)# #(/remoteProxy)#
diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java
index 4c6f67659..89e1ddfdc 100644
--- a/htroot/sharedBlacklist_p.java
+++ b/htroot/sharedBlacklist_p.java
@@ -142,7 +142,7 @@ public class sharedBlacklist_p {
//Make Adresse
address = "http://" + IP + ":" + Port + "/yacy/list.html?col=black";
try {
- otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List
+ otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List
} catch (Exception e) {}
//Make HTML-Optionlist with retrieved items
@@ -173,7 +173,7 @@ public class sharedBlacklist_p {
Name = address;
try {
- otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List
+ otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List
} catch (Exception e) {}
prop.put("status", 0); //TODO: check if the wget failed...
diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java
index 22a19e4a8..523f3d5a8 100644
--- a/source/de/anomic/data/robotsParser.java
+++ b/source/de/anomic/data/robotsParser.java
@@ -244,10 +244,10 @@ public final class robotsParser{
try {
downloadStart = System.currentTimeMillis();
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
- if (!sb.remoteProxyUse) {
+ if ((sb.remoteProxyConfig == null) || (!sb.remoteProxyConfig.useProxy())) {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false);
} else {
- con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyHost, sb.remoteProxyPort);
+ con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyConfig);
}
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
diff --git a/source/de/anomic/http/httpRemoteProxyConfig.java b/source/de/anomic/http/httpRemoteProxyConfig.java
new file mode 100644
index 000000000..b94689e7c
--- /dev/null
+++ b/source/de/anomic/http/httpRemoteProxyConfig.java
@@ -0,0 +1,181 @@
+//httpRemoteProxyConfig.java
+//-----------------------
+//part of the AnomicHTTPD caching proxy
+//(C) by Michael Peter Christen; mc@anomic.de
+//first published on http://www.anomic.de
+//Frankfurt, Germany, 2004
+//
+//this file was contributed by Martin Thelian
+//$LastChangedDate$
+//$LastChangedBy$
+//$LastChangedRevision$
+//
+//This program is free software; you can redistribute it and/or modify
+//it under the terms of the GNU General Public License as published by
+//the Free Software Foundation; either version 2 of the License, or
+//(at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+//along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+//Using this software in any meaning (reading, learning, copying, compiling,
+//running) means that you agree that the Author(s) is (are) not responsible
+//for cost, loss of data or any harm that may be caused directly or indirectly
+//by usage of this softare or this documentation. The usage of this software
+//is on your own risk. The installation and usage (starting/running) of this
+//software may allow other people or application to access your computer and
+//any attached devices and is highly dependent on the configuration of the
+//software which must be done by the user of the software; the author(s) is
+//(are) also not responsible for proper configuration and usage of the
+//software, even if provoked by documentation provided together with
+//the software.
+//
+//Any changes to this file according to the GPL as documented in the file
+//gpl.txt aside this file in the shipment you received can be done to the
+//lines that follows this copyright notice here, but changes must not be
+//done inside the copyright notive above. A re-distribution must contain
+//the intact and unchanged copyright notice.
+//Contributions and changes to the program code must be marked as such.
+
+//You must compile this file with
+//javac -classpath .:../Classes Settings_p.java
+//if the shell's current path is HTROOT
+
+package de.anomic.http;
+
+import java.util.HashSet;
+
+import de.anomic.plasma.plasmaSwitchboard;
+
+/**
+ * Holds the settings of the remote (upstream) HTTP proxy: whether it is
+ * enabled, for which kinds of traffic it applies, where it is reachable,
+ * the optional credentials and the no-proxy exception list.
+ *
+ * Instances are created through the static <code>init</code> factories;
+ * the class offers no setters.
+ */
+public final class httpRemoteProxyConfig {
+
+    /*
+     * Remote Proxy configuration
+     */
+    private boolean remoteProxyUse;
+    private boolean remoteProxyUse4Yacy;
+    private boolean remoteProxyUse4SSL;
+
+    private String remoteProxyHost;
+    private int remoteProxyPort;
+    private String remoteProxyUser;
+    private String remoteProxyPwd;
+
+    private String remoteProxyNoProxy = "";
+    private String[] remoteProxyNoProxyPatterns = null;
+
+    // NOTE(review): presumably caches of server names that were already
+    // matched against the no-proxy patterns; they are filled by callers,
+    // never by this class - confirm against httpdProxyHandler.
+    public final HashSet remoteProxyAllowProxySet = new HashSet();
+    public final HashSet remoteProxyDisallowProxySet = new HashSet();
+
+    /**
+     * @return <code>true</code> if the remote proxy is enabled at all
+     */
+    public boolean useProxy() {
+        return this.remoteProxyUse;
+    }
+
+    /**
+     * @return <code>true</code> if the remote proxy is enabled and should
+     *         be used for yacy -> yacy communication
+     */
+    public boolean useProxy4Yacy() {
+        // the per-usage flag defaults to "true", so it must never be
+        // reported as active while the proxy itself is switched off
+        return this.remoteProxyUse && this.remoteProxyUse4Yacy;
+    }
+
+    /**
+     * @return <code>true</code> if the remote proxy is enabled and should
+     *         be used for ssl connections
+     */
+    public boolean useProxy4SSL() {
+        // see useProxy4Yacy(): a disabled proxy is not used for SSL either
+        return this.remoteProxyUse && this.remoteProxyUse4SSL;
+    }
+
+    /**
+     * @return the proxy host name; may be <code>null</code> or empty
+     */
+    public String getProxyHost() {
+        return this.remoteProxyHost;
+    }
+
+    /**
+     * @return the proxy port
+     */
+    public int getProxyPort() {
+        return this.remoteProxyPort;
+    }
+
+    /**
+     * @return the user name for proxy authentication; <code>null</code>
+     *         if the config was created via {@link #init(String, int)}
+     */
+    public String getProxyUser() {
+        return this.remoteProxyUser;
+    }
+
+    /**
+     * @return the password for proxy authentication; <code>null</code>
+     *         if the config was created via {@link #init(String, int)}
+     */
+    public String getProxyPwd() {
+        return this.remoteProxyPwd;
+    }
+
+    /**
+     * @return the raw, comma separated no-proxy setting
+     */
+    public String getProxyNoProxy() {
+        return this.remoteProxyNoProxy;
+    }
+
+    /**
+     * @return the no-proxy setting split at commas; <code>null</code> if
+     *         the config was created via {@link #init(String, int)}
+     */
+    public String[] getProxyNoProxyPatterns() {
+        return this.remoteProxyNoProxyPatterns;
+    }
+
+    /**
+     * Builds a one-line summary of the configuration. The password is
+     * not part of the output. The usage section prints the configured
+     * YACY/SSL flags as stored, independent of the on/off status.
+     */
+    public String toString() {
+        StringBuffer toStrBuf = new StringBuffer();
+
+        toStrBuf
+            .append("Status: ").append(this.remoteProxyUse?"ON":"OFF").append(" | ")
+            .append("Host: ");
+        if ((this.remoteProxyUser != null) && (this.remoteProxyUser.length() > 0)) {
+            toStrBuf.append(this.remoteProxyUser)
+                    .append("@");
+        }
+        toStrBuf
+            .append((this.remoteProxyHost==null)?"unknown":this.remoteProxyHost).append(":").append(this.remoteProxyPort).append(" | ")
+            .append("Usage: HTTP");
+        if (this.remoteProxyUse4Yacy) toStrBuf.append(" YACY");
+        if (this.remoteProxyUse4SSL) toStrBuf.append(" SSL");
+        toStrBuf.append(" | ")
+            .append("No Proxy for: ")
+            .append(this.remoteProxyNoProxy);
+
+
+        return toStrBuf.toString();
+    }
+
+    /**
+     * Creates an enabled configuration for the given proxy address that
+     * applies to all kinds of traffic. No credentials and no no-proxy
+     * patterns are set.
+     *
+     * @param proxyHostName host name of the remote proxy
+     * @param proxyHostPort port of the remote proxy
+     * @return the new configuration
+     */
+    public static httpRemoteProxyConfig init(
+            String proxyHostName,
+            int proxyHostPort
+    ) {
+        httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig();
+
+        newConfig.remoteProxyUse = true;
+        newConfig.remoteProxyUse4SSL = true;
+        newConfig.remoteProxyUse4Yacy = true;
+        newConfig.remoteProxyHost = proxyHostName;
+        newConfig.remoteProxyPort = proxyHostPort;
+
+        return newConfig;
+    }
+
+    /**
+     * Creates a configuration from the <code>remoteProxy*</code> settings
+     * of the given switchboard.
+     *
+     * @param sb the switchboard whose configuration is read
+     * @return the new configuration
+     */
+    public static httpRemoteProxyConfig init(plasmaSwitchboard sb) {
+        httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig();
+
+        // determining if remote proxy usage is enabled
+        newConfig.remoteProxyUse = sb.getConfig("remoteProxyUse", "false").equalsIgnoreCase("true");
+
+        // determining if remote proxy should be used for yacy -> yacy communication
+        newConfig.remoteProxyUse4Yacy = sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true");
+
+        // determining if remote proxy should be used for ssl connections
+        newConfig.remoteProxyUse4SSL = sb.getConfig("remoteProxyUse4SSL", "true").equalsIgnoreCase("true");
+
+        // reading the proxy host name
+        newConfig.remoteProxyHost = sb.getConfig("remoteProxyHost", "").trim();
+
+        // reading the proxy host port, falling back to 3128 if it is not a number
+        try {
+            newConfig.remoteProxyPort = Integer.parseInt(sb.getConfig("remoteProxyPort", "3128"));
+        } catch (NumberFormatException e) {
+            newConfig.remoteProxyPort = 3128;
+        }
+
+        // reading the credentials for proxy authentication
+        newConfig.remoteProxyUser = sb.getConfig("remoteProxyUser", "").trim();
+        newConfig.remoteProxyPwd = sb.getConfig("remoteProxyPwd", "").trim();
+
+        // determining addresses for which the remote proxy should not be used
+        // (note: an empty setting yields an array with one empty pattern)
+        newConfig.remoteProxyNoProxy = sb.getConfig("remoteProxyNoProxy","").trim();
+        newConfig.remoteProxyNoProxyPatterns = newConfig.remoteProxyNoProxy.split(",");
+
+        return newConfig;
+    }
+}
diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java
index 9e752c88d..4e71fe33e 100644
--- a/source/de/anomic/http/httpc.java
+++ b/source/de/anomic/http/httpc.java
@@ -134,6 +134,8 @@ public final class httpc {
private boolean remoteProxyUse = false;
private String savedRemoteHost = null;
+ private httpRemoteProxyConfig remoteProxyConfig = null;
+
String requestPath = null;
private boolean allowContentEncoding = true;
static boolean useYacyReferer = true;
@@ -206,8 +208,7 @@ public final class httpc {
int port,
int timeout,
boolean ssl,
- String remoteProxyHost,
- int remoteProxyPort
+ httpRemoteProxyConfig remoteProxyConfig
) throws IOException {
httpc newHttpc;
@@ -220,7 +221,13 @@ public final class httpc {
// initialize it
try {
- newHttpc.init(server,port,timeout,ssl,remoteProxyHost, remoteProxyPort);
+ newHttpc.init(
+ server,
+ port,
+ timeout,
+ ssl,
+ remoteProxyConfig
+ );
} catch (IOException e) {
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
throw e;
@@ -386,16 +393,25 @@ public final class httpc {
* @param remoteProxyPort
* @throws IOException
*/
- void init(String server, int port, int timeout, boolean ssl,
- String remoteProxyHost, int remoteProxyPort) throws IOException {
+ void init(
+ String server,
+ int port,
+ int timeout,
+ boolean ssl,
+ httpRemoteProxyConfig theRemoteProxyConfig) throws IOException {
if (port == -1) {
port = (ssl)? 443 : 80;
}
+ String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
+ int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
+
this.init(remoteProxyHost, remoteProxyPort, timeout, ssl);
+
this.remoteProxyUse = true;
this.savedRemoteHost = server + ((port == 80) ? "" : (":" + port));
+ this.remoteProxyConfig = theRemoteProxyConfig;
}
/**
@@ -491,6 +507,7 @@ public final class httpc {
this.handle = 0;
this.remoteProxyUse = false;
+ this.remoteProxyConfig = null;
this.savedRemoteHost = null;
this.requestPath = null;
@@ -566,6 +583,14 @@ public final class httpc {
else
header.put(httpHeader.HOST, this.host);
}
+
+ // authenticate against the remote proxy if a user name is configured
+ if (this.remoteProxyUse) {
+     String remoteProxyUser = this.remoteProxyConfig.getProxyUser();
+     String remoteProxyPwd = this.remoteProxyConfig.getProxyPwd();
+     if ((remoteProxyUser!=null)&&(remoteProxyUser.length()>0)) {
+         // avoid sending the literal text "null" as password
+         if (remoteProxyPwd == null) remoteProxyPwd = "";
+         // the credentials must be preceded by the auth scheme ("Basic"),
+         // otherwise the proxy can not interpret the header value
+         header.put(httpHeader.PROXY_AUTHORIZATION, "Basic " + serverCodings.standardCoder.encodeBase64String(remoteProxyUser + ":" + remoteProxyPwd));
+     }
+ }
if (!(header.containsKey(httpHeader.CONNECTION))) {
header.put(httpHeader.CONNECTION, "close");
@@ -702,6 +727,8 @@ public final class httpc {
this.clientOutput.write(buffer, 0, c);
len += c;
}
+
+ // TODO: we cannot set the header here. This is too late
requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(len));
}
this.clientOutput.flush();
@@ -806,7 +833,6 @@ public final class httpc {
// finish with a boundary
out.write(boundary.getBytes());
out.write(serverCore.crlf);
- //buf.write("" + serverCore.crlfString);
}
// create body array
out.close();
@@ -816,15 +842,17 @@ public final class httpc {
//System.out.println("DEBUG: PUT BODY=" + new String(body));
if (zipContent) {
requestHeader.put(httpHeader.CONTENT_ENCODING, "gzip");
+
+ //TODO: should we also set the content length here?
} else {
// size of that body
requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(body.length));
}
+
// send the header
- //System.out.println("header=" + requestHeader);
send(httpHeader.METHOD_POST, path, requestHeader, false);
+
// send the body
- //System.out.println("body=" + buf.toString());
serverCore.send(this.clientOutput, body);
return new response(false);
@@ -884,11 +912,20 @@ do upload
###### End OfList ######
*/
- public static byte[] singleGET(String host, int port, String path, int timeout,
- String user, String password, boolean ssl,
- String proxyHost, int proxyPort,
- httpHeader requestHeader) throws IOException {
+ public static byte[] singleGET(
+ String host,
+ int port,
+ String path,
+ int timeout,
+ String user,
+ String password,
+ boolean ssl,
+ httpRemoteProxyConfig theRemoteProxyConfig,
+ httpHeader requestHeader
+ ) throws IOException {
if (requestHeader == null) requestHeader = new httpHeader();
+
+ // setting host authorization header
if ((user != null) && (password != null) && (user.length() != 0)) {
requestHeader.put(httpHeader.AUTHORIZATION, serverCodings.standardCoder.encodeBase64String(user + ":" + password));
}
@@ -896,10 +933,10 @@ do upload
httpc con = null;
try {
- if ((proxyHost == null) || (proxyPort == 0)) {
+ if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(host, port, timeout, ssl);
} else {
- con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
+ con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
}
httpc.response res = con.GET(path, null);
@@ -915,16 +952,20 @@ do upload
}
- public static byte[] singleGET(URL u, int timeout,
- String user, String password,
- String proxyHost, int proxyPort) throws IOException {
+ public static byte[] singleGET(
+ URL u,
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theRemoteProxyConfig
+ ) throws IOException {
int port = u.getPort();
boolean ssl = u.getProtocol().equals("https");
if (port < 0) port = (ssl) ? 443: 80;
String path = u.getPath();
String query = u.getQuery();
if ((query != null) && (query.length() > 0)) path = path + "?" + query;
- return singleGET(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null);
+ return singleGET(u.getHost(), port, path, timeout, user, password, ssl, theRemoteProxyConfig, null);
}
/*
@@ -937,10 +978,18 @@ do upload
}
*/
- public static byte[] singlePOST(String host, int port, String path, int timeout,
- String user, String password, boolean ssl,
- String proxyHost, int proxyPort,
- httpHeader requestHeader, serverObjects props) throws IOException {
+ public static byte[] singlePOST(
+ String host,
+ int port,
+ String path,
+ int timeout,
+ String user,
+ String password,
+ boolean ssl,
+ httpRemoteProxyConfig theRemoteProxyConfig,
+ httpHeader requestHeader,
+ serverObjects props
+ ) throws IOException {
if (requestHeader == null) requestHeader = new httpHeader();
if ((user != null) && (password != null) && (user.length() != 0)) {
@@ -949,10 +998,11 @@ do upload
httpc con = null;
try {
- if ((proxyHost == null) || (proxyPort == 0))
+ if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(host, port, timeout, ssl);
- else
- con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
+ } else {
+ con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
+ }
httpc.response res = con.POST(path, requestHeader, props, null);
//System.out.println("response=" + res.toString());
@@ -968,30 +1018,69 @@ do upload
}
- public static byte[] singlePOST(URL u, int timeout,
- String user, String password,
- String proxyHost, int proxyPort,
- serverObjects props) throws IOException {
+ public static byte[] singlePOST(
+ URL u,
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theRemoteProxyConfig,
+ serverObjects props
+ ) throws IOException {
int port = u.getPort();
boolean ssl = u.getProtocol().equals("https");
if (port < 0) port = (ssl) ? 443 : 80;
String path = u.getPath();
String query = u.getQuery();
if ((query != null) && (query.length() > 0)) path = path + "?" + query;
- return singlePOST(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null, props);
+ return singlePOST(
+ u.getHost(),
+ port,
+ path,
+ timeout,
+ user,
+ password,
+ ssl,
+ theRemoteProxyConfig,
+ null,
+ props
+ );
}
- public static byte[] singlePOST(String url, int timeout, serverObjects props) throws IOException {
+ public static byte[] singlePOST(
+ String url,
+ int timeout,
+ serverObjects props
+ ) throws IOException {
try {
- return singlePOST(new URL(url), timeout, null, null, null, 0, props);
+ return singlePOST(
+ new URL(url),
+ timeout,
+ null,
+ null,
+ null,
+ props
+ );
} catch (MalformedURLException e) {
throw new IOException("Malformed URL: " + e.getMessage());
}
}
- public static ArrayList wget(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException {
+ public static ArrayList wget(
+ URL url,
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theRemoteProxyConfig
+ ) throws IOException {
// splitting of the byte array into lines
- byte[] a = singleGET(url, timeout, user, password, proxyHost, proxyPort);
+ byte[] a = singleGET(
+ url,
+ timeout,
+ user,
+ password,
+ theRemoteProxyConfig
+ );
+
if (a == null) return null;
int s = 0;
int e;
@@ -1004,7 +1093,13 @@ do upload
return v;
}
- public static httpHeader whead(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException {
+ public static httpHeader whead(
+ URL url,
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theRemoteProxyConfig
+ ) throws IOException {
// generate request header
httpHeader requestHeader = new httpHeader();
if ((user != null) && (password != null) && (user.length() != 0)) {
@@ -1023,9 +1118,9 @@ do upload
// start connection
httpc con = null;
try {
- if ((proxyHost == null) || (proxyPort == 0))
+ if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
con = httpc.getInstance(host, port, timeout, ssl);
- else con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
+ else con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.HEAD(path, requestHeader);
if (res.status.startsWith("2")) {
@@ -1053,9 +1148,24 @@ do upload
}
*/
- public static ArrayList wput(URL url, int timeout, String user, String password, String proxyHost, int proxyPort, serverObjects props) throws IOException {
+ public static ArrayList wput(
+ URL url,
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theRemoteProxyConfig,
+ serverObjects props
+ ) throws IOException {
// splitting of the byte array into lines
- byte[] a = singlePOST(url, timeout, user, password, proxyHost, proxyPort, props);
+ byte[] a = singlePOST(
+ url,
+ timeout,
+ user,
+ password,
+ theRemoteProxyConfig,
+ props
+ );
+
//System.out.println("wput-out=" + new String(a));
int s = 0;
int e;
@@ -1090,8 +1200,10 @@ do upload
int timeout = Integer.parseInt(args[1]);
String proxyHost = args[2];
int proxyPort = Integer.parseInt(args[3]);
+
+ httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort);
try {
- text = wget(new URL(url), timeout, null, null, proxyHost, proxyPort);
+ text = wget(new URL(url), timeout, null, null, theRemoteProxyConfig);
} catch (MalformedURLException e) {
System.out.println("The url '" + url + "' is wrong.");
} catch (IOException e) {
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index e5b753332..649ea6f0c 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -105,14 +105,14 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
private static int timeout = 30000;
private static boolean yacyTrigger = true;
public static boolean isTransparentProxy = false;
- public static boolean remoteProxyUse = false;
- public static String remoteProxyHost = "";
- public static int remoteProxyPort = -1;
- public static String remoteProxyNoProxy = "";
- public static String[] remoteProxyNoProxyPatterns = null;
+// public static boolean remoteProxyUse = false;
+// public static String remoteProxyHost = "";
+// public static int remoteProxyPort = -1;
+// public static String remoteProxyNoProxy = "";
+// public static String[] remoteProxyNoProxyPatterns = null;
- private static final HashSet remoteProxyAllowProxySet = new HashSet();
- private static final HashSet remoteProxyDisallowProxySet = new HashSet();
+// private static final HashSet remoteProxyAllowProxySet = new HashSet();
+// private static final HashSet remoteProxyDisallowProxySet = new HashSet();
private static htmlFilterTransformer transformer = null;
public static final String userAgent = "yacy (" + httpc.systemOST +") yacy.net";
@@ -203,16 +203,16 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// doing httpc init
httpc.useYacyReferer = sb.getConfig("useYacyReferer", "true").equals("true");
- // load remote proxy data
- remoteProxyHost = switchboard.getConfig("remoteProxyHost","");
- try {
- remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128"));
- } catch (NumberFormatException e) {
- remoteProxyPort = 3128;
- }
- remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true");
- remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy","");
- remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(",");
+// // load remote proxy data
+// remoteProxyHost = switchboard.getConfig("remoteProxyHost","");
+// try {
+// remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128"));
+// } catch (NumberFormatException e) {
+// remoteProxyPort = 3128;
+// }
+// remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true");
+// remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy","");
+// remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(",");
// set timeout
timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000"));
@@ -1020,7 +1020,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
- int timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000"));
int port, pos;
if ((pos = host.indexOf(":")) < 0) {
@@ -1043,16 +1042,23 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
}
// possibly branch into PROXY-PROXY connection
- if (remoteProxyUse) {
+ if ((switchboard.remoteProxyConfig != null) && (switchboard.remoteProxyConfig.useProxy4SSL())) {
httpc remoteProxy = null;
try {
- remoteProxy = httpc.getInstance(host, port, timeout, false, remoteProxyHost, remoteProxyPort);
+ remoteProxy = httpc.getInstance(
+ host,
+ port,
+ timeout,
+ false,
+ switchboard.remoteProxyConfig
+ );
+
httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
response.print();
if (response.success()) {
// replace connection details
- host = remoteProxyHost;
- port = remoteProxyPort;
+ host = switchboard.remoteProxyConfig.getProxyHost();
+ port = switchboard.remoteProxyConfig.getProxyPort();
// go on (see below)
} else {
// pass error response back to client
@@ -1147,37 +1153,56 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
}
private httpc newhttpc(String server, int port, int timeout) throws IOException {
+
+ // getting the remote proxy configuration
+ httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
+
// a new httpc connection, combined with possible remote proxy
- boolean useProxy = remoteProxyUse;
+ boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());
+
// check no-proxy rule
- if ((useProxy) && (!(remoteProxyAllowProxySet.contains(server)))) {
- if (remoteProxyDisallowProxySet.contains(server)) {
+ if (
+ (switchboard.remoteProxyConfig != null) &&
+ (switchboard.remoteProxyConfig.useProxy()) &&
+ (!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
+ if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
useProxy = false;
} else {
// analyse remoteProxyNoProxy;
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
int i = 0;
- while (i < remoteProxyNoProxyPatterns.length) {
- if (server.matches(remoteProxyNoProxyPatterns[i])) {
+ while (i < remProxyConfig.getProxyNoProxyPatterns().length) {
+ if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
// disallow proxy for this server
- remoteProxyDisallowProxySet.add(server);
+ switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
useProxy = false;
break;
}
i++;
}
- if (i == remoteProxyNoProxyPatterns.length) {
+ if (i == remProxyConfig.getProxyNoProxyPatterns().length) {
// no pattern matches: allow server
- remoteProxyAllowProxySet.add(server);
+ switchboard.remoteProxyConfig.remoteProxyAllowProxySet.add(server);
}
}
}
+
// branch to server/proxy
if (useProxy) {
- return httpc.getInstance(server, port, timeout, false, remoteProxyHost, remoteProxyPort);
- } else {
- return httpc.getInstance(server, port, timeout, false);
+ return httpc.getInstance(
+ server,
+ port,
+ timeout,
+ false,
+ remProxyConfig
+ );
}
+ return httpc.getInstance(
+ server,
+ port,
+ timeout,
+ false
+ );
}
private httpc newhttpc(String address, int timeout) throws IOException {
diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java
index 1251fc551..82e0ebfb0 100644
--- a/source/de/anomic/net/natLib.java
+++ b/source/de/anomic/net/natLib.java
@@ -60,7 +60,7 @@ public class natLib {
rm status.htm
*/
try {
- ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null, 0);
+ ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null);
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
@@ -72,7 +72,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
- ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null, 0);
+ ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null);
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@@ -83,7 +83,7 @@ public class natLib {
private static String getStanford() {
try {
- ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null, 0);
+ ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null);
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
@@ -94,7 +94,7 @@ public class natLib {
private static String getIPID() {
try {
- ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null, 0);
+ ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null);
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);
diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java
index e73ce9606..fe112c527 100644
--- a/source/de/anomic/plasma/plasmaCrawlLoader.java
+++ b/source/de/anomic/plasma/plasmaCrawlLoader.java
@@ -56,7 +56,6 @@ public final class plasmaCrawlLoader extends Thread {
static plasmaSwitchboard switchboard;
private final plasmaHTCache cacheManager;
- private final int socketTimeout;
private final serverLog log;
private final CrawlerMessageQueue theQueue;
@@ -66,15 +65,13 @@ public final class plasmaCrawlLoader extends Thread {
private boolean stopped = false;
public plasmaCrawlLoader(
- plasmaHTCache cacheManager,
- serverLog log) {
+ plasmaHTCache theCacheManager,
+ serverLog theLog) {
this.setName("plasmaCrawlLoader");
- this.cacheManager = cacheManager;
- this.log = log;
-
- this.socketTimeout = Integer.parseInt(switchboard.getConfig("crawler.clientTimeout", "10000"));
+ this.cacheManager = theCacheManager;
+ this.log = theLog;
// configuring the crawler messagequeue
this.theQueue = new CrawlerMessageQueue();
@@ -103,11 +100,8 @@ public final class plasmaCrawlLoader extends Thread {
CrawlerFactory theFactory = new CrawlerFactory(
this.theThreadGroup,
+ switchboard,
cacheManager,
- socketTimeout,
- switchboard.getConfig("remoteProxyUse","false").equals("true"),
- switchboard.getConfig("remoteProxyHost",""),
- Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")),
log);
this.crawlwerPool = new CrawlerPool(theFactory,this.cralwerPoolConfig,this.theThreadGroup);
@@ -363,37 +357,28 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact
private CrawlerPool thePool;
private final ThreadGroup theThreadGroup;
private final plasmaHTCache cacheManager;
- private final int socketTimeout;
- private final boolean remoteProxyUse;
- private final String remoteProxyHost;
- private final int remoteProxyPort;
private final serverLog theLog;
+ private final plasmaSwitchboard sb;
public CrawlerFactory(
- ThreadGroup theThreadGroup,
- plasmaHTCache cacheManager,
- int socketTimeout,
- boolean remoteProxyUse,
- String remoteProxyHost,
- int remoteProxyPort,
- serverLog theLog) {
+ ThreadGroup threadGroup,
+ plasmaSwitchboard theSb,
+ plasmaHTCache theCacheManager,
+ serverLog log) {
super();
- if (theThreadGroup == null)
+ if (threadGroup == null)
throw new IllegalArgumentException("The threadgroup object must not be null.");
- this.theThreadGroup = theThreadGroup;
- this.cacheManager = cacheManager;
- this.socketTimeout = socketTimeout;
- this.remoteProxyUse = remoteProxyUse;
- this.remoteProxyHost = remoteProxyHost;
- this.remoteProxyPort = remoteProxyPort;
- this.theLog = theLog;
+ this.theThreadGroup = threadGroup;
+ this.cacheManager = theCacheManager;
+ this.sb = theSb;
+ this.theLog = log;
}
- public void setPool(CrawlerPool thePool) {
- this.thePool = thePool;
+ public void setPool(CrawlerPool pool) {
+ this.thePool = pool;
}
/**
@@ -403,11 +388,8 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact
return new plasmaCrawlWorker(
this.theThreadGroup,
this.thePool,
+ this.sb,
this.cacheManager,
- this.socketTimeout,
- this.remoteProxyUse,
- this.remoteProxyHost,
- this.remoteProxyPort,
this.theLog);
}
diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java
index a8572cf00..9e3d3c783 100644
--- a/source/de/anomic/plasma/plasmaCrawlWorker.java
+++ b/source/de/anomic/plasma/plasmaCrawlWorker.java
@@ -57,6 +57,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import de.anomic.http.httpHeader;
+import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.http.httpdProxyHandler;
import de.anomic.server.serverCore;
@@ -72,12 +73,10 @@ public final class plasmaCrawlWorker extends Thread {
private static final String threadBaseName = "CrawlerWorker";
private final CrawlerPool myPool;
+ private final plasmaSwitchboard sb;
private final plasmaHTCache cacheManager;
- private final int socketTimeout;
- private final boolean remoteProxyUse;
- private final String remoteProxyHost;
- private final int remoteProxyPort;
private final serverLog log;
+ private int socketTimeout;
public plasmaCrawlLoaderMessage theMsg;
private URL url;
@@ -114,33 +113,35 @@ public final class plasmaCrawlWorker extends Thread {
public plasmaCrawlWorker(
ThreadGroup theTG,
- CrawlerPool thePool,
- plasmaHTCache cacheManager,
- int socketTimeout,
- boolean remoteProxyUse,
- String remoteProxyHost,
- int remoteProxyPort,
- serverLog log) {
+ CrawlerPool thePool,
+ plasmaSwitchboard theSb,
+ plasmaHTCache theCacheManager,
+ serverLog theLog) {
super(theTG,threadBaseName + "_inPool");
this.myPool = thePool;
- this.cacheManager = cacheManager;
- this.socketTimeout = socketTimeout;
- this.remoteProxyUse = remoteProxyUse;
- this.remoteProxyHost = remoteProxyHost;
- this.remoteProxyPort = remoteProxyPort;
- this.log = log;
+ this.sb = theSb;
+ this.cacheManager = theCacheManager;
+ this.log = theLog;
+
+ // setting the crawler timeout properly
+ this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
+ }
+
+ public long getDuration() {
+ long startDate = this.startdate;
+ return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
}
- public synchronized void execute(plasmaCrawlLoaderMessage theMsg) {
- this.theMsg = theMsg;
+ public synchronized void execute(plasmaCrawlLoaderMessage theNewMsg) {
+ this.theMsg = theNewMsg;
- this.url = theMsg.url;
- this.name = theMsg.name;
- this.referer = theMsg.referer;
- this.initiator = theMsg.initiator;
- this.depth = theMsg.depth;
- this.profile = theMsg.profile;
+ this.url = theNewMsg.url;
+ this.name = theNewMsg.name;
+ this.referer = theNewMsg.referer;
+ this.initiator = theNewMsg.initiator;
+ this.depth = theNewMsg.depth;
+ this.profile = theNewMsg.profile;
this.startdate = System.currentTimeMillis();
//this.error = null;
@@ -197,7 +198,7 @@ public final class plasmaCrawlWorker extends Thread {
if (!this.stopped && !this.isInterrupted()) {
try {
this.myPool.returnObject(this);
- this.setName(this.threadBaseName + "_inPool");
+ this.setName(plasmaCrawlWorker.threadBaseName + "_inPool");
}
catch (Exception e1) {
log.logSevere("pool error", e1);
@@ -210,10 +211,25 @@ public final class plasmaCrawlWorker extends Thread {
public void execute() throws IOException {
try {
- this.setName(this.threadBaseName + "_" + this.url);
- load(this.url, this.name, this.referer, this.initiator, this.depth, this.profile,
- this.socketTimeout, this.remoteProxyHost, this.remoteProxyPort, this.remoteProxyUse,
- this.cacheManager, this.log);
+ // setting threadname
+ this.setName(plasmaCrawlWorker.threadBaseName + "_" + this.url);
+
+ // refreshing timeout value
+ this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
+
+ // loading resource
+ load(
+ this.url,
+ this.name,
+ this.referer,
+ this.initiator,
+ this.depth,
+ this.profile,
+ this.socketTimeout,
+ this.sb.remoteProxyConfig,
+ this.cacheManager,
+ this.log
+ );
} catch (IOException e) {
//throw e;
@@ -223,8 +239,8 @@ public final class plasmaCrawlWorker extends Thread {
}
}
- public void setStopped(boolean stopped) {
- this.stopped = stopped;
+ public void setStopped(boolean isStopped) {
+ this.stopped = isStopped;
}
public boolean isRunning() {
@@ -251,9 +267,7 @@ public final class plasmaCrawlWorker extends Thread {
int depth,
plasmaCrawlProfile.entry profile,
int socketTimeout,
- String remoteProxyHost,
- int remoteProxyPort,
- boolean remoteProxyUse,
+ httpRemoteProxyConfig theRemoteProxyConfig,
plasmaHTCache cacheManager,
serverLog log
) throws IOException {
@@ -264,9 +278,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
- remoteProxyHost,
- remoteProxyPort,
- remoteProxyUse,
+ theRemoteProxyConfig,
cacheManager,
log,
DEFAULT_CRAWLING_RETRY_COUNT,
@@ -282,9 +294,7 @@ public final class plasmaCrawlWorker extends Thread {
int depth,
plasmaCrawlProfile.entry profile,
int socketTimeout,
- String remoteProxyHost,
- int remoteProxyPort,
- boolean remoteProxyUse,
+ httpRemoteProxyConfig theRemoteProxyConfig,
plasmaHTCache cacheManager,
serverLog log,
int crawlingRetryCount,
@@ -309,8 +319,16 @@ public final class plasmaCrawlWorker extends Thread {
String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
- sb.urlPool.errorURL.newEntry(url, referer,initiator, yacyCore.seedDB.mySeed.hash,
- name, "denied_(url_in_blacklist)", new bitfield(plasmaURL.urlFlagLength), true);
+ sb.urlPool.errorURL.newEntry(
+ url,
+ referer,
+ initiator,
+ yacyCore.seedDB.mySeed.hash,
+ name,
+ "denied_(url_in_blacklist)",
+ new bitfield(plasmaURL.urlFlagLength),
+ true
+ );
return;
}
@@ -335,8 +353,9 @@ public final class plasmaCrawlWorker extends Thread {
//System.out.println("CRAWLER_REQUEST_HEADER=" + requestHeader.toString()); // DEBUG
// open the connection
- remote = (remoteProxyUse) ? httpc.getInstance(host, port, socketTimeout, ssl, remoteProxyHost, remoteProxyPort)
- : httpc.getInstance(host, port, socketTimeout, ssl);
+ remote = ((theRemoteProxyConfig != null) && (theRemoteProxyConfig.useProxy()))
+ ? httpc.getInstance(host, port, socketTimeout, ssl, theRemoteProxyConfig)
+ : httpc.getInstance(host, port, socketTimeout, ssl);
// specifying if content encoding is allowed
remote.setAllowContentEncoding(useContentEncodingGzip);
@@ -346,6 +365,8 @@ public final class plasmaCrawlWorker extends Thread {
if (res.status.startsWith("200") || res.status.startsWith("203")) {
// the transfer is ok
+
+ // TODO: aborting download if content is too long ...
long contentLength = res.responseHeader.contentLength();
// reserve cache entry
@@ -354,7 +375,6 @@ public final class plasmaCrawlWorker extends Thread {
// request has been placed and result has been returned. work off response
File cacheFile = cacheManager.getCachePath(url);
try {
- String error = null;
if (plasmaParser.supportedContent(url,res.responseHeader.mime())) {
if (cacheFile.isFile()) {
cacheManager.deleteFile(url);
@@ -430,9 +450,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
- remoteProxyHost,
- remoteProxyPort,
- remoteProxyUse,
+ theRemoteProxyConfig,
cacheManager,
log,
--crawlingRetryCount,
@@ -517,9 +535,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
- remoteProxyHost,
- remoteProxyPort,
- remoteProxyUse,
+ theRemoteProxyConfig,
cacheManager,
log,
--crawlingRetryCount,
diff --git a/source/de/anomic/plasma/plasmaDbImporter.java b/source/de/anomic/plasma/plasmaDbImporter.java
new file mode 100644
index 000000000..d69bcba40
--- /dev/null
+++ b/source/de/anomic/plasma/plasmaDbImporter.java
@@ -0,0 +1,318 @@
+package de.anomic.plasma;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Vector;
+
+import de.anomic.server.serverDate;
+import de.anomic.server.logging.serverLog;
+
+/**
+ * Background thread that merges a foreign word index and URL db into the
+ * local ("home") databases.
+ * <p>
+ * Every word entity of the import index is copied into the home word index
+ * and then deleted from the import index, so the import index shrinks while
+ * the job runs. URLs referenced by the imported entries are copied into the
+ * home URL db when the import URL db knows them and the home URL db does not.
+ */
+public class plasmaDbImporter extends Thread {
+
+    /** Jobs whose {@link #run()} method has returned. */
+    public static final Vector finishedJobs = new Vector();
+    /** Thread group every import job is created in. */
+    public static final ThreadGroup runningJobs = new ThreadGroup("DbImport");
+    /** Number that is assigned to the next created job. */
+    public static int currMaxJobNr = 0;
+
+    private final int jobNr;
+    private final plasmaCrawlLURL homeUrlDB;
+    private final plasmaWordIndex homeWordIndex;
+
+    private final plasmaCrawlLURL importUrlDB;
+    private final plasmaWordIndex importWordIndex;
+    private final String importPath;
+    private final File importRoot;
+    private final int importStartSize;
+
+    private final serverLog log;
+    // set via stoppIt() by another thread and polled by the import thread
+    private volatile boolean stopped = false;
+    private boolean paused = false;
+    private String wordHash = "------------";
+
+    long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = wordChunkStart;
+    String wordChunkStartHash = "------------", wordChunkEndHash;
+    private long urlCounter = 0, wordCounter = 0, entryCounter = 0;
+
+    private long globalStart = System.currentTimeMillis();
+    private long globalEnd;
+
+    private String error;
+
+    /** Requests the import to stop at its next abort check. */
+    public void stoppIt() {
+        this.stopped = true;
+    }
+
+    /** @return string form of the exception that aborted the import, or <code>null</code> */
+    public String getError() {
+        return this.error;
+    }
+
+    /** @return the number assigned to this job at construction time */
+    public int getJobNr() {
+        return this.jobNr;
+    }
+
+    /** @return the word hash that was fetched from the import index most recently */
+    public String getCurrentWordhash() {
+        return this.wordHash;
+    }
+
+    /** @return the number of URL entries copied into the home URL db so far */
+    public long getUrlCounter() {
+        return this.urlCounter;
+    }
+
+    /** @return the number of word entities fetched from the import index so far */
+    public long getWordEntityCounter() {
+        return this.wordCounter;
+    }
+
+    /** @return the number of word index entries read so far */
+    public long getWordEntryCounter() {
+        return this.entryCounter;
+    }
+
+    /** @return the directory the import databases are read from */
+    public File getImportRoot() {
+        return this.importRoot;
+    }
+
+    /** @return the current size of the import word index */
+    public int getImportWordDbSize() {
+        return this.importWordIndex.size();
+    }
+
+    /**
+     * Creates a new import job and opens the import databases below
+     * <code>theImportPath</code>.
+     *
+     * @param theHomeIndexDB word index that receives the imported entries
+     * @param theHomeUrlDB URL db that receives the imported URLs
+     * @param theImportPath directory containing the databases to import
+     * @throws NullPointerException if one of the arguments is <code>null</code>
+     * @throws IllegalArgumentException if the import directory is the home index
+     *         directory, does not exist, is no directory, or is not readable
+     *         and writeable
+     * @throws IOException declared for failures while opening the import databases
+     */
+    public plasmaDbImporter(plasmaWordIndex theHomeIndexDB, plasmaCrawlLURL theHomeUrlDB, String theImportPath) throws IOException {
+        super(runningJobs,"DB-Import_" + theImportPath);
+
+        this.log = new serverLog("DB-IMPORT");
+
+        synchronized(runningJobs) {
+            this.jobNr = currMaxJobNr;
+            currMaxJobNr++;
+        }
+
+        if (theImportPath == null) throw new NullPointerException();
+        this.importPath = theImportPath;
+        this.importRoot = new File(theImportPath);
+
+        if (theHomeIndexDB == null) throw new NullPointerException();
+        this.homeWordIndex = theHomeIndexDB;
+
+        if (theHomeUrlDB == null) throw new NullPointerException();
+        this.homeUrlDB = theHomeUrlDB;
+
+        if (this.homeWordIndex.getRoot().equals(importRoot)) {
+            throw new IllegalArgumentException("Import and home DB directory must not be equal");
+        }
+
+        // configure import DB; the checks are chained so that the most basic
+        // problem is reported instead of being overwritten by a later check
+        String errorMsg = null;
+        if (!this.importRoot.exists()) errorMsg = "Import directory does not exist.";
+        else if (!this.importRoot.isDirectory()) errorMsg = "ImportDirectory is not a directory.";
+        else if (!this.importRoot.canRead()) errorMsg = "Import directory is not readable.";
+        else if (!this.importRoot.canWrite()) errorMsg = "Import directory is not writeable";
+        if (errorMsg != null) {
+            this.log.logSevere(errorMsg + "\nName: " + this.importRoot.getAbsolutePath());
+            throw new IllegalArgumentException(errorMsg);
+        }
+
+        this.log.logFine("Initializing source word index db.");
+        this.importWordIndex = new plasmaWordIndex(this.importRoot, 8*1024*1024, this.log);
+        this.log.logFine("Initializing import URL db.");
+        this.importUrlDB = new plasmaCrawlLURL(new File(this.importRoot, "urlHash.db"), 4*1024*1024);
+        this.importStartSize = this.importWordIndex.size();
+    }
+
+    /** Runs the import, then records the end time and adds this job to {@link #finishedJobs}. */
+    public void run() {
+        try {
+            importWordsDB();
+        } finally {
+            globalEnd = System.currentTimeMillis();
+            finishedJobs.add(this);
+        }
+    }
+
+    /** @return milliseconds from creation until the job ended, or until now while it has not ended */
+    public long getTotalRuntime() {
+        return (this.globalEnd == 0)?System.currentTimeMillis()-this.globalStart:this.globalEnd-this.globalStart;
+    }
+
+    /**
+     * @return the share of the import word index that is already gone from it,
+     *         in percent of its size at construction time
+     */
+    public int getProcessingStatus() {
+        // nothing to import at all; also protects the division below
+        if (this.importStartSize <= 0) return 100;
+        // multiply before dividing: dividing by (startSize/100) fails with a
+        // division by zero for start sizes below 100 and is imprecise above
+        final long processed = (long) this.importStartSize - this.importWordIndex.size();
+        return (int) ((processed * 100L) / this.importStartSize);
+    }
+
+    /** @return milliseconds since this job object was created */
+    public long getElapsedTime() {
+        return System.currentTimeMillis()-this.globalStart;
+    }
+
+    /**
+     * @return remaining import size multiplied by the average time per word
+     *         entity so far, in milliseconds; 0 before the first word entity
+     */
+    public long getEstimatedTime() {
+        if (this.wordCounter == 0) return 0;
+        final long elapsed = System.currentTimeMillis() - this.globalStart;
+        // multiply first so the per-word average is not truncated to whole milliseconds
+        return (this.importWordIndex.size() * elapsed) / this.wordCounter;
+    }
+
+    /**
+     * Copies all word entities of the import index, and the URLs they
+     * reference, into the home databases until the import index is exhausted
+     * or the job is aborted. A failure for a single word entity is logged and
+     * the import continues with the next one; any other failure is logged and
+     * stored for {@link #getError()}. The import databases are closed at the end.
+     */
+    public void importWordsDB() {
+        this.log.logInfo("STARTING DB-IMPORT");
+
+        try {
+            this.log.logInfo("Importing DB from '" + this.importRoot.getAbsolutePath() + "' to '" + this.homeWordIndex.getRoot().getAbsolutePath() + "'.");
+            this.log.logInfo("Home word index contains " + this.homeWordIndex.size() + " words and " + this.homeUrlDB.size() + " URLs.");
+            this.log.logInfo("Import word index contains " + this.importWordIndex.size() + " words and " + this.importUrlDB.size() + " URLs.");
+
+            // iterate over all words from import db
+            Iterator importWordHashIterator = this.importWordIndex.wordHashes(wordChunkStartHash, true, true);
+            while (!isAborted() && importWordHashIterator.hasNext()) {
+
+                plasmaWordIndexEntity importWordIdxEntity = null;
+                try {
+                    wordCounter++;
+                    wordHash = (String) importWordHashIterator.next();
+                    importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
+
+                    if (importWordIdxEntity.size() == 0) {
+                        importWordIdxEntity.deleteComplete();
+                        continue;
+                    }
+
+                    // creating a container used to hold the imported entries
+                    plasmaWordIndexEntryContainer newContainer = new plasmaWordIndexEntryContainer(wordHash,importWordIdxEntity.size());
+
+                    // read all entries of the entity into the container
+                    Iterator importWordIdxEntries = importWordIdxEntity.elements(true);
+                    plasmaWordIndexEntry importWordIdxEntry;
+                    while (importWordIdxEntries.hasNext()) {
+
+                        // testing if import process was aborted
+                        if (isAborted()) break;
+
+                        // getting next word index entry
+                        entryCounter++;
+                        importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next();
+                        String urlHash = importWordIdxEntry.getUrlHash();
+                        if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) {
+                            urlCounter++;
+
+                            // importing the new url
+                            plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.getEntry(urlHash);
+                            this.homeUrlDB.newEntry(urlEntry);
+
+                            if (urlCounter % 500 == 0) {
+                                this.log.logFine(urlCounter + " URLs processed so far.");
+                            }
+                        }
+
+                        // adding word index entity to container
+                        newContainer.add(importWordIdxEntry,System.currentTimeMillis());
+
+                        if (entryCounter % 500 == 0) {
+                            this.log.logFine(entryCounter + " word entries and " + wordCounter + " word entities processed so far.");
+                        }
+                    }
+
+                    // testing if import process was aborted
+                    if (isAborted()) break;
+
+                    // importing entity container to home db
+                    homeWordIndex.addEntries(newContainer, true);
+
+                    // delete complete index entity file
+                    importWordIdxEntity.close();
+                    importWordIndex.deleteIndex(wordHash);
+
+                    // print out some statistical information
+                    if (wordCounter%500 == 0) {
+                        wordChunkEndHash = wordHash;
+                        wordChunkEnd = System.currentTimeMillis();
+                        // a chunk can finish within one clock tick; never divide by zero
+                        long duration = Math.max(1L, wordChunkEnd - wordChunkStart);
+                        log.logInfo(wordCounter + " word entities imported " +
+                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "] " +
+                                this.getProcessingStatus() + "%\n" +
+                                "Speed: "+ 500*1000/duration + " word entities/s" +
+                                " | Elapsed time: " + serverDate.intervalToString(getElapsedTime()) +
+                                " | Estimated time: " + serverDate.intervalToString(getEstimatedTime()) + "\n" +
+                                "Home Words = " + homeWordIndex.size() +
+                                " | Import Words = " + importWordIndex.size());
+                        wordChunkStart = wordChunkEnd;
+                        wordChunkStartHash = wordChunkEndHash;
+                    }
+
+                } catch (Exception e) {
+                    log.logSevere("Import of word entity '" + wordHash + "' failed.",e);
+                } finally {
+                    // best-effort close; on the regular path the entity was already closed above
+                    if (importWordIdxEntity != null) try { importWordIdxEntity.close(); } catch (Exception e) {}
+                }
+            }
+
+            this.log.logInfo("Home word index contains " + homeWordIndex.size() + " words and " + homeUrlDB.size() + " URLs.");
+            this.log.logInfo("Import word index contains " + importWordIndex.size() + " words and " + importUrlDB.size() + " URLs.");
+
+            this.log.logInfo("DB-IMPORT FINISHED");
+        } catch (Exception e) {
+            this.log.logSevere("Database import failed.",e);
+            this.error = e.toString();
+        } finally {
+            if (importUrlDB != null) try { importUrlDB.close(); } catch (Exception e){}
+            if (importWordIndex != null) try { importWordIndex.close(5000); } catch (Exception e){}
+        }
+    }
+
+    /** @return <code>true</code> once a stop was requested or the current thread was interrupted */
+    private boolean isAborted() {
+        return (this.stopped) || Thread.currentThread().isInterrupted();
+    }
+
+}
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 4f2e98159..b31cef964 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -77,19 +77,18 @@ public class plasmaSnippetCache {
private plasmaHTCache cacheManager;
private plasmaParser parser;
private serverLog log;
- private String remoteProxyHost;
- private int remoteProxyPort;
- private boolean remoteProxyUse;
+ private plasmaSwitchboard sb;
- public plasmaSnippetCache(plasmaHTCache cacheManager, plasmaParser parser,
- String remoteProxyHost, int remoteProxyPort, boolean remoteProxyUse,
- serverLog log) {
+ public plasmaSnippetCache(
+ plasmaSwitchboard theSb,
+ plasmaHTCache cacheManager,
+ plasmaParser parser,
+ serverLog log
+ ) {
this.cacheManager = cacheManager;
this.parser = parser;
this.log = log;
- this.remoteProxyHost = remoteProxyHost;
- this.remoteProxyPort = remoteProxyPort;
- this.remoteProxyUse = remoteProxyUse;
+ this.sb = theSb;
this.snippetsScoreCounter = 0;
this.snippetsScore = new kelondroMScoreCluster();
this.snippetsCache = new HashMap();
@@ -367,11 +366,9 @@ public class plasmaSnippetCache {
0,
null,
socketTimeout,
- remoteProxyHost,
- remoteProxyPort,
- remoteProxyUse,
- cacheManager,
- log);
+ this.sb.remoteProxyConfig,
+ this.cacheManager,
+ this.log);
}
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) {
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 6eb0099be..ba14cce61 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -121,6 +121,7 @@ import de.anomic.data.wikiBoard;
import de.anomic.data.userDB;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
+import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
@@ -166,9 +167,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlStacker sbStackCrawlThread;
public messageBoard messageDB;
public wikiBoard wikiDB;
- public String remoteProxyHost;
- public int remoteProxyPort;
- public boolean remoteProxyUse;
public static plasmaCrawlRobotsTxt robots;
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;
@@ -182,7 +180,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public yacyCore yc;
public HashMap indexingTasksInProcess;
public userDB userDB;
+
+ /*
+ * Remote Proxy configuration
+ */
+// public boolean remoteProxyUse;
+// public boolean remoteProxyUse4Yacy;
+// public String remoteProxyHost;
+// public int remoteProxyPort;
+// public String remoteProxyNoProxy = "";
+// public String[] remoteProxyNoProxyPatterns = null;
+ public httpRemoteProxyConfig remoteProxyConfig = null;
+
+ /*
+ * Some constants
+ */
private static final String STR_PROXYPROFILE = "defaultProxyProfile";
private static final String STR_REMOTEPROFILE = "defaultRemoteProfile";
private static final String STR_REMOTECRAWLTRIGGER = "REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER ";
@@ -206,25 +219,45 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.listsPath = new File(rootPath, getConfig("listsPath", "LISTS"));
this.log.logConfig("Lists Path: " + this.listsPath.toString());
- // remote proxy configuration
- remoteProxyHost = getConfig("remoteProxyHost", "");
- try {
- remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128"));
- } catch (NumberFormatException e) {
- remoteProxyPort = 3128;
- }
- if (getConfig("remoteProxyUse", "false").equals("true")) {
- remoteProxyUse = true;
- log.logConfig("Using remote proxy:" +
- "\n\tHost: " + remoteProxyHost +
- "\n\tPort: " + remoteProxyPort);
- } else {
- remoteProxyUse = false;
- remoteProxyHost = null;
- remoteProxyPort = 0;
- }
- proxyLastAccess = System.currentTimeMillis() - 60000;
+ /* ============================================================================
+ * Remote Proxy configuration
+ * ============================================================================ */
+ this.remoteProxyConfig = httpRemoteProxyConfig.init(this);
+ this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString());
+
+// // reading the proxy host name
+// this.remoteProxyHost = getConfig("remoteProxyHost", "");
+//
+// // reading the proxy host port
+// try {
+// this.remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128"));
+// } catch (NumberFormatException e) {
+// this.remoteProxyPort = 3128;
+// }
+//
+// // determining if remote proxy should be used for yacy -> yacy communication
+// this.remoteProxyUse4Yacy = getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true");
+//
+// // determining addresses for which the remote proxy should not be used
+// this.remoteProxyNoProxy = getConfig("remoteProxyNoProxy","");
+// this.remoteProxyNoProxyPatterns = this.remoteProxyNoProxy.split(",");
+//
+// // determining if remote Proxy should be used
+// if (getConfig("remoteProxyUse", "false").equalsIgnoreCase("true")) {
+// this.remoteProxyUse = true;
+// this.log.logConfig("Using remote proxy:" +
+// "\n\tHost: " + this.remoteProxyHost +
+// "\n\tPort: " + this.remoteProxyPort +
+// "\n\tUseProxy4Yacy: " + Boolean.toString(this.remoteProxyUse4Yacy)
+// );
+// } else {
+// this.remoteProxyUse = false;
+// this.remoteProxyHost = null;
+// this.remoteProxyPort = 0;
+// }
+ this.proxyLastAccess = System.currentTimeMillis() - 60000;
+ // configuring list path
if (!(listsPath.exists())) listsPath.mkdirs();
// load coloured lists
@@ -420,9 +453,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// generate snippets cache
log.logConfig("Initializing Snippet Cache");
- snippetCache = new plasmaSnippetCache(cacheManager, parser,
- remoteProxyHost, remoteProxyPort, remoteProxyUse,
- log);
+ snippetCache = new plasmaSnippetCache(this,cacheManager, parser,log);
// start yacy core
log.logConfig("Starting YaCy Protocol Core");
diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java
index e13fb8d94..0042cdd96 100644
--- a/source/de/anomic/tools/loaderThreads.java
+++ b/source/de/anomic/tools/loaderThreads.java
@@ -45,6 +45,7 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Hashtable;
+import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
public class loaderThreads {
@@ -53,28 +54,30 @@ public class loaderThreads {
private int timeout;
private String user;
private String password;
- private String remoteProxyHost;
- private int remoteProxyPort;
+ private httpRemoteProxyConfig remoteProxyConfig;
// management objects for collection of threads
Hashtable threads;
int completed, failed;
public loaderThreads() {
- this(null, 0);
+ this(null);
}
- public loaderThreads(String remoteProxyHost, int remoteProxyPort) {
- this(10000, null, null, remoteProxyHost, remoteProxyPort);
+ public loaderThreads(httpRemoteProxyConfig theremoteProxyConfig) {
+ this(10000, null, null, theremoteProxyConfig);
}
- public loaderThreads(int timeout, String user, String password,
- String remoteProxyHost, int remoteProxyPort) {
+ public loaderThreads(
+ int timeout,
+ String user,
+ String password,
+ httpRemoteProxyConfig theremoteProxyConfig
+ ) {
this.timeout = timeout;
this.user = user;
this.password = password;
- this.remoteProxyHost = remoteProxyHost;
- this.remoteProxyPort = remoteProxyPort;
+ this.remoteProxyConfig = theremoteProxyConfig;
this.threads = new Hashtable();
this.completed = 0;
this.failed = 0;
@@ -145,7 +148,7 @@ public class loaderThreads {
public void run() {
try {
- page = httpc.wget(url, timeout, user, password, remoteProxyHost, remoteProxyPort);
+ page = httpc.wget(url, timeout, user, password, remoteProxyConfig);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {
@@ -227,7 +230,8 @@ public class loaderThreads {
}
public static void main(String[] args) {
- loaderThreads loader = new loaderThreads("192.168.1.122", 3128);
+ httpRemoteProxyConfig proxyConfig = httpRemoteProxyConfig.init("192.168.1.122", 3128);
+ loaderThreads loader = new loaderThreads(proxyConfig);
try {
loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt"));
} catch (MalformedURLException e) {
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index bf7e03e4c..a9cf29522 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -99,20 +99,32 @@ public final class yacyClient {
10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort));
*/
+ // building URL
final URL url = new URL("http://" + address + "/yacy/hello.html");
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // adding all needed parameters
final serverObjects obj = new serverObjects(6);
- obj.put("iam", yacyCore.seedDB.mySeed.hash);
- obj.put("pattern", "");
- obj.put("count", "20");
- obj.put("key", key);
- obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
- obj.put("myUTC", System.currentTimeMillis());
- obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key));
- result = nxTools.table(httpc.wput(url,
- 105000, null, null,
- yacyCore.seedDB.sb.remoteProxyHost,
- yacyCore.seedDB.sb.remoteProxyPort,
- obj));
+ obj.put("iam", yacyCore.seedDB.mySeed.hash);
+ obj.put("pattern", "");
+ obj.put("count", "20");
+ obj.put("key", key);
+ obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+ obj.put("myUTC", System.currentTimeMillis());
+ obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key));
+
+ // sending request
+ result = nxTools.table(
+ httpc.wput(url,
+ 105000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ obj
+ )
+ );
} catch (Exception e) {
if (Thread.currentThread().isInterrupted()) {
yacyCore.log.logFine("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' interrupted.");
@@ -215,12 +227,27 @@ public final class yacyClient {
public static yacySeed querySeed(yacySeed target, String seedHash) {
final String key = crypt.randomSalt();
try {
- final HashMap result = nxTools.table(httpc.wget(
- new URL("http://" + target.getAddress() +
- "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
- "&youare=" + target.hash + "&key=" + key +
- "&object=seed&env=" + seedHash),
- 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // sending request
+ final HashMap result = nxTools.table(
+ httpc.wget(
+ new URL("http://" + target.getAddress() +
+ "/yacy/query.html" +
+ "?iam=" + yacyCore.seedDB.mySeed.hash +
+ "&youare=" + target.hash +
+ "&key=" + key +
+ "&object=seed" +
+ "&env=" + seedHash
+ ),
+ 10000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
+ )
+ );
+
if (result == null || result.size() == 0) { return null; }
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
return yacySeed.genRemoteSeed((String) result.get("response"), key);
@@ -232,13 +259,28 @@ public final class yacyClient {
public static int queryRWICount(yacySeed target, String wordHash) {
try {
- final HashMap result = nxTools.table(httpc.wget(
- new URL("http://" + target.getAddress() +
- "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
- "&youare=" + target.hash + "&key=" +
- "&object=rwicount&env=" + wordHash +
- "&ttl=0"),
- 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // sending request
+ final HashMap result = nxTools.table(
+ httpc.wget(
+ new URL("http://" + target.getAddress() +
+ "/yacy/query.html" +
+ "?iam=" + yacyCore.seedDB.mySeed.hash +
+ "&youare=" + target.hash +
+ "&key=" +
+ "&object=rwicount" +
+ "&env=" + wordHash +
+ "&ttl=0"
+ ),
+ 10000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
+ )
+ );
+
if (result == null || result.size() == 0) { return -1; }
return Integer.parseInt((String) result.get("response"));
} catch (Exception e) {
@@ -247,19 +289,36 @@ public final class yacyClient {
}
}
- public static int queryUrlCount(yacySeed target) {
+ public static int queryUrlCount(yacySeed target) {
if (target == null) { return -1; }
if (yacyCore.seedDB.mySeed == null) return -1;
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // building url
final String querystr =
- "http://" + target.getAddress() +
- "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
- "&youare=" + target.hash +
- "&key=" +
- "&object=lurlcount&env=&ttl=0";
+ "http://" + target.getAddress() +
+ "/yacy/query.html" +
+ "?iam=" + yacyCore.seedDB.mySeed.hash +
+ "&youare=" + target.hash +
+ "&key=" +
+ "&object=lurlcount" +
+ "&env=" +
+ "&ttl=0";
+
+ // sending request
try {
- final HashMap result = nxTools.table(httpc.wget(
- new URL(querystr), 6000, null, null,
- yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
+ final HashMap result = nxTools.table(
+ httpc.wget(
+ new URL(querystr),
+ 6000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
+ )
+ );
+
// yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString());
if ((result == null) || (result.size() == 0)) return -1;
final String resp = (String) result.get("response");
@@ -290,8 +349,14 @@ public final class yacyClient {
// request result
final String key = crypt.randomSalt();
- try {
+ try {
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // building url
final String url = "http://" + targetPeer.getAddress() + "/yacy/search.html";
+
+ // adding all needed parameters
/*
String url = "http://" + targetPeer.getAddress() +
"/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
@@ -301,22 +366,30 @@ public final class yacyClient {
"&query=" + wordhashes;
*/
final serverObjects obj = new serverObjects(9);
- obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
- obj.put("youare", targetPeer.hash);
- obj.put("key", key);
- obj.put("count", count);
- obj.put("resource", ((global) ? "global" : "local"));
- obj.put("query", wordhashes);
- obj.put("ttl", "0");
- obj.put("duetime", Long.toString(duetime));
- obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+ obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
+ obj.put("youare", targetPeer.hash);
+ obj.put("key", key);
+ obj.put("count", count);
+ obj.put("resource", ((global) ? "global" : "local"));
+ obj.put("query", wordhashes);
+ obj.put("ttl", "0");
+ obj.put("duetime", Long.toString(duetime));
+ obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+
//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();
- final HashMap result = nxTools.table(httpc.wput(new URL(url),
- 300000, null, null,
- yacyCore.seedDB.sb.remoteProxyHost,
- yacyCore.seedDB.sb.remoteProxyPort,
- obj));
+
+ // sending request
+ final HashMap result = nxTools.table(
+ httpc.wput(
+ new URL(url),
+ 300000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ obj
+ )
+ );
final long totalrequesttime = System.currentTimeMillis() - timestamp;
/*
@@ -394,13 +467,20 @@ public final class yacyClient {
// ask for allowed message size and attachement size
// if this replies null, the peer does not answer
if (yacyCore.seedDB == null || yacyCore.seedDB.mySeed == null) { return null; }
- final serverObjects post = new serverObjects(5);
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // adding all needed parameters
final String key = crypt.randomSalt();
- post.put("key", key);
- post.put("process", "permission");
- post.put("iam", yacyCore.seedDB.mySeed.hash);
- post.put("youare", targetHash);
- post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+ final serverObjects post = new serverObjects(5);
+ post.put("key", key);
+ post.put("process", "permission");
+ post.put("iam", yacyCore.seedDB.mySeed.hash);
+ post.put("youare", targetHash);
+ post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+
+ // getting target address
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
@@ -412,10 +492,19 @@ public final class yacyClient {
//System.out.println("remote address: " + address);
}
if (address == null) { address = "localhost:8080"; }
+
+ // sending request
try {
- return nxTools.table(httpc.wput(
- new URL("http://" + address + "/yacy/message.html"),
- 8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
+ return nxTools.table(
+ httpc.wput(
+ new URL("http://" + address + "/yacy/message.html"),
+ 8000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ )
+ );
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.permissionMessage error:" + e.getMessage());
@@ -425,15 +514,22 @@ public final class yacyClient {
public static HashMap postMessage(String targetHash, String subject, byte[] message) {
// this post a message to the remote message board
- final serverObjects post = new serverObjects(7);
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ // adding all needed parameters
final String key = crypt.randomSalt();
- post.put("key", key);
- post.put("process", "post");
- post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
- post.put("youare", targetHash);
- post.put("subject", subject);
- post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
- post.put("message", new String(message));
+ final serverObjects post = new serverObjects(7);
+ post.put("key", key);
+ post.put("process", "post");
+ post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
+ post.put("youare", targetHash);
+ post.put("subject", subject);
+ post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
+ post.put("message", new String(message));
+
+ // getting target address
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
@@ -441,11 +537,18 @@ public final class yacyClient {
address = yacyCore.seedDB.getConnected(targetHash).getAddress();
}
if (address == null) { address = "localhost:8080"; }
- //System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString());
+
+ // sending request
try {
- final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/message.html"), 20000, null, null,
- yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
- //System.out.println("V=" + v.toString());
+ final ArrayList v = httpc.wput(
+ new URL("http://" + address + "/yacy/message.html"),
+ 20000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ );
+
return nxTools.table(v);
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage());
@@ -459,9 +562,12 @@ public final class yacyClient {
if (yacyCore.seedDB.mySeed == null) { return null; }
if (yacyCore.seedDB.mySeed == targetSeed) { return null; }
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
// construct request
- final serverObjects post = new serverObjects(9);
final String key = crypt.randomSalt();
+ final serverObjects post = new serverObjects(9);
post.put("key", key);
post.put("process", "crawl");
post.put("iam", yacyCore.seedDB.mySeed.hash);
@@ -472,12 +578,22 @@ public final class yacyClient {
post.put("depth", "0");
post.put("ttl", "0");
+ // determining target address
final String address = targetSeed.getAddress();
if (address == null) { return null; }
+
+ // sending request
try {
- return nxTools.table(httpc.wput(
- new URL("http://" + address + "/yacy/crawlOrder.html"),
- 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
+ return nxTools.table(
+ httpc.wput(
+ new URL("http://" + address + "/yacy/crawlOrder.html"),
+ 10000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ )
+ );
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.crawlOrder error: peer=" + targetSeed.getName() + ", error=" + e.getMessage());
@@ -516,26 +632,38 @@ public final class yacyClient {
stale - the resource was reloaded but not processed because source had no changes
*/
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// construct request
final String key = crypt.randomSalt();
+ // determining target address
String address = targetSeed.getAddress();
if (address == null) { return null; }
+
+ // sending request
try {
- return nxTools.table(httpc.wget(
- new URL("http://" + address + "/yacy/crawlReceipt.html?" +
- "iam=" + yacyCore.seedDB.mySeed.hash +
- "&youare=" + targetSeed.hash +
- "&process=" + process +
- "&key=" + key +
- "&urlhash=" + ((entry == null) ? "" : entry.hash()) +
- "&result=" + result +
- "&reason=" + reason +
- "&wordh=" + wordhashes +
- "&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
- ),
- 60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
+ return nxTools.table(
+ httpc.wget(
+ new URL("http://" + address + "/yacy/crawlReceipt.html" +
+ "?iam=" + yacyCore.seedDB.mySeed.hash +
+ "&youare=" + targetSeed.hash +
+ "&process=" + process +
+ "&key=" + key +
+ "&urlhash=" + ((entry == null) ? "" : entry.hash()) +
+ "&result=" + result +
+ "&reason=" + reason +
+ "&wordh=" + wordhashes +
+ "&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
+ ),
+ 60000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
+ )
+ );
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.crawlReceipt error:" + e.getMessage());
@@ -549,24 +677,28 @@ public final class yacyClient {
*/
public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, HashMap urlCache, boolean gzipBody, int timeout) {
+
HashMap in = transferRWI(targetSeed, indexes, gzipBody, timeout);
if (in == null) { return "no_connection_1"; }
String result = (String) in.get("result");
if (result == null) { return "no_result_1"; }
if (!(result.equals("ok"))) return result;
+
// in now contains a list of unknown hashes
final String uhss = (String) in.get("unknownURL");
if (uhss == null) { return "no_unknownURL_tag_in_response"; }
if (uhss.length() == 0) { return null; } // all url's known, we are ready here
+
final String[] uhs = uhss.split(",");
-// System.out.println("DEBUG yacyClient.transferIndex: " + uhs.length + " urls unknown");
if (uhs.length == 0) { return null; } // all url's known
+
// extract the urlCache from the result
plasmaCrawlLURL.Entry[] urls = new plasmaCrawlLURL.Entry[uhs.length];
for (int i = 0; i < uhs.length; i++) {
urls[i] = (plasmaCrawlLURL.Entry) urlCache.get(uhs[i]);
if (urls[i] == null) System.out.println("DEBUG transferIndex: error with requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
+
in = transferURL(targetSeed, urls, gzipBody, timeout);
if (in == null) { return "no_connection_2"; }
result = (String) in.get("result");
@@ -574,12 +706,17 @@ public final class yacyClient {
if (!(result.equals("ok"))) { return result; }
// int doubleentries = Integer.parseInt((String) in.get("double"));
// System.out.println("DEBUG tansferIndex: transferred " + uhs.length + " URL's, double=" + doubleentries);
+
return null;
}
private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, boolean gzipBody, int timeout) {
final String address = targetSeed.getAddress();
if (address == null) { return null; }
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
// prepare post values
final serverObjects post = new serverObjects(7);
final String key = crypt.randomSalt();
@@ -592,6 +729,8 @@ public final class yacyClient {
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
post.put("wordc", Integer.toString(indexes.length));
+
+
int indexcount = 0;
final StringBuffer entrypost = new StringBuffer(indexes.length*73);
Iterator eenum;
@@ -618,8 +757,14 @@ public final class yacyClient {
post.put("entryc", Integer.toString(indexcount));
post.put("indexes", entrypost.toString());
try {
- final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferRWI.html"), timeout, null, null,
- yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
+ final ArrayList v = httpc.wput(
+ new URL("http://" + address + "/yacy/transferRWI.html"),
+ timeout,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ );
// this should return a list of urlhashes that are unknwon
if (v != null) {
yacyCore.seedDB.mySeed.incSI(indexcount);
@@ -637,6 +782,10 @@ public final class yacyClient {
// this post a message to the remote message board
final String address = targetSeed.getAddress();
if (address == null) { return null; }
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
// prepare post values
final serverObjects post = new serverObjects(5+urls.length);
final String key = crypt.randomSalt();
@@ -662,8 +811,15 @@ public final class yacyClient {
}
post.put("urlc", Integer.toString(urlc));
try {
- final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferURL.html"), timeout, null, null,
- yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
+ final ArrayList v = httpc.wput(
+ new URL("http://" + address + "/yacy/transferURL.html"),
+ timeout,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ );
+
if (v != null) {
yacyCore.seedDB.mySeed.incSU(urlc);
}
@@ -675,6 +831,10 @@ public final class yacyClient {
}
public static HashMap getProfile(yacySeed targetSeed) {
+
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
// this post a message to the remote message board
final serverObjects post = new serverObjects(2);
post.put("iam", yacyCore.seedDB.mySeed.hash);
@@ -682,8 +842,15 @@ public final class yacyClient {
String address = targetSeed.getAddress();
if (address == null) { address = "localhost:8080"; }
try {
- final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/profile.html"), 20000, null, null,
- yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
+ final ArrayList v = httpc.wput(
+ new URL("http://" + address + "/yacy/profile.html"),
+ 20000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
+ post
+ );
+
return nxTools.table(v);
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
@@ -701,14 +868,24 @@ public final class yacyClient {
final String wordhashe = plasmaWordIndexEntry.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
- final HashMap result = nxTools.table(httpc.wget(
- new URL("http://" + target.getAddress() +
- "/yacy/search.html?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
- "&youare=" + target.hash + "&key=" +
- "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
- "&count=10&resource=global" +
- "&query=" + wordhashe),
- 5000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
+ // should we use the proxy?
+ boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
+
+ final HashMap result = nxTools.table(
+ httpc.wget(
+ new URL("http://" + target.getAddress() + "/yacy/search.html" +
+ "?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
+ "&youare=" + target.hash + "&key=" +
+ "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
+ "&count=10" +
+ "&resource=global" +
+ "&query=" + wordhashe),
+ 5000,
+ null,
+ null,
+ (useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
+ )
+ );
System.out.println("Result=" + result.toString());
} catch (Exception e) {
e.printStackTrace();
diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java
index b9bad111e..541eef6f6 100644
--- a/source/de/anomic/yacy/yacyPeerActions.java
+++ b/source/de/anomic/yacy/yacyPeerActions.java
@@ -161,14 +161,14 @@ public class yacyPeerActions {
// load the seed list
try {
url = new URL(seedListFileURL);
- header = httpc.whead(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
+ header = httpc.whead(url, 5000, null, null, this.sb.remoteProxyConfig);
if ((header == null) || (header.lastModified() == null)) {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available");
} else if ((header.age() > 86400000) && (ssc > 0)) {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
- seedList = httpc.wget(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
+ seedList = httpc.wget(url, 5000, null, null, this.sb.remoteProxyConfig);
enu = seedList.iterator();
lc = 0;
while (enu.hasNext()) {
@@ -221,7 +221,7 @@ public class yacyPeerActions {
// read in remote file from url
try {
- ArrayList remote = httpc.wget(new URL(url), 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
+ ArrayList remote = httpc.wget(new URL(url), 5000, null, null, this.sb.remoteProxyConfig);
if ((remote != null) && (remote.size() > 0)) {
Iterator e = remote.iterator();
while (e.hasNext()) {
diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java
index f54b31796..06e964f57 100644
--- a/source/de/anomic/yacy/yacySeedDB.java
+++ b/source/de/anomic/yacy/yacySeedDB.java
@@ -708,7 +708,14 @@ public final class yacySeedDB {
private boolean checkCache(ArrayList uv, URL seedURL) throws IOException {
// check if the result can be retrieved again
- ArrayList check = httpc.wget(seedURL, 10000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
+ // TODO: decide whether the useProxy4Yacy option should be checked here
+ ArrayList check = httpc.wget(
+ seedURL,
+ 10000,
+ null,
+ null,
+ sb.remoteProxyConfig
+ );
if (check == null) {
serverLog.logFine("YACY","SaveSeedList: Testing download failed ...");
diff --git a/source/yacy.java b/source/yacy.java
index 3cd8fe9eb..de7cfb3dd 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -456,7 +456,7 @@ public final class yacy {
server.terminate(false);
server.interrupt();
if (server.isAlive()) try {
- httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null, 0); // kick server
+ httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null); // kick server
serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket");
} catch (IOException ee) {
serverLog.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");
diff --git a/yacy.init b/yacy.init
index 877e08f93..961a707be 100644
--- a/yacy.init
+++ b/yacy.init
@@ -142,11 +142,16 @@ plasmaBlueList=yacy.blue
# if you wish to do that, specify it here
# if you want to switch on the proxy use, set remoteProxyUse=true
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
+remoteProxyUse=false
+remoteProxyUse4Yacy=true
+remoteProxyUse4SSL=true
+
remoteProxyHost=192.168.2.2
remoteProxyPort=4239
+remoteProxyUser=
+remoteProxyPwd=
+
remoteProxyNoProxy=192.*,10.*,127.*,localhost
-remoteProxyUse=false
-#remoteProxyUse=true
# the proxy may filter the content of transferred web pages
# the bluelist removes specific keywords from web pages