*) Restructuring and extending of Remote Proxy Support

- remote proxy configuration can now be "really" changed on the fly and takes effect immediately
   - adding possibility to disable remote proxy usage for yacy->yacy communication
   - adding possibility to disable remote proxy usage for ssl
   - restructuring proxy configuration so that it is stored in a single place now

*) Adding possibility to import a foreign word DB (or even more of them in parallel) 
   at runtime into the peers DB
   - this can be done by calling IndexImport_p.html 
   - ATTENTION: please note that at the moment this thread must be aborted via the GUI
     before a normal server shutdown is done. 
   - TODO: integrating IndexImport Thread into normal server shutdown
   - TODO: Adding possibility to import crawl-queues, etc. from foreign peers
   - TODO: removing old import function from yacy.java and calling the new routines instead

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@968 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 222607ef0f
commit 02d9af1a70

@ -0,0 +1,105 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
<title>YaCy '#[clientname]#': Index Import</title>
#[metas]#
<meta http-equiv="REFRESH" content="30">
</head>
<body marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
#[header]#
<br><br>
<h2>Index DB Import</h2>
<p>The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references</p>
<hr>
<!-- error switch: 0 = no error, 1 = free-text error message, 2 = duplicate import path -->
#(error)#<!-- 0 -->
::<!-- 1 -->
<p><font color="red"><b>#[error_msg]#</b></font></p>
::<!-- 2 -->
<p><font color="red"><b>Import Job with the same path already started</b></font></p>
#(/error)#
<h3>Starting new Job</h3>
<form action="IndexImport_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellpadding="2" cellspacing="1">
<tr>
<td title="Path to the PLASMADB directory of the foreign peer">Import Path:</td>
<td><input name="importPath" type="text" size="50" value=""></td>
<td><input type="submit" name="startIndexDbImport" value="Start New Import"></td>
</tr>
</table>
</form>
<hr>
<h3>Currently running jobs</h3>
<p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td class="small" width="150">Path</td>
<td class="small" >Status</td>
<td class="small" >%</td>
<td class="small" >Elapsed<br>Time</td>
<td class="small" >Estimated<br>Time</td>
<td class="small" >Word Hash</td>
<td class="small" ># URLs</td>
<td class="small" ># Word<br>Entities</td>
<td class="small" ># Word<br>Entries</td>
<td class="small" >Stop Import</td>
</tr>
#{running.jobs}#
<tr class="TableCellLight">
<td class="small">#[path]#</td>
<!-- "stopped" is 1 while the import thread is alive, 0 otherwise -->
<td class="small"><font color="#(stopped)#red::green#(/stopped)#">#(stopped)#Finished::Running#(/stopped)#</font></td>
<td class="small" align="right">#[percent]#</td>
<td class="small" align="right">#[elapsed]#</td>
<td class="small" align="right">#[estimated]#</td>
<td class="small" align="right"><tt>#[wordHash]#</tt></td>
<td class="small" align="right">#[url_num]#</td>
<td class="small" align="right">#[word_entity_num]#</td>
<td class="small" align="right">#[word_entry_num]#</td>
<td class="small">
#(stopped)#::
<!-- one form per job, so that only the job number of the clicked row is submitted -->
<form action="IndexImport_p.html" method="post" enctype="multipart/form-data">
<input type="hidden" name="jobNr" value="#[job_nr]#">
<input type="submit" name="stopIndexDbImport" value="Stop Import">
</form>
#(/stopped)#
</td>
</tr>
#{/running.jobs}#
</table>
<hr>
<form action="IndexImport_p.html" method="post" enctype="multipart/form-data">
<h3>Finished jobs</h3>
<p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td class="small" width="150">Path</td>
<td class="small" >Status</td>
<td class="small" >%</td>
<td class="small" >Elapsed<br>Time</td>
<td class="small" >Word Hash</td>
<td class="small" ># URLs</td>
<td class="small" ># Word<br>Entities</td>
<td class="small" ># Word<br>Entries</td>
</tr>
#{finished.jobs}#
<tr class="TableCellLight">
<td class="small">#[path]#</td>
<!-- "stopped" is 0 for a job without error and 2 for a job that ended with an error; both switches need three alternatives -->
<td class="small"><font color="#(stopped)#red::green::red#(/stopped)#">#(stopped)#Finished::Finished::<b>Error:</b> #[errorMsg]##(/stopped)#</font></td>
<td class="small" align="right">#[percent]#</td>
<td class="small" align="right">#[elapsed]#</td>
<td class="small" align="right"><tt>#[wordHash]#</tt></td>
<td class="small" align="right">#[url_num]#</td>
<td class="small" align="right">#[word_entity_num]#</td>
<td class="small" align="right">#[word_entry_num]#</td>
</tr>
#{/finished.jobs}#
</table>
<input type="submit" name="clearFinishedJobList" value="Clear List">
</form>
<p><font size="-3"><i>Last Refresh:</i> #[date]#</font></p>
#[footer]#
</body>
</html>

@ -0,0 +1,196 @@
//IndexImport_p.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@anomic.de
//first published on http://www.anomic.de
//Frankfurt, Germany, 2005
//
//This file is contributed by Martin Thelian
//
// $LastChangedDate: 2005-10-17 17:46:12 +0200 (Mo, 17 Okt 2005) $
// $LastChangedRevision: 947 $
// $LastChangedBy: borg-0300 $
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//Using this software in any meaning (reading, learning, copying, compiling,
//running) means that you agree that the Author(s) is (are) not responsible
//for cost, loss of data or any harm that may be caused directly or indirectly
//by usage of this softare or this documentation. The usage of this software
//is on your own risk. The installation and usage (starting/running) of this
//software may allow other people or application to access your computer and
//any attached devices and is highly dependent on the configuration of the
//software which must be done by the user of the software; the author(s) is
//(are) also not responsible for proper configuration and usage of the
//software, even if provoked by documentation provided together with
//the software.
//
//Any changes to this file according to the GPL as documented in the file
//gpl.txt aside this file in the shipment you received can be done to the
//lines that follows this copyright notice here, but changes must not be
//done inside the copyright notive above. A re-distribution must contain
//the intact and unchanged copyright notice.
//Contributions and changes to the program code must be marked as such.
//You must compile this file with
//javac -classpath .:../Classes IndexImport_p.java
//if the shell's current path is HTROOT
import java.io.File;
import java.util.Date;
import java.util.Vector;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaDbImporter;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet class backing <code>IndexImport_p.html</code>.
 *
 * It starts and stops {@link plasmaDbImporter} jobs, clears the list of
 * finished jobs and fills the template properties that describe the running
 * and the finished import jobs.
 */
public final class IndexImport_p {

    /**
     * Handles one request for the index import page.
     *
     * Recognised POST keys:
     * <ul>
     * <li><code>startIndexDbImport</code> (with <code>importPath</code>): start a new import job</li>
     * <li><code>stopIndexDbImport</code> (with <code>jobNr</code>): stop a running job and wait for it to end</li>
     * <li><code>clearFinishedJobList</code>: empty the list of finished jobs</li>
     * </ul>
     * After a successful action only the <code>LOCATION</code> property is set and
     * returned; otherwise the page properties are filled in.
     *
     * @param header the request header (not used here)
     * @param post   the submitted form values, may be <code>null</code>
     * @param env    the server environment; it is cast to {@link plasmaSwitchboard}
     * @return the properties for the template
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
        // return variable that accumulates replacements
        serverObjects prop = new serverObjects();

        if (post != null) {
            if (post.containsKey("startIndexDbImport")) {
                try {
                    // getting the import path
                    String importPath = (String) post.get("importPath");
                    if ((importPath == null) || (importPath.trim().length() == 0)) {
                        // report a readable message instead of a raw NullPointerException text
                        prop.put("error",1);
                        prop.put("error_error_msg","No import path specified");
                    } else if (isImportRunning(new File(importPath))) {
                        // there is an already running thread with the same import path
                        prop.put("error",2);
                    } else {
                        plasmaDbImporter newImporter = new plasmaDbImporter(switchboard.wordIndex,switchboard.urlPool.loadedURL,importPath);
                        newImporter.start();
                        prop.put("LOCATION","");
                        return prop;
                    }
                } catch (Exception e) {
                    prop.put("error",1);
                    prop.put("error_error_msg",e.toString());
                }
            } else if (post.containsKey("clearFinishedJobList")) {
                plasmaDbImporter.finishedJobs.clear();
                prop.put("LOCATION","");
                return prop;
            } else if (post.containsKey("stopIndexDbImport")) {
                // getting the job nr of the thread that should be stopped
                if (stopImport((String) post.get("jobNr"))) {
                    prop.put("LOCATION","");
                    return prop;
                }
                // the job number could not be parsed: show the page with an error
                // (the submitted value is deliberately not echoed into the page)
                prop.put("error",1);
                prop.put("error_error_msg","Invalid or missing job number");
            }
        }

        prop.put("wcount", Integer.toString(switchboard.wordIndex.size()));
        prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size()));

        /*
         * Loop over all currently running jobs
         */
        plasmaDbImporter[] running = runningImporters();
        for (int i = 0; i < running.length; i++) {
            plasmaDbImporter job = running[i];
            String prefix = "running.jobs_" + i + "_";
            prop.put(prefix + "path", job.getImportRoot().toString());
            // the template switch "stopped" selects alternative 1 while the thread is alive
            prop.put(prefix + "stopped", job.isAlive() ? 1 : 0);
            prop.put(prefix + "percent", Integer.toString(job.getProcessingStatus()));
            prop.put(prefix + "elapsed", serverDate.intervalToString(job.getElapsedTime()));
            prop.put(prefix + "estimated", serverDate.intervalToString(job.getEstimatedTime()));
            prop.put(prefix + "wordHash", job.getCurrentWordhash());
            prop.put(prefix + "url_num", Long.toString(job.getUrlCounter()));
            prop.put(prefix + "word_entity_num", Long.toString(job.getWordEntityCounter()));
            prop.put(prefix + "word_entry_num", Long.toString(job.getWordEntryCounter()));
            prop.put(prefix + "stopped_job_nr", Integer.toString(job.getJobNr()));
        }
        prop.put("running.jobs", running.length);

        /*
         * Loop over all finished jobs
         */
        // work on a copy so that jobs finishing meanwhile do not disturb the loop
        Vector finishedJobs = (Vector) plasmaDbImporter.finishedJobs.clone();
        for (int i = 0; i < finishedJobs.size(); i++) {
            plasmaDbImporter job = (plasmaDbImporter) finishedJobs.get(i);
            String prefix = "finished.jobs_" + i + "_";
            String error = job.getError();
            prop.put(prefix + "path", job.getImportRoot().toString());
            if (error != null) {
                prop.put(prefix + "stopped", 2);
                prop.put(prefix + "stopped_errorMsg", error);
            } else {
                prop.put(prefix + "stopped", 0);
            }
            prop.put(prefix + "percent", Integer.toString(job.getProcessingStatus()));
            prop.put(prefix + "elapsed", serverDate.intervalToString(job.getElapsedTime()));
            prop.put(prefix + "wordHash", job.getCurrentWordhash());
            prop.put(prefix + "url_num", Long.toString(job.getUrlCounter()));
            prop.put(prefix + "word_entity_num", Long.toString(job.getWordEntityCounter()));
            prop.put(prefix + "word_entry_num", Long.toString(job.getWordEntryCounter()));
        }
        prop.put("finished.jobs", finishedJobs.size());

        prop.put("date",(new Date()).toString());
        return prop;
    }

    /**
     * Takes a snapshot of the threads enumerated from <code>plasmaDbImporter.runningJobs</code>.
     * The enumeration buffer is sized to twice the reported active count so that
     * threads started between the two calls still fit.
     * Every enumerated thread is expected to be a {@link plasmaDbImporter}.
     */
    private static plasmaDbImporter[] runningImporters() {
        Thread[] threads = new Thread[plasmaDbImporter.runningJobs.activeCount()*2];
        int count = plasmaDbImporter.runningJobs.enumerate(threads);
        plasmaDbImporter[] importers = new plasmaDbImporter[count];
        for (int i = 0; i < count; i++) {
            importers[i] = (plasmaDbImporter) threads[i];
        }
        return importers;
    }

    /** Tells whether one of the running import jobs already reads from the given directory. */
    private static boolean isImportRunning(File importRoot) {
        plasmaDbImporter[] running = runningImporters();
        for (int i = 0; i < running.length; i++) {
            if (running[i].getImportRoot().equals(importRoot)) return true;
        }
        return false;
    }

    /**
     * Stops the running import job with the given job number and waits until its thread has ended.
     *
     * @param jobNrParam the job number as submitted by the form, may be <code>null</code>
     * @return <code>false</code> if the job number is not a valid integer, <code>true</code>
     *         otherwise (also when no running job carries that number)
     */
    private static boolean stopImport(String jobNrParam) {
        final int jobNr;
        try {
            // parse once; Integer.parseInt(null) also throws a NumberFormatException
            jobNr = Integer.parseInt(jobNrParam);
        } catch (NumberFormatException e) {
            return false;
        }

        plasmaDbImporter[] running = runningImporters();
        for (int i = 0; i < running.length; i++) {
            if (running[i].getJobNr() == jobNr) {
                running[i].stoppIt();
                try {
                    running[i].join();
                } catch (InterruptedException e) {
                    // keep the interrupt status so that the calling thread can react to it
                    Thread.currentThread().interrupt();
                }
                break;
            }
        }
        return true;
    }
}

@ -133,7 +133,7 @@ public class Language_p {
String url = (String)post.get("url");
ArrayList langVector;
try{
langVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort);
langVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyConfig);
}catch(IOException e){
prop.put("status", 1);//unable to get url
prop.put("status_url", url);

@ -177,8 +177,8 @@ public class Network {
}
final HashMap map = new HashMap();
map.put(yacySeed.IP,(String) post.get("peerIP"));
map.put(yacySeed.PORT,(String) post.get("peerPort"));
map.put(yacySeed.IP,post.get("peerIP"));
map.put(yacySeed.PORT,post.get("peerPort"));
yacySeed peer = new yacySeed((String) post.get("peerHash"),map);
yacyCore.peerActions.updateMySeed();

@ -59,6 +59,7 @@ import java.util.List;
import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpd;
import de.anomic.http.httpdProxyHandler;
import de.anomic.plasma.plasmaSwitchboard;
@ -343,19 +344,62 @@ public class SettingsAck_p {
}
if (post.containsKey("proxysettings")) {
httpdProxyHandler.remoteProxyUse = ((String) post.get("remoteProxyUse", "")).equals("on");
httpdProxyHandler.remoteProxyHost = (String) post.get("remoteProxyHost", "");
/* ====================================================================
* Reading out the remote proxy settings
* ==================================================================== */
boolean useRemoteProxy = post.containsKey("remoteProxyUse");
boolean useRemoteProxy4Yacy = post.containsKey("remoteProxyUse4Yacy");
boolean useRemoteProxy4SSL = post.containsKey("remoteProxyUse4SSL");
String remoteProxyHost = post.get("remoteProxyHost", "");
String remoteProxyPortStr = post.get("remoteProxyPort", "");
int remoteProxyPort = 0;
try {
httpdProxyHandler.remoteProxyPort = Integer.parseInt((String) post.get("remoteProxyPort", ""));
remoteProxyPort = Integer.parseInt(remoteProxyPortStr);
} catch (NumberFormatException e) {
httpdProxyHandler.remoteProxyPort = 3128;
remoteProxyPort = 3128;
}
httpdProxyHandler.remoteProxyNoProxy = (String) post.get("remoteProxyNoProxy", "");
httpdProxyHandler.remoteProxyNoProxyPatterns = httpdProxyHandler.remoteProxyNoProxy.split(",");
env.setConfig("remoteProxyHost", httpdProxyHandler.remoteProxyHost);
env.setConfig("remoteProxyPort", Integer.toString(httpdProxyHandler.remoteProxyPort));
env.setConfig("remoteProxyNoProxy", httpdProxyHandler.remoteProxyNoProxy);
env.setConfig("remoteProxyUse", (httpdProxyHandler.remoteProxyUse) ? "true" : "false");
String remoteProxyUser = post.get("remoteProxyUser", "");
String remoteProxyPwd = post.get("remoteProxyPwd", "");
String remoteProxyNoProxyStr = post.get("remoteProxyNoProxy", "");
String[] remoteProxyNoProxyPatterns = remoteProxyNoProxyStr.split(",");
/* ====================================================================
* Storing settings into config file
* ==================================================================== */
// Persist every remote proxy setting. User and password have to be written with
// setConfig as well: getConfig(key, default) only reads and would silently drop them.
env.setConfig("remoteProxyHost", remoteProxyHost);
env.setConfig("remoteProxyPort", Integer.toString(remoteProxyPort));
env.setConfig("remoteProxyUser", remoteProxyUser);
env.setConfig("remoteProxyPwd", remoteProxyPwd);
env.setConfig("remoteProxyNoProxy", remoteProxyNoProxyStr);
env.setConfig("remoteProxyUse", (useRemoteProxy) ? "true" : "false");
env.setConfig("remoteProxyUse4Yacy", (useRemoteProxy4Yacy) ? "true" : "false");
env.setConfig("remoteProxyUse4SSL", (useRemoteProxy4SSL) ? "true" : "false");
/* ====================================================================
* Enabling settings
* ==================================================================== */
plasmaSwitchboard sb = (plasmaSwitchboard)env;
sb.remoteProxyConfig = httpRemoteProxyConfig.init(sb);
// httpdProxyHandler.remoteProxyUse = post.get("remoteProxyUse", "").equals("on");
// httpdProxyHandler.remoteProxyHost = post.get("remoteProxyHost", "");
// try {
// httpdProxyHandler.remoteProxyPort = Integer.parseInt((String) post.get("remoteProxyPort", ""));
// } catch (NumberFormatException e) {
// httpdProxyHandler.remoteProxyPort = 3128;
// }
// httpdProxyHandler.remoteProxyNoProxy = (String) post.get("remoteProxyNoProxy", "");
// httpdProxyHandler.remoteProxyNoProxyPatterns = httpdProxyHandler.remoteProxyNoProxy.split(",");
// env.setConfig("remoteProxyHost", httpdProxyHandler.remoteProxyHost);
// env.setConfig("remoteProxyPort", Integer.toString(httpdProxyHandler.remoteProxyPort));
// env.setConfig("remoteProxyNoProxy", httpdProxyHandler.remoteProxyNoProxy);
// env.setConfig("remoteProxyUse", (httpdProxyHandler.remoteProxyUse) ? "true" : "false");
prop.put("info", 15); // The remote-proxy setting has been changed
return prop;
}

@ -158,24 +158,63 @@ All traffic is routed throug one single port, for both proxy and server.<br>
<fieldset><legend id="proxy">Remote Proxy (optional)</legend>
<p>YaCy can use another proxy to connect to the internet. You can enter the address for the remote proxy here:</p>
<p><table border="0" cellspacing="5">
<!-- enabling/disabling remote proxy usage -->
<tr valign="top">
<td>Use remote proxy:</td>
<td><input type="checkbox" name="remoteProxyUse" align="top" #(remoteProxyUseChecked)#::checked#(/remoteProxyUseChecked)#></td>
<td>&nbsp;</td>
<td>Enables the usage of the remote proxy by yacy</td>
</tr>
<tr valign="top">
<td colspan="2">Use remote proxy for yacy <-> yacy communication</td>
<td><input type="checkbox" name="remoteProxyUse4Yacy" align="top" #(remoteProxyUse4Yacy)#::checked#(/remoteProxyUse4Yacy)#></td>
<td>Specifies if the remote proxy should be used for the communication of this peer to other yacy peers.<br>
<i>Hint:</i> Enabling this option could cause this peer to remain in junior status.</td>
</tr>
<tr valign="top">
<td colspan="2">Use remote proxy for https</td>
<td><input type="checkbox" name="remoteProxyUse4SSL" align="top" #(remoteProxyUse4SSL)#::checked#(/remoteProxyUse4SSL)#></td>
<td>Specifies if YaCy should forward ssl connections to the remote proxy.</td>
</tr>
<tr><td colspan="4"><hr></td></tr>
<!-- remote proxy hostname + port -->
<tr valign="top">
<td>Remote proxy host:</td>
<td><input name="remoteProxyHost" type="text" size="32" maxlength="128" value="#[remoteProxyHost]#"></td>
<td colspan="2"><input name="remoteProxyHost" type="text" size="32" maxlength="128" value="#[remoteProxyHost]#"></td>
<td>The ip address or domain name of the remote proxy</td>
</tr>
<tr valign="top">
<td>Remote proxy port:</td>
<td><input name="remoteProxyPort" type="text" size="5" maxlength="5" value="#[remoteProxyPort]#"></td>
<td colspan="2"><input name="remoteProxyPort" type="text" size="5" maxlength="5" value="#[remoteProxyPort]#"></td>
<td>the port of the remote proxy</td>
</tr>
<!-- remote proxy username + pwd -->
<tr valign="top">
<td>no-proxy adresses:</td>
<td><input name="remoteProxyNoProxy" type="text" size="32" maxlength="128" value="#[remoteProxyNoProxy]#"></td>
<td>Remote proxy user:</td>
<td colspan="2"><input name="remoteProxyUser" type="text" size="32" maxlength="128" value="#[remoteProxyUser]#"></td>
<td>&nbsp;</td>
</tr>
<tr valign="top">
<td>Remote proxy pwd:</td>
<td colspan="2"><input name="remoteProxyPwd" type="password" size="32" maxlength="128" value="#[remoteProxyPwd]#"></td>
<td>&nbsp;</td>
</tr>
<tr><td colspan="4"><hr></td></tr>
<!-- no remote proxy pattern -->
<tr valign="top">
<td>Use remote proxy:</td>
<td><input type="checkbox" name="remoteProxyUse" align="top" #(remoteProxyUseChecked)#::checked#(/remoteProxyUseChecked)#></td>
<td>no-proxy adresses:</td>
<td colspan="2"><input name="remoteProxyNoProxy" type="text" size="32" maxlength="128" value="#[remoteProxyNoProxy]#"></td>
<td>IP addresses for which the remote proxy should not be used</td>
</tr>
<!-- submit button -->
<tr valign="top">
<td colspan="2"><input type="submit" name="proxysettings" value="submit">&nbsp;<i>Changes will take effect immediately.</i></td>
<td colspan="4"><input type="submit" name="proxysettings" value="submit">&nbsp;<i>Changes will take effect immediately.</i></td>
</tr>
</table>
</fieldset>

@ -109,10 +109,17 @@ public final class Settings_p {
}
// remote proxy
prop.put("remoteProxyUseChecked", env.getConfig("remoteProxyUse", "false").equals("true") ? 1 : 0);
prop.put("remoteProxyUse4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equals("true") ? 1 : 0);
prop.put("remoteProxyUse4SSL", env.getConfig("remoteProxyUse4SSL", "true").equals("true") ? 1 : 0);
prop.put("remoteProxyHost", env.getConfig("remoteProxyHost", ""));
prop.put("remoteProxyPort", env.getConfig("remoteProxyPort", ""));
prop.put("remoteProxyUser", env.getConfig("remoteProxyUser", ""));
prop.put("remoteProxyPwd", env.getConfig("remoteProxyPwd", ""));
prop.put("remoteProxyNoProxy", env.getConfig("remoteProxyNoProxy", ""));
prop.put("remoteProxyUseChecked", ((String) env.getConfig("remoteProxyUse", "false")).equals("true") ? 1 : 0);
// proxy access filter
prop.put("proxyfilter", env.getConfig("proxyClient", "*"));

@ -124,7 +124,7 @@ public class Skins_p {
String url = (String)post.get("url");
ArrayList skinVector;
try{
skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort);
skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyConfig);
}catch(IOException e){
prop.put("status", 1);//unable to get url
prop.put("status_url", url);

@ -132,6 +132,7 @@ public class Status {
prop.put("remoteProxy", 1);
prop.put("remoteProxy_host", env.getConfig("remoteProxyHost", "<unknown>"));
prop.put("remoteProxy_port", env.getConfig("remoteProxyPort", "<unknown>"));
prop.put("remoteProxy_4Yacy", env.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true")?0:1);
} else {
prop.put("remoteProxy", 0); // not used
}

@ -19,7 +19,7 @@
<td>&nbsp;</td>
</tr>
<tr class="TableCellLight">
<td>Proxy host</td>
<td>Peer host</td>
<td>#[host]#:#[port]#</td>
<td>&nbsp;</td>
</tr>
@ -30,7 +30,7 @@
</tr>
<tr class="TableCellLight">
<td>Remote proxy</td>
<td>#(remoteProxy)#not used::#[host]#:#[port]##(/remoteProxy)#</td>
<td>#(remoteProxy)#not used::#[host]#:#[port]# | Used for YaCy -> YaCy communication: #(4Yacy)#Yes::No #(/4Yacy)# #(/remoteProxy)#</td>
<td>&nbsp;</td>
</tr>
<tr class="TableCellDark">

@ -142,7 +142,7 @@ public class sharedBlacklist_p {
//Make Adresse
address = "http://" + IP + ":" + Port + "/yacy/list.html?col=black";
try {
otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List
otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List
} catch (Exception e) {}
//Make HTML-Optionlist with retrieved items
@ -173,7 +173,7 @@ public class sharedBlacklist_p {
Name = address;
try {
otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List
otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyConfig); //get List
} catch (Exception e) {}
prop.put("status", 0); //TODO: check if the wget failed...

@ -244,10 +244,10 @@ public final class robotsParser{
try {
downloadStart = System.currentTimeMillis();
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
if (!sb.remoteProxyUse) {
if ((sb.remoteProxyConfig == null) || (!sb.remoteProxyConfig.useProxy())) {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false);
} else {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyHost, sb.remoteProxyPort);
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, false, sb.remoteProxyConfig);
}
// if we previously have downloaded this robots.txt then we can set the if-modified-since header

@ -0,0 +1,181 @@
//httpRemoteProxyConfig.java
//-----------------------
//part of the AnomicHTTPD caching proxy
//(C) by Michael Peter Christen; mc@anomic.de
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//this file was contributed by Martin Thelian
//$LastChangedDate$
//$LastChangedBy$
//$LastChangedRevision$
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//Using this software in any meaning (reading, learning, copying, compiling,
//running) means that you agree that the Author(s) is (are) not responsible
//for cost, loss of data or any harm that may be caused directly or indirectly
//by usage of this softare or this documentation. The usage of this software
//is on your own risk. The installation and usage (starting/running) of this
//software may allow other people or application to access your computer and
//any attached devices and is highly dependent on the configuration of the
//software which must be done by the user of the software; the author(s) is
//(are) also not responsible for proper configuration and usage of the
//software, even if provoked by documentation provided together with
//the software.
//
//Any changes to this file according to the GPL as documented in the file
//gpl.txt aside this file in the shipment you received can be done to the
//lines that follows this copyright notice here, but changes must not be
//done inside the copyright notive above. A re-distribution must contain
//the intact and unchanged copyright notice.
//Contributions and changes to the program code must be marked as such.
//You must compile this file with
//javac -classpath .:../Classes Settings_p.java
//if the shell's current path is HTROOT
package de.anomic.http;
import java.util.HashSet;
import de.anomic.plasma.plasmaSwitchboard;
/**
 * Holds the settings of the remote (upstream) proxy in a single place.
 *
 * Instances are created through one of the <code>init</code> factory methods;
 * the settings themselves are only readable through the getters.
 */
public final class httpRemoteProxyConfig {

    /*
     * Remote Proxy configuration
     */
    private boolean remoteProxyUse;
    private boolean remoteProxyUse4Yacy;
    private boolean remoteProxyUse4SSL;

    private String remoteProxyHost;
    private int remoteProxyPort;
    private String remoteProxyUser;
    private String remoteProxyPwd;

    private String remoteProxyNoProxy = "";
    // never null, so that callers can iterate over the patterns without a null check
    private String[] remoteProxyNoProxyPatterns = new String[0];

    public final HashSet remoteProxyAllowProxySet = new HashSet();
    public final HashSet remoteProxyDisallowProxySet = new HashSet();

    /** @return <code>true</code> if the remote proxy is enabled */
    public boolean useProxy() {
        return this.remoteProxyUse;
    }

    /** @return <code>true</code> if the remote proxy should be used for yacy -> yacy communication */
    public boolean useProxy4Yacy() {
        return this.remoteProxyUse4Yacy;
    }

    /** @return <code>true</code> if the remote proxy should be used for ssl connections */
    public boolean useProxy4SSL() {
        return this.remoteProxyUse4SSL;
    }

    /** @return the host name of the remote proxy */
    public String getProxyHost() {
        return this.remoteProxyHost;
    }

    /** @return the port of the remote proxy */
    public int getProxyPort() {
        return this.remoteProxyPort;
    }

    /** @return the user name for the remote proxy; <code>null</code> if the config was created from host and port only */
    public String getProxyUser() {
        return this.remoteProxyUser;
    }

    /** @return the password for the remote proxy; <code>null</code> if the config was created from host and port only */
    public String getProxyPwd() {
        return this.remoteProxyPwd;
    }

    /** @return the comma separated no-proxy setting as it was configured */
    public String getProxyNoProxy() {
        return this.remoteProxyNoProxy;
    }

    /** @return the no-proxy setting split at the commas; never <code>null</code> */
    public String[] getProxyNoProxyPatterns() {
        return this.remoteProxyNoProxyPatterns;
    }

    /**
     * Builds a one-line summary of the configuration.
     * The password is intentionally not part of the output.
     */
    public String toString() {
        StringBuffer toStrBuf = new StringBuffer();

        toStrBuf
        .append("Status: ").append(this.remoteProxyUse?"ON":"OFF").append(" | ")
        .append("Host: ");

        if ((this.remoteProxyUser != null) && (this.remoteProxyUser.length() > 0)) {
            toStrBuf.append(this.remoteProxyUser)
                    .append("@");
        }

        toStrBuf
        .append((this.remoteProxyHost==null)?"unknown":this.remoteProxyHost).append(":").append(this.remoteProxyPort).append(" | ")
        .append("Usage: HTTP");

        if (this.remoteProxyUse4Yacy) toStrBuf.append(" YACY");
        if (this.remoteProxyUse4SSL) toStrBuf.append(" SSL");

        toStrBuf.append(" | ")
        .append("No Proxy for: ")
        .append(this.remoteProxyNoProxy);

        return toStrBuf.toString();
    }

    /**
     * Creates a configuration that uses the given proxy for everything
     * (http, ssl and yacy -> yacy communication), without credentials and
     * without no-proxy patterns.
     *
     * @param proxyHostName host name of the remote proxy
     * @param proxyHostPort port of the remote proxy
     * @return the new configuration
     */
    public static httpRemoteProxyConfig init(
            String proxyHostName,
            int proxyHostPort
    ) {
        httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig();

        newConfig.remoteProxyUse = true;
        newConfig.remoteProxyUse4SSL = true;
        newConfig.remoteProxyUse4Yacy = true;
        newConfig.remoteProxyHost = proxyHostName;
        newConfig.remoteProxyPort = proxyHostPort;

        return newConfig;
    }

    /**
     * Creates a configuration from the <code>remoteProxy*</code> settings of the switchboard.
     * Missing settings fall back to: proxy usage off, usage for yacy and ssl on,
     * port 3128, everything else empty.
     *
     * @param sb the switchboard whose configuration is read
     * @return the new configuration
     */
    public static httpRemoteProxyConfig init(plasmaSwitchboard sb) {
        httpRemoteProxyConfig newConfig = new httpRemoteProxyConfig();

        // determining if remote proxy usage is enabled
        newConfig.remoteProxyUse = sb.getConfig("remoteProxyUse", "false").equalsIgnoreCase("true");

        // determining if remote proxy should be used for yacy -> yacy communication
        newConfig.remoteProxyUse4Yacy = sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true");

        // determining if remote proxy should be used for ssl connections
        newConfig.remoteProxyUse4SSL = sb.getConfig("remoteProxyUse4SSL", "true").equalsIgnoreCase("true");

        // reading the proxy host name
        newConfig.remoteProxyHost = sb.getConfig("remoteProxyHost", "").trim();

        // reading the proxy host port; trimmed like the other settings so that
        // stray whitespace does not silently select the fallback port
        try {
            newConfig.remoteProxyPort = Integer.parseInt(sb.getConfig("remoteProxyPort", "3128").trim());
        } catch (NumberFormatException e) {
            newConfig.remoteProxyPort = 3128;
        }

        // reading the credentials for the remote proxy
        newConfig.remoteProxyUser = sb.getConfig("remoteProxyUser", "").trim();
        newConfig.remoteProxyPwd = sb.getConfig("remoteProxyPwd", "").trim();

        // determining addresses for which the remote proxy should not be used
        newConfig.remoteProxyNoProxy = sb.getConfig("remoteProxyNoProxy","").trim();
        newConfig.remoteProxyNoProxyPatterns = newConfig.remoteProxyNoProxy.split(",");

        return newConfig;
    }
}

@ -134,6 +134,8 @@ public final class httpc {
private boolean remoteProxyUse = false;
private String savedRemoteHost = null;
private httpRemoteProxyConfig remoteProxyConfig = null;
String requestPath = null;
private boolean allowContentEncoding = true;
static boolean useYacyReferer = true;
@ -206,8 +208,7 @@ public final class httpc {
int port,
int timeout,
boolean ssl,
String remoteProxyHost,
int remoteProxyPort
httpRemoteProxyConfig remoteProxyConfig
) throws IOException {
httpc newHttpc;
@ -220,7 +221,13 @@ public final class httpc {
// initialize it
try {
newHttpc.init(server,port,timeout,ssl,remoteProxyHost, remoteProxyPort);
newHttpc.init(
server,
port,
timeout,
ssl,
remoteProxyConfig
);
} catch (IOException e) {
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
throw e;
@ -386,16 +393,25 @@ public final class httpc {
* @param remoteProxyPort
* @throws IOException
*/
void init(String server, int port, int timeout, boolean ssl,
String remoteProxyHost, int remoteProxyPort) throws IOException {
void init(
String server,
int port,
int timeout,
boolean ssl,
httpRemoteProxyConfig theRemoteProxyConfig) throws IOException {
if (port == -1) {
port = (ssl)? 443 : 80;
}
String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
this.init(remoteProxyHost, remoteProxyPort, timeout, ssl);
this.remoteProxyUse = true;
this.savedRemoteHost = server + ((port == 80) ? "" : (":" + port));
this.remoteProxyConfig = theRemoteProxyConfig;
}
/**
@ -491,6 +507,7 @@ public final class httpc {
this.handle = 0;
this.remoteProxyUse = false;
this.remoteProxyConfig = null;
this.savedRemoteHost = null;
this.requestPath = null;
@ -566,6 +583,14 @@ public final class httpc {
else
header.put(httpHeader.HOST, this.host);
}
if (this.remoteProxyUse) {
String remoteProxyUser = this.remoteProxyConfig.getProxyUser();
String remoteProxyPwd = this.remoteProxyConfig.getProxyPwd();
if ((remoteProxyUser!=null)&&(remoteProxyUser.length()>0)) {
header.put(httpHeader.PROXY_AUTHORIZATION,serverCodings.standardCoder.encodeBase64String(remoteProxyUser + ":" + remoteProxyPwd));
}
}
if (!(header.containsKey(httpHeader.CONNECTION))) {
header.put(httpHeader.CONNECTION, "close");
@ -702,6 +727,8 @@ public final class httpc {
this.clientOutput.write(buffer, 0, c);
len += c;
}
// TODO: we cannot set the header here. This is too late
requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(len));
}
this.clientOutput.flush();
@ -806,7 +833,6 @@ public final class httpc {
// finish with a boundary
out.write(boundary.getBytes());
out.write(serverCore.crlf);
//buf.write("" + serverCore.crlfString);
}
// create body array
out.close();
@ -816,15 +842,17 @@ public final class httpc {
//System.out.println("DEBUG: PUT BODY=" + new String(body));
if (zipContent) {
requestHeader.put(httpHeader.CONTENT_ENCODING, "gzip");
//TODO: should we also set the content length here?
} else {
// size of that body
requestHeader.put(httpHeader.CONTENT_LENGTH, Integer.toString(body.length));
}
// send the header
//System.out.println("header=" + requestHeader);
send(httpHeader.METHOD_POST, path, requestHeader, false);
// send the body
//System.out.println("body=" + buf.toString());
serverCore.send(this.clientOutput, body);
return new response(false);
@ -884,11 +912,20 @@ do upload
###### End OfList ######
*/
public static byte[] singleGET(String host, int port, String path, int timeout,
String user, String password, boolean ssl,
String proxyHost, int proxyPort,
httpHeader requestHeader) throws IOException {
public static byte[] singleGET(
String host,
int port,
String path,
int timeout,
String user,
String password,
boolean ssl,
httpRemoteProxyConfig theRemoteProxyConfig,
httpHeader requestHeader
) throws IOException {
if (requestHeader == null) requestHeader = new httpHeader();
// setting host authorization header
if ((user != null) && (password != null) && (user.length() != 0)) {
requestHeader.put(httpHeader.AUTHORIZATION, serverCodings.standardCoder.encodeBase64String(user + ":" + password));
}
@ -896,10 +933,10 @@ do upload
httpc con = null;
try {
if ((proxyHost == null) || (proxyPort == 0)) {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(host, port, timeout, ssl);
} else {
con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
}
httpc.response res = con.GET(path, null);
@ -915,16 +952,20 @@ do upload
}
public static byte[] singleGET(URL u, int timeout,
String user, String password,
String proxyHost, int proxyPort) throws IOException {
public static byte[] singleGET(
URL u,
int timeout,
String user,
String password,
httpRemoteProxyConfig theRemoteProxyConfig
) throws IOException {
int port = u.getPort();
boolean ssl = u.getProtocol().equals("https");
if (port < 0) port = (ssl) ? 443: 80;
String path = u.getPath();
String query = u.getQuery();
if ((query != null) && (query.length() > 0)) path = path + "?" + query;
return singleGET(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null);
return singleGET(u.getHost(), port, path, timeout, user, password, ssl, theRemoteProxyConfig, null);
}
/*
@ -937,10 +978,18 @@ do upload
}
*/
public static byte[] singlePOST(String host, int port, String path, int timeout,
String user, String password, boolean ssl,
String proxyHost, int proxyPort,
httpHeader requestHeader, serverObjects props) throws IOException {
public static byte[] singlePOST(
String host,
int port,
String path,
int timeout,
String user,
String password,
boolean ssl,
httpRemoteProxyConfig theRemoteProxyConfig,
httpHeader requestHeader,
serverObjects props
) throws IOException {
if (requestHeader == null) requestHeader = new httpHeader();
if ((user != null) && (password != null) && (user.length() != 0)) {
@ -949,10 +998,11 @@ do upload
httpc con = null;
try {
if ((proxyHost == null) || (proxyPort == 0))
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(host, port, timeout, ssl);
else
con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
} else {
con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
}
httpc.response res = con.POST(path, requestHeader, props, null);
//System.out.println("response=" + res.toString());
@ -968,30 +1018,69 @@ do upload
}
public static byte[] singlePOST(URL u, int timeout,
String user, String password,
String proxyHost, int proxyPort,
serverObjects props) throws IOException {
public static byte[] singlePOST(
URL u,
int timeout,
String user,
String password,
httpRemoteProxyConfig theRemoteProxyConfig,
serverObjects props
) throws IOException {
int port = u.getPort();
boolean ssl = u.getProtocol().equals("https");
if (port < 0) port = (ssl) ? 443 : 80;
String path = u.getPath();
String query = u.getQuery();
if ((query != null) && (query.length() > 0)) path = path + "?" + query;
return singlePOST(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null, props);
return singlePOST(
u.getHost(),
port,
path,
timeout,
user,
password,
ssl,
theRemoteProxyConfig,
null,
props
);
}
public static byte[] singlePOST(String url, int timeout, serverObjects props) throws IOException {
public static byte[] singlePOST(
String url,
int timeout,
serverObjects props
) throws IOException {
try {
return singlePOST(new URL(url), timeout, null, null, null, 0, props);
return singlePOST(
new URL(url),
timeout,
null,
null,
null,
props
);
} catch (MalformedURLException e) {
throw new IOException("Malformed URL: " + e.getMessage());
}
}
public static ArrayList wget(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException {
public static ArrayList wget(
URL url,
int timeout,
String user,
String password,
httpRemoteProxyConfig theRemoteProxyConfig
) throws IOException {
// splitting of the byte array into lines
byte[] a = singleGET(url, timeout, user, password, proxyHost, proxyPort);
byte[] a = singleGET(
url,
timeout,
user,
password,
theRemoteProxyConfig
);
if (a == null) return null;
int s = 0;
int e;
@ -1004,7 +1093,13 @@ do upload
return v;
}
public static httpHeader whead(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException {
public static httpHeader whead(
URL url,
int timeout,
String user,
String password,
httpRemoteProxyConfig theRemoteProxyConfig
) throws IOException {
// generate request header
httpHeader requestHeader = new httpHeader();
if ((user != null) && (password != null) && (user.length() != 0)) {
@ -1023,9 +1118,9 @@ do upload
// start connection
httpc con = null;
try {
if ((proxyHost == null) || (proxyPort == 0))
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
con = httpc.getInstance(host, port, timeout, ssl);
else con = httpc.getInstance(host, port, timeout, ssl, proxyHost, proxyPort);
else con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.HEAD(path, requestHeader);
if (res.status.startsWith("2")) {
@ -1053,9 +1148,24 @@ do upload
}
*/
public static ArrayList wput(URL url, int timeout, String user, String password, String proxyHost, int proxyPort, serverObjects props) throws IOException {
public static ArrayList wput(
URL url,
int timeout,
String user,
String password,
httpRemoteProxyConfig theRemoteProxyConfig,
serverObjects props
) throws IOException {
// splitting of the byte array into lines
byte[] a = singlePOST(url, timeout, user, password, proxyHost, proxyPort, props);
byte[] a = singlePOST(
url,
timeout,
user,
password,
theRemoteProxyConfig,
props
);
//System.out.println("wput-out=" + new String(a));
int s = 0;
int e;
@ -1090,8 +1200,10 @@ do upload
int timeout = Integer.parseInt(args[1]);
String proxyHost = args[2];
int proxyPort = Integer.parseInt(args[3]);
httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort);
try {
text = wget(new URL(url), timeout, null, null, proxyHost, proxyPort);
text = wget(new URL(url), timeout, null, null, theRemoteProxyConfig);
} catch (MalformedURLException e) {
System.out.println("The url '" + url + "' is wrong.");
} catch (IOException e) {

@ -105,14 +105,14 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
private static int timeout = 30000;
private static boolean yacyTrigger = true;
public static boolean isTransparentProxy = false;
public static boolean remoteProxyUse = false;
public static String remoteProxyHost = "";
public static int remoteProxyPort = -1;
public static String remoteProxyNoProxy = "";
public static String[] remoteProxyNoProxyPatterns = null;
// public static boolean remoteProxyUse = false;
// public static String remoteProxyHost = "";
// public static int remoteProxyPort = -1;
// public static String remoteProxyNoProxy = "";
// public static String[] remoteProxyNoProxyPatterns = null;
private static final HashSet remoteProxyAllowProxySet = new HashSet();
private static final HashSet remoteProxyDisallowProxySet = new HashSet();
// private static final HashSet remoteProxyAllowProxySet = new HashSet();
// private static final HashSet remoteProxyDisallowProxySet = new HashSet();
private static htmlFilterTransformer transformer = null;
public static final String userAgent = "yacy (" + httpc.systemOST +") yacy.net";
@ -203,16 +203,16 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// doing httpc init
httpc.useYacyReferer = sb.getConfig("useYacyReferer", "true").equals("true");
// load remote proxy data
remoteProxyHost = switchboard.getConfig("remoteProxyHost","");
try {
remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128"));
} catch (NumberFormatException e) {
remoteProxyPort = 3128;
}
remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true");
remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy","");
remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(",");
// // load remote proxy data
// remoteProxyHost = switchboard.getConfig("remoteProxyHost","");
// try {
// remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128"));
// } catch (NumberFormatException e) {
// remoteProxyPort = 3128;
// }
// remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true");
// remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy","");
// remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(",");
// set timeout
timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000"));
@ -1020,7 +1020,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
int timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000"));
int port, pos;
if ((pos = host.indexOf(":")) < 0) {
@ -1043,16 +1042,23 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
}
// possibly branch into PROXY-PROXY connection
if (remoteProxyUse) {
if ((switchboard.remoteProxyConfig != null) && (switchboard.remoteProxyConfig.useProxy4SSL())) {
httpc remoteProxy = null;
try {
remoteProxy = httpc.getInstance(host, port, timeout, false, remoteProxyHost, remoteProxyPort);
remoteProxy = httpc.getInstance(
host,
port,
timeout,
false,
switchboard.remoteProxyConfig
);
httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
response.print();
if (response.success()) {
// replace connection details
host = remoteProxyHost;
port = remoteProxyPort;
host = switchboard.remoteProxyConfig.getProxyHost();
port = switchboard.remoteProxyConfig.getProxyPort();
// go on (see below)
} else {
// pass error response back to client
@ -1147,37 +1153,56 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
}
private httpc newhttpc(String server, int port, int timeout) throws IOException {
// getting the remote proxy configuration
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
// a new httpc connection, combined with possible remote proxy
boolean useProxy = remoteProxyUse;
boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());
// check no-proxy rule
if ((useProxy) && (!(remoteProxyAllowProxySet.contains(server)))) {
if (remoteProxyDisallowProxySet.contains(server)) {
if (
(switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig.useProxy()) &&
(!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
useProxy = false;
} else {
// analyse remoteProxyNoProxy;
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
int i = 0;
while (i < remoteProxyNoProxyPatterns.length) {
if (server.matches(remoteProxyNoProxyPatterns[i])) {
while (i < remProxyConfig.getProxyNoProxyPatterns().length) {
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
// disallow proxy for this server
remoteProxyDisallowProxySet.add(server);
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
useProxy = false;
break;
}
i++;
}
if (i == remoteProxyNoProxyPatterns.length) {
if (i == remProxyConfig.getProxyNoProxyPatterns().length) {
// no pattern matches: allow server
remoteProxyAllowProxySet.add(server);
switchboard.remoteProxyConfig.remoteProxyAllowProxySet.add(server);
}
}
}
// branch to server/proxy
if (useProxy) {
return httpc.getInstance(server, port, timeout, false, remoteProxyHost, remoteProxyPort);
} else {
return httpc.getInstance(server, port, timeout, false);
return httpc.getInstance(
server,
port,
timeout,
false,
remProxyConfig
);
}
return httpc.getInstance(
server,
port,
timeout,
false
);
}
private httpc newhttpc(String address, int timeout) throws IOException {

@ -60,7 +60,7 @@ public class natLib {
rm status.htm
*/
try {
ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null, 0);
ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), 5000, "admin", password, null);
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
@ -72,7 +72,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null, 0);
ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), 5000, null, null, null);
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@ -83,7 +83,7 @@ public class natLib {
private static String getStanford() {
try {
ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null, 0);
ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), 5000, null, null, null);
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
@ -94,7 +94,7 @@ public class natLib {
private static String getIPID() {
try {
ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null, 0);
ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), 5000, null, null, null);
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);

@ -56,7 +56,6 @@ public final class plasmaCrawlLoader extends Thread {
static plasmaSwitchboard switchboard;
private final plasmaHTCache cacheManager;
private final int socketTimeout;
private final serverLog log;
private final CrawlerMessageQueue theQueue;
@ -66,15 +65,13 @@ public final class plasmaCrawlLoader extends Thread {
private boolean stopped = false;
public plasmaCrawlLoader(
plasmaHTCache cacheManager,
serverLog log) {
plasmaHTCache theCacheManager,
serverLog theLog) {
this.setName("plasmaCrawlLoader");
this.cacheManager = cacheManager;
this.log = log;
this.socketTimeout = Integer.parseInt(switchboard.getConfig("crawler.clientTimeout", "10000"));
this.cacheManager = theCacheManager;
this.log = theLog;
// configuring the crawler messagequeue
this.theQueue = new CrawlerMessageQueue();
@ -103,11 +100,8 @@ public final class plasmaCrawlLoader extends Thread {
CrawlerFactory theFactory = new CrawlerFactory(
this.theThreadGroup,
switchboard,
cacheManager,
socketTimeout,
switchboard.getConfig("remoteProxyUse","false").equals("true"),
switchboard.getConfig("remoteProxyHost",""),
Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")),
log);
this.crawlwerPool = new CrawlerPool(theFactory,this.cralwerPoolConfig,this.theThreadGroup);
@ -363,37 +357,28 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact
private CrawlerPool thePool;
private final ThreadGroup theThreadGroup;
private final plasmaHTCache cacheManager;
private final int socketTimeout;
private final boolean remoteProxyUse;
private final String remoteProxyHost;
private final int remoteProxyPort;
private final serverLog theLog;
private final plasmaSwitchboard sb;
public CrawlerFactory(
ThreadGroup theThreadGroup,
plasmaHTCache cacheManager,
int socketTimeout,
boolean remoteProxyUse,
String remoteProxyHost,
int remoteProxyPort,
serverLog theLog) {
ThreadGroup threadGroup,
plasmaSwitchboard theSb,
plasmaHTCache theCacheManager,
serverLog log) {
super();
if (theThreadGroup == null)
if (threadGroup == null)
throw new IllegalArgumentException("The threadgroup object must not be null.");
this.theThreadGroup = theThreadGroup;
this.cacheManager = cacheManager;
this.socketTimeout = socketTimeout;
this.remoteProxyUse = remoteProxyUse;
this.remoteProxyHost = remoteProxyHost;
this.remoteProxyPort = remoteProxyPort;
this.theLog = theLog;
this.theThreadGroup = threadGroup;
this.cacheManager = theCacheManager;
this.sb = theSb;
this.theLog = log;
}
public void setPool(CrawlerPool thePool) {
this.thePool = thePool;
public void setPool(CrawlerPool pool) {
this.thePool = pool;
}
/**
@ -403,11 +388,8 @@ final class CrawlerFactory implements org.apache.commons.pool.PoolableObjectFact
return new plasmaCrawlWorker(
this.theThreadGroup,
this.thePool,
this.sb,
this.cacheManager,
this.socketTimeout,
this.remoteProxyUse,
this.remoteProxyHost,
this.remoteProxyPort,
this.theLog);
}

@ -57,6 +57,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.http.httpdProxyHandler;
import de.anomic.server.serverCore;
@ -72,12 +73,10 @@ public final class plasmaCrawlWorker extends Thread {
private static final String threadBaseName = "CrawlerWorker";
private final CrawlerPool myPool;
private final plasmaSwitchboard sb;
private final plasmaHTCache cacheManager;
private final int socketTimeout;
private final boolean remoteProxyUse;
private final String remoteProxyHost;
private final int remoteProxyPort;
private final serverLog log;
private int socketTimeout;
public plasmaCrawlLoaderMessage theMsg;
private URL url;
@ -114,33 +113,35 @@ public final class plasmaCrawlWorker extends Thread {
public plasmaCrawlWorker(
ThreadGroup theTG,
CrawlerPool thePool,
plasmaHTCache cacheManager,
int socketTimeout,
boolean remoteProxyUse,
String remoteProxyHost,
int remoteProxyPort,
serverLog log) {
CrawlerPool thePool,
plasmaSwitchboard theSb,
plasmaHTCache theCacheManager,
serverLog theLog) {
super(theTG,threadBaseName + "_inPool");
this.myPool = thePool;
this.cacheManager = cacheManager;
this.socketTimeout = socketTimeout;
this.remoteProxyUse = remoteProxyUse;
this.remoteProxyHost = remoteProxyHost;
this.remoteProxyPort = remoteProxyPort;
this.log = log;
this.sb = theSb;
this.cacheManager = theCacheManager;
this.log = theLog;
// setting the crawler timeout properly
this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
}
public long getDuration() {
long startDate = this.startdate;
return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
}
public synchronized void execute(plasmaCrawlLoaderMessage theMsg) {
this.theMsg = theMsg;
public synchronized void execute(plasmaCrawlLoaderMessage theNewMsg) {
this.theMsg = theNewMsg;
this.url = theMsg.url;
this.name = theMsg.name;
this.referer = theMsg.referer;
this.initiator = theMsg.initiator;
this.depth = theMsg.depth;
this.profile = theMsg.profile;
this.url = theNewMsg.url;
this.name = theNewMsg.name;
this.referer = theNewMsg.referer;
this.initiator = theNewMsg.initiator;
this.depth = theNewMsg.depth;
this.profile = theNewMsg.profile;
this.startdate = System.currentTimeMillis();
//this.error = null;
@ -197,7 +198,7 @@ public final class plasmaCrawlWorker extends Thread {
if (!this.stopped && !this.isInterrupted()) {
try {
this.myPool.returnObject(this);
this.setName(this.threadBaseName + "_inPool");
this.setName(plasmaCrawlWorker.threadBaseName + "_inPool");
}
catch (Exception e1) {
log.logSevere("pool error", e1);
@ -210,10 +211,25 @@ public final class plasmaCrawlWorker extends Thread {
public void execute() throws IOException {
try {
this.setName(this.threadBaseName + "_" + this.url);
load(this.url, this.name, this.referer, this.initiator, this.depth, this.profile,
this.socketTimeout, this.remoteProxyHost, this.remoteProxyPort, this.remoteProxyUse,
this.cacheManager, this.log);
// setting threadname
this.setName(plasmaCrawlWorker.threadBaseName + "_" + this.url);
// refreshing timeout value
this.socketTimeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
// loading resource
load(
this.url,
this.name,
this.referer,
this.initiator,
this.depth,
this.profile,
this.socketTimeout,
this.sb.remoteProxyConfig,
this.cacheManager,
this.log
);
} catch (IOException e) {
//throw e;
@ -223,8 +239,8 @@ public final class plasmaCrawlWorker extends Thread {
}
}
public void setStopped(boolean stopped) {
this.stopped = stopped;
public void setStopped(boolean isStopped) {
this.stopped = isStopped;
}
public boolean isRunning() {
@ -251,9 +267,7 @@ public final class plasmaCrawlWorker extends Thread {
int depth,
plasmaCrawlProfile.entry profile,
int socketTimeout,
String remoteProxyHost,
int remoteProxyPort,
boolean remoteProxyUse,
httpRemoteProxyConfig theRemoteProxyConfig,
plasmaHTCache cacheManager,
serverLog log
) throws IOException {
@ -264,9 +278,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
theRemoteProxyConfig,
cacheManager,
log,
DEFAULT_CRAWLING_RETRY_COUNT,
@ -282,9 +294,7 @@ public final class plasmaCrawlWorker extends Thread {
int depth,
plasmaCrawlProfile.entry profile,
int socketTimeout,
String remoteProxyHost,
int remoteProxyPort,
boolean remoteProxyUse,
httpRemoteProxyConfig theRemoteProxyConfig,
plasmaHTCache cacheManager,
serverLog log,
int crawlingRetryCount,
@ -309,8 +319,16 @@ public final class plasmaCrawlWorker extends Thread {
String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
sb.urlPool.errorURL.newEntry(url, referer,initiator, yacyCore.seedDB.mySeed.hash,
name, "denied_(url_in_blacklist)", new bitfield(plasmaURL.urlFlagLength), true);
sb.urlPool.errorURL.newEntry(
url,
referer,
initiator,
yacyCore.seedDB.mySeed.hash,
name,
"denied_(url_in_blacklist)",
new bitfield(plasmaURL.urlFlagLength),
true
);
return;
}
@ -335,8 +353,9 @@ public final class plasmaCrawlWorker extends Thread {
//System.out.println("CRAWLER_REQUEST_HEADER=" + requestHeader.toString()); // DEBUG
// open the connection
remote = (remoteProxyUse) ? httpc.getInstance(host, port, socketTimeout, ssl, remoteProxyHost, remoteProxyPort)
: httpc.getInstance(host, port, socketTimeout, ssl);
remote = ((theRemoteProxyConfig != null) && (theRemoteProxyConfig.useProxy()))
? httpc.getInstance(host, port, socketTimeout, ssl, theRemoteProxyConfig)
: httpc.getInstance(host, port, socketTimeout, ssl);
// specifying if content encoding is allowed
remote.setAllowContentEncoding(useContentEncodingGzip);
@ -346,6 +365,8 @@ public final class plasmaCrawlWorker extends Thread {
if (res.status.startsWith("200") || res.status.startsWith("203")) {
// the transfer is ok
// TODO: aborting download if content is too long ...
long contentLength = res.responseHeader.contentLength();
// reserve cache entry
@ -354,7 +375,6 @@ public final class plasmaCrawlWorker extends Thread {
// request has been placed and result has been returned. work off response
File cacheFile = cacheManager.getCachePath(url);
try {
String error = null;
if (plasmaParser.supportedContent(url,res.responseHeader.mime())) {
if (cacheFile.isFile()) {
cacheManager.deleteFile(url);
@ -430,9 +450,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
theRemoteProxyConfig,
cacheManager,
log,
--crawlingRetryCount,
@ -517,9 +535,7 @@ public final class plasmaCrawlWorker extends Thread {
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
theRemoteProxyConfig,
cacheManager,
log,
--crawlingRetryCount,

@ -0,0 +1,255 @@
package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Vector;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog;
public class plasmaDbImporter extends Thread {
public static final Vector finishedJobs = new Vector();
public static final ThreadGroup runningJobs = new ThreadGroup("DbImport");
public static int currMaxJobNr = 0;
private final int jobNr;
private final plasmaCrawlLURL homeUrlDB;
private final plasmaWordIndex homeWordIndex;
private final plasmaCrawlLURL importUrlDB;
private final plasmaWordIndex importWordIndex;
private final String importPath;
private final File importRoot;
private final int importStartSize;
private final serverLog log;
private boolean stopped = false;
private boolean paused = false;
private String wordHash = "------------";
long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = wordChunkStart;
String wordChunkStartHash = "------------", wordChunkEndHash;
private long urlCounter = 0, wordCounter = 0, entryCounter = 0;
private long globalStart = System.currentTimeMillis();
private long globalEnd;
private String error;
public void stoppIt() {
this.stopped = true;
}
public String getError() {
return this.error;
}
public int getJobNr() {
return this.jobNr;
}
public String getCurrentWordhash() {
return this.wordHash;
}
public long getUrlCounter() {
return this.urlCounter;
}
public long getWordEntityCounter() {
return this.wordCounter;
}
public long getWordEntryCounter() {
return this.entryCounter;
}
public File getImportRoot() {
return this.importRoot;
}
public int getImportWordDbSize() {
return this.importWordIndex.size();
}
public plasmaDbImporter(plasmaWordIndex theHomeIndexDB, plasmaCrawlLURL theHomeUrlDB, String theImportPath) throws IOException {
super(runningJobs,"DB-Import_" + theImportPath);
this.log = new serverLog("DB-IMPORT");
synchronized(runningJobs) {
this.jobNr = currMaxJobNr;
currMaxJobNr++;
}
if (theImportPath == null) throw new NullPointerException();
this.importPath = theImportPath;
this.importRoot = new File(theImportPath);
if (theHomeIndexDB == null) throw new NullPointerException();
this.homeWordIndex = theHomeIndexDB;
if (theHomeUrlDB == null) throw new NullPointerException();
this.homeUrlDB = theHomeUrlDB;
if (this.homeWordIndex.getRoot().equals(importRoot)) {
throw new IllegalArgumentException("Import and home DB directory must not be equal");
}
// configure import DB
String errorMsg = null;
if (!this.importRoot.exists()) errorMsg = "Import directory does not exist.";
if (!this.importRoot.canRead()) errorMsg = "Import directory is not readable.";
if (!this.importRoot.canWrite()) errorMsg = "Import directory is not writeable";
if (!this.importRoot.isDirectory()) errorMsg = "ImportDirectory is not a directory.";
if (errorMsg != null) {
this.log.logSevere(errorMsg + "\nName: " + this.importRoot.getAbsolutePath());
throw new IllegalArgumentException(errorMsg);
}
this.log.logFine("Initializing source word index db.");
this.importWordIndex = new plasmaWordIndex(this.importRoot, 8*1024*1024, this.log);
this.log.logFine("Initializing import URL db.");
this.importUrlDB = new plasmaCrawlLURL(new File(this.importRoot, "urlHash.db"), 4*1024*1024);
this.importStartSize = this.importWordIndex.size();
}
public void run() {
try {
importWordsDB();
} finally {
globalEnd = System.currentTimeMillis();
finishedJobs.add(this);
}
}
public long getTotalRuntime() {
return (this.globalEnd == 0)?System.currentTimeMillis()-this.globalStart:this.globalEnd-this.globalStart;
}
public int getProcessingStatus() {
return (this.importStartSize-this.importWordIndex.size())/(this.importStartSize/100);
}
public long getElapsedTime() {
return System.currentTimeMillis()-this.globalStart;
}
public long getEstimatedTime() {
return (this.wordCounter==0)?0:this.importWordIndex.size()*((System.currentTimeMillis()-this.globalStart)/this.wordCounter);
}
public void importWordsDB() {
this.log.logInfo("STARTING DB-IMPORT");
try {
this.log.logInfo("Importing DB from '" + this.importRoot.getAbsolutePath() + "' to '" + this.homeWordIndex.getRoot().getAbsolutePath() + "'.");
this.log.logInfo("Home word index contains " + this.homeWordIndex.size() + " words and " + this.homeUrlDB.size() + " URLs.");
this.log.logInfo("Import word index contains " + this.importWordIndex.size() + " words and " + this.importUrlDB.size() + " URLs.");
// iterate over all words from import db
Iterator importWordHashIterator = this.importWordIndex.wordHashes(wordChunkStartHash, true, true);
while (!isAborted() && importWordHashIterator.hasNext()) {
plasmaWordIndexEntity importWordIdxEntity = null;
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();
continue;
}
// creating a container used to hold the imported entries
plasmaWordIndexEntryContainer newContainer = new plasmaWordIndexEntryContainer(wordHash,importWordIdxEntity.size());
// the combined container will fit, read the container
Iterator importWordIdxEntries = importWordIdxEntity.elements(true);
plasmaWordIndexEntry importWordIdxEntry;
while (importWordIdxEntries.hasNext()) {
// testing if import process was aborted
if (isAborted()) break;
// getting next word index entry
entryCounter++;
importWordIdxEntry = (plasmaWordIndexEntry) importWordIdxEntries.next();
String urlHash = importWordIdxEntry.getUrlHash();
if ((this.importUrlDB.exists(urlHash)) && (!this.homeUrlDB.exists(urlHash))) {
urlCounter++;
// importing the new url
plasmaCrawlLURL.Entry urlEntry = this.importUrlDB.getEntry(urlHash);
this.homeUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) {
this.log.logFine(urlCounter + " URLs processed so far.");
}
}
// adding word index entity to container
newContainer.add(importWordIdxEntry,System.currentTimeMillis());
if (entryCounter % 500 == 0) {
this.log.logFine(entryCounter + " word entries and " + wordCounter + " word entries processed so far.");
}
}
// testing if import process was aborted
if (isAborted()) break;
// importing entity container to home db
homeWordIndex.addEntries(newContainer, true);
// delete complete index entity file
importWordIdxEntity.close();
importWordIndex.deleteIndex(wordHash);
// print out some statistical information
if (wordCounter%500 == 0) {
wordChunkEndHash = wordHash;
wordChunkEnd = System.currentTimeMillis();
long duration = wordChunkEnd - wordChunkStart;
log.logInfo(wordCounter + " word entities imported " +
"[" + wordChunkStartHash + " .. " + wordChunkEndHash + "] " +
this.getProcessingStatus() + "%\n" +
"Speed: "+ 500*1000/duration + " word entities/s" +
" | Elapsed time: " + serverDate.intervalToString(getElapsedTime()) +
" | Estimated time: " + serverDate.intervalToString(getEstimatedTime()) + "\n" +
"Home Words = " + homeWordIndex.size() +
" | Import Words = " + importWordIndex.size());
wordChunkStart = wordChunkEnd;
wordChunkStartHash = wordChunkEndHash;
}
} catch (Exception e) {
log.logSevere("Import of word entity '" + wordHash + "' failed.",e);
} finally {
if (importWordIdxEntity != null) try { importWordIdxEntity.close(); } catch (Exception e) {}
}
}
this.log.logInfo("Home word index contains " + homeWordIndex.size() + " words and " + homeUrlDB.size() + " URLs.");
this.log.logInfo("Import word index contains " + importWordIndex.size() + " words and " + importUrlDB.size() + " URLs.");
this.log.logInfo("DB-IMPORT FINISHED");
} catch (Exception e) {
this.log.logSevere("Database import failed.",e);
e.printStackTrace();
this.error = e.toString();
} finally {
if (importUrlDB != null) try { importUrlDB.close(); } catch (Exception e){}
if (importWordIndex != null) try { importWordIndex.close(5000); } catch (Exception e){}
}
}
/**
 * Reports whether the import loop should stop.
 *
 * @return {@code true} if the {@code stopped} flag of this object is set or
 *         the thread executing this call has its interrupt status set;
 *         {@code false} otherwise
 */
private boolean isAborted() {
    // an explicit stop request wins; the interrupt status is only consulted otherwise
    if (this.stopped) {
        return true;
    }
    // isInterrupted() only reads the interrupt status, it does not clear it
    final Thread self = Thread.currentThread();
    return self.isInterrupted();
}
}

@ -77,19 +77,18 @@ public class plasmaSnippetCache {
private plasmaHTCache cacheManager;
private plasmaParser parser;
private serverLog log;
private String remoteProxyHost;
private int remoteProxyPort;
private boolean remoteProxyUse;
private plasmaSwitchboard sb;
public plasmaSnippetCache(plasmaHTCache cacheManager, plasmaParser parser,
String remoteProxyHost, int remoteProxyPort, boolean remoteProxyUse,
serverLog log) {
public plasmaSnippetCache(
plasmaSwitchboard theSb,
plasmaHTCache cacheManager,
plasmaParser parser,
serverLog log
) {
this.cacheManager = cacheManager;
this.parser = parser;
this.log = log;
this.remoteProxyHost = remoteProxyHost;
this.remoteProxyPort = remoteProxyPort;
this.remoteProxyUse = remoteProxyUse;
this.sb = theSb;
this.snippetsScoreCounter = 0;
this.snippetsScore = new kelondroMScoreCluster();
this.snippetsCache = new HashMap();
@ -367,11 +366,9 @@ public class plasmaSnippetCache {
0,
null,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
cacheManager,
log);
this.sb.remoteProxyConfig,
this.cacheManager,
this.log);
}
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) {

@ -121,6 +121,7 @@ import de.anomic.data.wikiBoard;
import de.anomic.data.userDB;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
@ -166,9 +167,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlStacker sbStackCrawlThread;
public messageBoard messageDB;
public wikiBoard wikiDB;
public String remoteProxyHost;
public int remoteProxyPort;
public boolean remoteProxyUse;
public static plasmaCrawlRobotsTxt robots;
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;
@ -182,7 +180,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public yacyCore yc;
public HashMap indexingTasksInProcess;
public userDB userDB;
/*
* Remote Proxy configuration
*/
// public boolean remoteProxyUse;
// public boolean remoteProxyUse4Yacy;
// public String remoteProxyHost;
// public int remoteProxyPort;
// public String remoteProxyNoProxy = "";
// public String[] remoteProxyNoProxyPatterns = null;
public httpRemoteProxyConfig remoteProxyConfig = null;
/*
* Some constants
*/
private static final String STR_PROXYPROFILE = "defaultProxyProfile";
private static final String STR_REMOTEPROFILE = "defaultRemoteProfile";
private static final String STR_REMOTECRAWLTRIGGER = "REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER ";
@ -206,25 +219,45 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.listsPath = new File(rootPath, getConfig("listsPath", "LISTS"));
this.log.logConfig("Lists Path: " + this.listsPath.toString());
// remote proxy configuration
remoteProxyHost = getConfig("remoteProxyHost", "");
try {
remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128"));
} catch (NumberFormatException e) {
remoteProxyPort = 3128;
}
if (getConfig("remoteProxyUse", "false").equals("true")) {
remoteProxyUse = true;
log.logConfig("Using remote proxy:" +
"\n\tHost: " + remoteProxyHost +
"\n\tPort: " + remoteProxyPort);
} else {
remoteProxyUse = false;
remoteProxyHost = null;
remoteProxyPort = 0;
}
proxyLastAccess = System.currentTimeMillis() - 60000;
/* ============================================================================
* Remote Proxy configuration
* ============================================================================ */
this.remoteProxyConfig = httpRemoteProxyConfig.init(this);
this.log.logConfig("Remote proxy configuration:\n" + this.remoteProxyConfig.toString());
// // reading the proxy host name
// this.remoteProxyHost = getConfig("remoteProxyHost", "");
//
// // reading the proxy host port
// try {
// this.remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128"));
// } catch (NumberFormatException e) {
// this.remoteProxyPort = 3128;
// }
//
// // determining if remote proxy should be used for yacy -> yacy communication
// this.remoteProxyUse4Yacy = getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true");
//
// // determining addresses for which the remote proxy should not be used
// this.remoteProxyNoProxy = getConfig("remoteProxyNoProxy","");
// this.remoteProxyNoProxyPatterns = this.remoteProxyNoProxy.split(",");
//
// // determining if remote Proxy should be used
// if (getConfig("remoteProxyUse", "false").equalsIgnoreCase("true")) {
// this.remoteProxyUse = true;
// this.log.logConfig("Using remote proxy:" +
// "\n\tHost: " + this.remoteProxyHost +
// "\n\tPort: " + this.remoteProxyPort +
// "\n\tUseProxy4Yacy: " + Boolean.toString(this.remoteProxyUse4Yacy)
// );
// } else {
// this.remoteProxyUse = false;
// this.remoteProxyHost = null;
// this.remoteProxyPort = 0;
// }
this.proxyLastAccess = System.currentTimeMillis() - 60000;
// configuring list path
if (!(listsPath.exists())) listsPath.mkdirs();
// load coloured lists
@ -420,9 +453,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// generate snippets cache
log.logConfig("Initializing Snippet Cache");
snippetCache = new plasmaSnippetCache(cacheManager, parser,
remoteProxyHost, remoteProxyPort, remoteProxyUse,
log);
snippetCache = new plasmaSnippetCache(this,cacheManager, parser,log);
// start yacy core
log.logConfig("Starting YaCy Protocol Core");

@ -45,6 +45,7 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Hashtable;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
public class loaderThreads {
@ -53,28 +54,30 @@ public class loaderThreads {
private int timeout;
private String user;
private String password;
private String remoteProxyHost;
private int remoteProxyPort;
private httpRemoteProxyConfig remoteProxyConfig;
// management objects for collection of threads
Hashtable threads;
int completed, failed;
public loaderThreads() {
this(null, 0);
this(null);
}
public loaderThreads(String remoteProxyHost, int remoteProxyPort) {
this(10000, null, null, remoteProxyHost, remoteProxyPort);
public loaderThreads(httpRemoteProxyConfig theremoteProxyConfig) {
this(10000, null, null, theremoteProxyConfig);
}
public loaderThreads(int timeout, String user, String password,
String remoteProxyHost, int remoteProxyPort) {
public loaderThreads(
int timeout,
String user,
String password,
httpRemoteProxyConfig theremoteProxyConfig
) {
this.timeout = timeout;
this.user = user;
this.password = password;
this.remoteProxyHost = remoteProxyHost;
this.remoteProxyPort = remoteProxyPort;
this.remoteProxyConfig = theremoteProxyConfig;
this.threads = new Hashtable();
this.completed = 0;
this.failed = 0;
@ -145,7 +148,7 @@ public class loaderThreads {
public void run() {
try {
page = httpc.wget(url, timeout, user, password, remoteProxyHost, remoteProxyPort);
page = httpc.wget(url, timeout, user, password, remoteProxyConfig);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {
@ -227,7 +230,8 @@ public class loaderThreads {
}
public static void main(String[] args) {
loaderThreads loader = new loaderThreads("192.168.1.122", 3128);
httpRemoteProxyConfig proxyConfig = httpRemoteProxyConfig.init("192.168.1.122", 3128);
loaderThreads loader = new loaderThreads(proxyConfig);
try {
loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt"));
} catch (MalformedURLException e) {

@ -99,20 +99,32 @@ public final class yacyClient {
10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort));
*/
// building URL
final URL url = new URL("http://" + address + "/yacy/hello.html");
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// adding all needed parameters
final serverObjects obj = new serverObjects(6);
obj.put("iam", yacyCore.seedDB.mySeed.hash);
obj.put("pattern", "");
obj.put("count", "20");
obj.put("key", key);
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
obj.put("myUTC", System.currentTimeMillis());
obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key));
result = nxTools.table(httpc.wput(url,
105000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
obj.put("iam", yacyCore.seedDB.mySeed.hash);
obj.put("pattern", "");
obj.put("count", "20");
obj.put("key", key);
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
obj.put("myUTC", System.currentTimeMillis());
obj.put(yacySeed.SEED, yacyCore.seedDB.mySeed.genSeedStr(key));
// sending request
result = nxTools.table(
httpc.wput(url,
105000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
obj
)
);
} catch (Exception e) {
if (Thread.currentThread().isInterrupted()) {
yacyCore.log.logFine("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' interrupted.");
@ -215,12 +227,27 @@ public final class yacyClient {
public static yacySeed querySeed(yacySeed target, String seedHash) {
final String key = crypt.randomSalt();
try {
final HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" + key +
"&object=seed&env=" + seedHash),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// sending request
final HashMap result = nxTools.table(
httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html" +
"?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" + key +
"&object=seed" +
"&env=" + seedHash
),
10000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
)
);
if (result == null || result.size() == 0) { return null; }
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
return yacySeed.genRemoteSeed((String) result.get("response"), key);
@ -232,13 +259,28 @@ public final class yacyClient {
public static int queryRWICount(yacySeed target, String wordHash) {
try {
final HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" +
"&object=rwicount&env=" + wordHash +
"&ttl=0"),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// sending request
final HashMap result = nxTools.table(
httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html" +
"?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" +
"&object=rwicount" +
"&env=" + wordHash +
"&ttl=0"
),
10000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
)
);
if (result == null || result.size() == 0) { return -1; }
return Integer.parseInt((String) result.get("response"));
} catch (Exception e) {
@ -247,19 +289,36 @@ public final class yacyClient {
}
}
public static int queryUrlCount(yacySeed target) {
public static int queryUrlCount(yacySeed target) {
if (target == null) { return -1; }
if (yacyCore.seedDB.mySeed == null) return -1;
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// building url
final String querystr =
"http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" +
"&object=lurlcount&env=&ttl=0";
"http://" + target.getAddress() +
"/yacy/query.html" +
"?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" +
"&object=lurlcount" +
"&env=" +
"&ttl=0";
// sending request
try {
final HashMap result = nxTools.table(httpc.wget(
new URL(querystr), 6000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
final HashMap result = nxTools.table(
httpc.wget(
new URL(querystr),
6000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
)
);
// yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString());
if ((result == null) || (result.size() == 0)) return -1;
final String resp = (String) result.get("response");
@ -290,8 +349,14 @@ public final class yacyClient {
// request result
final String key = crypt.randomSalt();
try {
try {
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// building url
final String url = "http://" + targetPeer.getAddress() + "/yacy/search.html";
// adding all needed parameters
/*
String url = "http://" + targetPeer.getAddress() +
"/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
@ -301,22 +366,30 @@ public final class yacyClient {
"&query=" + wordhashes;
*/
final serverObjects obj = new serverObjects(9);
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();
final HashMap result = nxTools.table(httpc.wput(new URL(url),
300000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
// sending request
final HashMap result = nxTools.table(
httpc.wput(
new URL(url),
300000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
obj
)
);
final long totalrequesttime = System.currentTimeMillis() - timestamp;
/*
@ -394,13 +467,20 @@ public final class yacyClient {
// ask for allowed message size and attachment size
// if this replies null, the peer does not answer
if (yacyCore.seedDB == null || yacyCore.seedDB.mySeed == null) { return null; }
final serverObjects post = new serverObjects(5);
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// adding all needed parameters
final String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "permission");
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetHash);
post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
final serverObjects post = new serverObjects(5);
post.put("key", key);
post.put("process", "permission");
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetHash);
post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
// getting target address
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
@ -412,10 +492,19 @@ public final class yacyClient {
//System.out.println("remote address: " + address);
}
if (address == null) { address = "localhost:8080"; }
// sending request
try {
return nxTools.table(httpc.wput(
new URL("http://" + address + "/yacy/message.html"),
8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
return nxTools.table(
httpc.wput(
new URL("http://" + address + "/yacy/message.html"),
8000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
)
);
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.permissionMessage error:" + e.getMessage());
@ -425,15 +514,22 @@ public final class yacyClient {
public static HashMap postMessage(String targetHash, String subject, byte[] message) {
// this posts a message to the remote message board
final serverObjects post = new serverObjects(7);
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// adding all needed parameters
final String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "post");
post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
post.put("youare", targetHash);
post.put("subject", subject);
post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
post.put("message", new String(message));
final serverObjects post = new serverObjects(7);
post.put("key", key);
post.put("process", "post");
post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
post.put("youare", targetHash);
post.put("subject", subject);
post.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
post.put("message", new String(message));
// getting target address
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
@ -441,11 +537,18 @@ public final class yacyClient {
address = yacyCore.seedDB.getConnected(targetHash).getAddress();
}
if (address == null) { address = "localhost:8080"; }
//System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString());
// sending request
try {
final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/message.html"), 20000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
//System.out.println("V=" + v.toString());
final ArrayList v = httpc.wput(
new URL("http://" + address + "/yacy/message.html"),
20000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
);
return nxTools.table(v);
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.postMessage error:" + e.getMessage());
@ -459,9 +562,12 @@ public final class yacyClient {
if (yacyCore.seedDB.mySeed == null) { return null; }
if (yacyCore.seedDB.mySeed == targetSeed) { return null; }
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// construct request
final serverObjects post = new serverObjects(9);
final String key = crypt.randomSalt();
final serverObjects post = new serverObjects(9);
post.put("key", key);
post.put("process", "crawl");
post.put("iam", yacyCore.seedDB.mySeed.hash);
@ -472,12 +578,22 @@ public final class yacyClient {
post.put("depth", "0");
post.put("ttl", "0");
// determining target address
final String address = targetSeed.getAddress();
if (address == null) { return null; }
// sending request
try {
return nxTools.table(httpc.wput(
new URL("http://" + address + "/yacy/crawlOrder.html"),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
return nxTools.table(
httpc.wput(
new URL("http://" + address + "/yacy/crawlOrder.html"),
10000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
)
);
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.crawlOrder error: peer=" + targetSeed.getName() + ", error=" + e.getMessage());
@ -516,26 +632,38 @@ public final class yacyClient {
stale - the resource was reloaded but not processed because source had no changes
*/
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// construct request
final String key = crypt.randomSalt();
// determining target address
String address = targetSeed.getAddress();
if (address == null) { return null; }
// sending request
try {
return nxTools.table(httpc.wget(
new URL("http://" + address + "/yacy/crawlReceipt.html?" +
"iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + targetSeed.hash +
"&process=" + process +
"&key=" + key +
"&urlhash=" + ((entry == null) ? "" : entry.hash()) +
"&result=" + result +
"&reason=" + reason +
"&wordh=" + wordhashes +
"&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
),
60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
return nxTools.table(
httpc.wget(
new URL("http://" + address + "/yacy/crawlReceipt.html" +
"?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + targetSeed.hash +
"&process=" + process +
"&key=" + key +
"&urlhash=" + ((entry == null) ? "" : entry.hash()) +
"&result=" + result +
"&reason=" + reason +
"&wordh=" + wordhashes +
"&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
),
60000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
)
);
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logSevere("yacyClient.crawlReceipt error:" + e.getMessage());
@ -549,24 +677,28 @@ public final class yacyClient {
*/
public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, HashMap urlCache, boolean gzipBody, int timeout) {
HashMap in = transferRWI(targetSeed, indexes, gzipBody, timeout);
if (in == null) { return "no_connection_1"; }
String result = (String) in.get("result");
if (result == null) { return "no_result_1"; }
if (!(result.equals("ok"))) return result;
// in now contains a list of unknown hashes
final String uhss = (String) in.get("unknownURL");
if (uhss == null) { return "no_unknownURL_tag_in_response"; }
if (uhss.length() == 0) { return null; } // all url's known, we are ready here
final String[] uhs = uhss.split(",");
// System.out.println("DEBUG yacyClient.transferIndex: " + uhs.length + " urls unknown");
if (uhs.length == 0) { return null; } // all url's known
// extract the urlCache from the result
plasmaCrawlLURL.Entry[] urls = new plasmaCrawlLURL.Entry[uhs.length];
for (int i = 0; i < uhs.length; i++) {
urls[i] = (plasmaCrawlLURL.Entry) urlCache.get(uhs[i]);
if (urls[i] == null) System.out.println("DEBUG transferIndex: error with requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
in = transferURL(targetSeed, urls, gzipBody, timeout);
if (in == null) { return "no_connection_2"; }
result = (String) in.get("result");
@ -574,12 +706,17 @@ public final class yacyClient {
if (!(result.equals("ok"))) { return result; }
// int doubleentries = Integer.parseInt((String) in.get("double"));
// System.out.println("DEBUG tansferIndex: transferred " + uhs.length + " URL's, double=" + doubleentries);
return null;
}
private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, boolean gzipBody, int timeout) {
final String address = targetSeed.getAddress();
if (address == null) { return null; }
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// prepare post values
final serverObjects post = new serverObjects(7);
final String key = crypt.randomSalt();
@ -592,6 +729,8 @@ public final class yacyClient {
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
post.put("wordc", Integer.toString(indexes.length));
int indexcount = 0;
final StringBuffer entrypost = new StringBuffer(indexes.length*73);
Iterator eenum;
@ -618,8 +757,14 @@ public final class yacyClient {
post.put("entryc", Integer.toString(indexcount));
post.put("indexes", entrypost.toString());
try {
final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferRWI.html"), timeout, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
final ArrayList v = httpc.wput(
new URL("http://" + address + "/yacy/transferRWI.html"),
timeout,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
);
// this should return a list of urlhashes that are unknown
if (v != null) {
yacyCore.seedDB.mySeed.incSI(indexcount);
@ -637,6 +782,10 @@ public final class yacyClient {
// this post a message to the remote message board
final String address = targetSeed.getAddress();
if (address == null) { return null; }
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// prepare post values
final serverObjects post = new serverObjects(5+urls.length);
final String key = crypt.randomSalt();
@ -662,8 +811,15 @@ public final class yacyClient {
}
post.put("urlc", Integer.toString(urlc));
try {
final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/transferURL.html"), timeout, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
final ArrayList v = httpc.wput(
new URL("http://" + address + "/yacy/transferURL.html"),
timeout,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
);
if (v != null) {
yacyCore.seedDB.mySeed.incSU(urlc);
}
@ -675,6 +831,10 @@ public final class yacyClient {
}
public static HashMap getProfile(yacySeed targetSeed) {
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
// this requests the profile page (/yacy/profile.html) of the remote peer
final serverObjects post = new serverObjects(2);
post.put("iam", yacyCore.seedDB.mySeed.hash);
@ -682,8 +842,15 @@ public final class yacyClient {
String address = targetSeed.getAddress();
if (address == null) { address = "localhost:8080"; }
try {
final ArrayList v = httpc.wput(new URL("http://" + address + "/yacy/profile.html"), 20000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
final ArrayList v = httpc.wput(
new URL("http://" + address + "/yacy/profile.html"),
20000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null,
post
);
return nxTools.table(v);
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
@ -701,14 +868,24 @@ public final class yacyClient {
final String wordhashe = plasmaWordIndexEntry.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
final HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/search.html?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&count=10&resource=global" +
"&query=" + wordhashe),
5000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
// should we use the proxy?
boolean useProxy = (yacyCore.seedDB.sb.remoteProxyConfig != null) && (yacyCore.seedDB.sb.remoteProxyConfig.useProxy4Yacy());
final HashMap result = nxTools.table(
httpc.wget(
new URL("http://" + target.getAddress() + "/yacy/search.html" +
"?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&count=10" +
"&resource=global" +
"&query=" + wordhashe),
5000,
null,
null,
(useProxy)?yacyCore.seedDB.sb.remoteProxyConfig:null
)
);
System.out.println("Result=" + result.toString());
} catch (Exception e) {
e.printStackTrace();

@ -161,14 +161,14 @@ public class yacyPeerActions {
// load the seed list
try {
url = new URL(seedListFileURL);
header = httpc.whead(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
header = httpc.whead(url, 5000, null, null, this.sb.remoteProxyConfig);
if ((header == null) || (header.lastModified() == null)) {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available");
} else if ((header.age() > 86400000) && (ssc > 0)) {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
seedList = httpc.wget(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
seedList = httpc.wget(url, 5000, null, null, this.sb.remoteProxyConfig);
enu = seedList.iterator();
lc = 0;
while (enu.hasNext()) {
@ -221,7 +221,7 @@ public class yacyPeerActions {
// read in remote file from url
try {
ArrayList remote = httpc.wget(new URL(url), 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
ArrayList remote = httpc.wget(new URL(url), 5000, null, null, this.sb.remoteProxyConfig);
if ((remote != null) && (remote.size() > 0)) {
Iterator e = remote.iterator();
while (e.hasNext()) {

@ -708,7 +708,14 @@ public final class yacySeedDB {
private boolean checkCache(ArrayList uv, URL seedURL) throws IOException {
// check if the result can be retrieved again
ArrayList check = httpc.wget(seedURL, 10000, null, null, sb.remoteProxyHost, sb.remoteProxyPort);
// TODO: should we check the useProxy4Yacy option here???
ArrayList check = httpc.wget(
seedURL,
10000,
null,
null,
sb.remoteProxyConfig
);
if (check == null) {
serverLog.logFine("YACY","SaveSeedList: Testing download failed ...");

@ -456,7 +456,7 @@ public final class yacy {
server.terminate(false);
server.interrupt();
if (server.isAlive()) try {
httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null, 0); // kick server
httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null); // kick server
serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket");
} catch (IOException ee) {
serverLog.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");

@ -142,11 +142,16 @@ plasmaBlueList=yacy.blue
# if you wish to do that, specify it here
# if you want to switch on the proxy use, set remoteProxyUse=true
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
remoteProxyUse=false
remoteProxyUse4Yacy=true
remoteProxyUse4SSL=true
remoteProxyHost=192.168.2.2
remoteProxyPort=4239
remoteProxyUser=
remoteProxyPwd=
remoteProxyNoProxy=192.*,10.*,127.*,localhost
remoteProxyUse=false
#remoteProxyUse=true
# the proxy may filter the content of transferred web pages
# the bluelist removes specific keywords from web pages

Loading…
Cancel
Save