*) Connection Tracking

- adding automatic refresh
   - accepts new parameter nameLookup which can be used to deactivate 
     yacy-peer name lookup (because we have problems with this on large seed-dbs)

*) ViewFile
   New page that can be used to view 
   - original content 
   - plain text content 
   - parsed content
   - parsed sentences 
   of a web page specified by its URL hash.
   Mainly for debugging purposes at the moment

*) Robots.txt 
   Bugfix for if-modified-since usage
   TODO: synchronization of downloads to avoid loading the same robots-file 
   multiple times in parallel by different threads

*) Shutdown
   Better abortion of transferRWI and transferURL sessions on server shutdown

*) Status Page
   Adding icon to start/stop crawling via status page

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@950 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent bcb0d6d5ff
commit 40777556c5

@ -3,6 +3,7 @@
<head>
<title>YaCy '#[clientname]#': Connection Tracking</title>
#[metas]#
<meta http-equiv="REFRESH" content="30">
</head>
<body marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
#[header]#

@ -64,18 +64,29 @@ import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
public class Connections_p {
public final class Connections_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
serverObjects prop = new serverObjects();
// determines if name lookup should be done or not
boolean doNameLookup = true;
if (post.containsKey("nameLookup") && post.get("nameLookup","true").equals("false")) {
doNameLookup = false;
}
// getting the virtualHost string
String virtualHost = switchboard.getConfig("fileHost","localhost");
// getting the serverCore thread
serverThread httpd = switchboard.getThread("10_httpd");
// getting the session threadgroup
ThreadGroup httpSessions = ((serverCore)httpd).getSessionThreadGroup();
// getting the server core pool configuration
GenericObjectPool.Config httpdPoolConfig = ((serverCore)httpd).getPoolConfig();
/* waiting for all threads to finish */
@ -122,11 +133,14 @@ public class Connections_p {
// determining if the source is a yacy host
yacySeed seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
if (seed != null) {
if ((seed.hash == yacyCore.seedDB.mySeed.hash) &&
(!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
seed = null;
yacySeed seed = null;
if (doNameLookup) {
seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
if (seed != null) {
if ((seed.hash.equals(yacyCore.seedDB.mySeed.hash)) &&
(!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
seed = null;
}
}
}

@ -166,7 +166,7 @@ public class IndexCreateIndexingQueue_p {
} catch (IOException e) {}
prop.put("indexing-queue_num", entryCount);//num entries in queue
prop.put("indexing-queue_totalSize", Status.bytesToString(totalSize));//num entries in queue
prop.put("indexing-queue_totalSize", bytesToString(totalSize));//num entries in queue
prop.put("indexing-queue_list", entryCount);
}

@ -96,6 +96,7 @@ You are in <i>permanent</i> mode. Attention: If you don't have a flatrate or are
<!-- </td></tr></table> -->
</p>
#%[privateStatusTable]%#
<p><font size="-3"><i>Last Refresh:</i> #[date]#</font></p>
#[footer]#
</body>

@ -48,6 +48,7 @@
import java.lang.Math;
import java.text.DecimalFormat;
import java.util.Date;
import java.io.File;
import de.anomic.http.httpHeader;
@ -71,6 +72,16 @@ public class Status {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
if (post != null) {
if (post.containsKey("pausecrawlqueue")) {
((plasmaSwitchboard)env).pauseCrawling();
} else if (post.containsKey("continuecrawlqueue")) {
((plasmaSwitchboard)env).continueCrawling();
}
prop.put("LOCATION","");
return prop;
}
/*
versionProbe=http://www.anomic.de/AnomicHTTPProxy/release.txt
superseedFile=superseed.txt
@ -252,6 +263,7 @@ public class Status {
// return rewrite properties
prop.put("date",(new Date()).toString());
return prop;
}

@ -70,7 +70,7 @@
</tr>
<tr class="TableCellLight">
<td>Loader Queue</td>
<td>#[loaderQueueSize]# | #[loaderQueueMax]# #(loaderPaused)#::(paused)#(/loaderPaused)#</td>
<td>#[loaderQueueSize]# | #[loaderQueueMax]# #(loaderPaused)#::(paused)#(/loaderPaused)#&nbsp;<a href="Status.html?#(loaderPaused)#pausecrawlqueue::continuecrawlqueue#(/loaderPaused)#=" title="#(loaderPaused)#pause crawling::continue crawling#(/loaderPaused)#"><img src="env/grafics/#(loaderPaused)#stop.gif::start.gif#(/loaderPaused)#" border="0"></a></td>
<td>[<a href="IndexCreateLoaderQueue_p.html">Details</a>]</td>
</tr>
<tr class="TableCellDark">

@ -0,0 +1,85 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
<title>YaCy '#[clientname]#': View URL Content</title>
#[metas]#
</head>
<body marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
#[header]#
<br><br>
<h2>View URL Content</h2>
<p><font color="red">
#(error)#
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>URL</td>
<td><a href="#[url]#">#[url]#</a></td>
</tr>
<tr class="TableCellDark">
<td>Hash</td>
<td><tt>#[hash]#</tt></td>
</tr>
<tr class="TableCellLight">
<td>Word Count</td>
<td><tt>#[wordCount]#</tt></td>
</tr>
<tr class="TableCellDark">
<td>Description</td>
<td><tt>#[desc]#</tt></td>
</tr>
<tr class="TableCellLight">
<td>Size</td>
<td><tt>#[size]#</tt></td>
</tr>
<tr class="TableCellDark">
<td>View as:</td>
<td>
<a href="?urlHash=#[hash]#&amp;viewMode=iframe">Original</a> |
<a href="?urlHash=#[hash]#&amp;viewMode=plain">Plain Text</a> |
<a href="?urlHash=#[hash]#&amp;viewMode=parsed">Parsed Text</a> |
<a href="?urlHash=#[hash]#&amp;viewMode=sentences">Parsed Sentences</a>
</td>
</tr>
</table>
:: <!-- 1 -->
No URL hash submitted.
:: <!-- 2 -->
Unable to find URL Entry in DB
:: <!-- 3 -->
Invalid URL
:: <!-- 4 -->
Unable to download resource content.
:: <!-- 5 -->
Unable to parse resource content.
#(/error)#
</font>
</p>
<p>
#(viewMode)#
:: <!-- 1 -->
<h3>Plain Resource Content</h3><br>
<tt>#[plainText]#</tt>
:: <!-- 2 -->
<h3>Parsed Resource Content</h3><br>
<tt>#[parsedText]#</tt>
:: <!-- 3 -->
<h3>Parsed Resource Sentences</h3><br>
<table border="0" cellpadding="2" cellspacing="1">
#{sentences}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#" title="#[sessionName]#">
<td>#[nr]#</td>
<td><tt>#[text]#</tt></td>
</tr>
#{/sentences}#
</table>
:: <!-- 4 -->
<h3>Original Resource Content</h3><br>
<iframe src="#[url]#" width="800" height="400">
</iframe>
#(/viewMode)#
</p>
#[footer]#
</body>
</html>

@ -0,0 +1,173 @@
//ViewFile.java
//-----------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@anomic.de
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//last major change: 12.07.2004
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//Using this software in any meaning (reading, learning, copying, compiling,
//running) means that you agree that the Author(s) is (are) not responsible
//for cost, loss of data or any harm that may be caused directly or indirectly
//by usage of this softare or this documentation. The usage of this software
//is on your own risk. The installation and usage (starting/running) of this
//software may allow other people or application to access your computer and
//any attached devices and is highly dependent on the configuration of the
//software which must be done by the user of the software; the author(s) is
//(are) also not responsible for proper configuration and usage of the
//software, even if provoked by documentation provided together with
//the software.
//
//Any changes to this file according to the GPL as documented in the file
//gpl.txt aside this file in the shipment you received can be done to the
//lines that follows this copyright notice here, but changes must not be
//done inside the copyright notive above. A re-distribution must contain
//the intact and unchanged copyright notice.
//Contributions and changes to the program code must be marked as such.
//you must compile this file with
//javac -classpath .:../Classes ViewFile.java
//if the shell's current path is HTROOT
import java.io.IOException;
import java.net.URL;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaCrawlLURL.Entry;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet class behind the ViewFile page.
 *
 * Given the hash of an already loaded URL it fetches the resource content
 * (from the cache, or from the web if it is not cached) and exposes it to the
 * page template in one of several representations: the raw source as plain
 * text, the parsed text, the parsed sentences, or the original page inside an
 * iframe.
 *
 * The "error" property selects the alternative of the error block in the
 * template: 0 = no error (meta data table is shown), 1 = no URL hash submitted,
 * 2 = URL entry not found, 3 = invalid URL, 4 = resource could not be
 * downloaded, 5 = resource could not be parsed.
 */
public class ViewFile {

    /** Template alternative: show no content at all (used together with an error code). */
    public static final int VIEW_MODE_NO_TEXT = 0;
    /** Template alternative: show the raw resource, HTML-escaped. */
    public static final int VIEW_MODE_AS_PLAIN_TEXT = 1;
    /** Template alternative: show the text extracted by the parser. */
    public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
    /** Template alternative: show the sentences extracted by the parser as a table. */
    public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
    /** Template alternative: embed the original URL in an iframe. */
    public static final int VIEW_MODE_AS_IFRAME = 4;

    /**
     * Builds the template properties for the ViewFile page.
     *
     * @param header the request header (not used here)
     * @param post   the request parameters; "urlHash" selects the resource and
     *               "viewMode" (plain, parsed, sentences, iframe; default plain)
     *               selects the representation. May be null, in which case an
     *               empty property set is returned.
     * @param env    the switchboard, must be a plasmaSwitchboard
     * @return the properties consumed by the page template
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {

        serverObjects prop = new serverObjects();
        plasmaSwitchboard sb = (plasmaSwitchboard)env;

        if (post == null) return prop;

        // getting the url hash from which the content should be loaded
        String urlHash = post.get("urlHash","");
        if (urlHash.equals("")) return fail(prop, 1);

        String viewMode = post.get("viewMode","plain");

        // getting the urlEntry that belongs to the url hash
        Entry urlEntry = sb.urlPool.loadedURL.getEntry(urlHash);
        if (urlEntry == null) return fail(prop, 2);

        // getting the url that belongs to the entry
        URL url = urlEntry.url();
        if (url == null) return fail(prop, 3);

        // loading the resource content as byte array
        byte[] resource = null;
        try {
            resource = sb.cacheManager.loadResource(url);
            if (resource == null) {
                // not in the cache yet: fetch it from the web, then read it from the cache again
                sb.snippetCache.loadResourceFromWeb(url, 5000);
                resource = sb.cacheManager.loadResource(url);
                if (resource == null) return fail(prop, 4);
            }
        } catch (IOException e) {
            // The resource could not be loaded. Report this to the user instead of
            // continuing with a null resource, which would end in a NullPointerException.
            return fail(prop, 4);
        }

        if (viewMode.equals("plain")) {
            prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
            prop.put("viewMode_plainText",toDisplayableSource(new String(resource)));
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
            // parsing the resource content
            // NOTE(review): the iframe mode does not use the parsed document but still
            // requires a successful parse (unchanged from the original behavior).
            plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource);
            if (document == null) return fail(prop, 5);

            if (viewMode.equals("parsed")) {
                String content = new String(document.getText());
                content = content.replaceAll("\n","<br>")
                                 .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");

                prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
                prop.put("viewMode_parsedText",content);
            } else if (viewMode.equals("iframe")) {
                prop.put("viewMode",VIEW_MODE_AS_IFRAME);
                prop.put("viewMode_url",url.toString());
            } else {
                prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
                String[] sentences = document.getSentences();

                // alternate the row style, starting with a dark row
                boolean dark = true;
                for (int i=0; i < sentences.length; i++) {
                    prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1));
                    prop.put("viewMode_sentences_" + i + "_text",sentences[i]);
                    prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) );
                    dark = !dark;
                }
                prop.put("viewMode_sentences",sentences.length);
            }
        }

        // no error: fill the meta data table of the page
        prop.put("error",0);
        prop.put("error_url",url.toString());
        prop.put("error_hash",urlHash);
        prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
        prop.put("error_desc",urlEntry.descr());
        prop.put("error_size",urlEntry.size());

        return prop;
    }

    /**
     * Marks the response as failed: sets the given error code and disables the content view.
     *
     * @param prop      the properties to modify
     * @param errorCode the error alternative to display (1-5)
     * @return the same properties object, for direct use in a return statement
     */
    private static serverObjects fail(serverObjects prop, int errorCode) {
        prop.put("error",errorCode);
        prop.put("viewMode",VIEW_MODE_NO_TEXT);
        return prop;
    }

    /**
     * Converts raw resource text into HTML that displays the text literally.
     * The ampersand is escaped first so that entities contained in the source
     * (e.g. "&lt;") are shown as written and the entities inserted by the
     * later replacements are not escaped a second time.
     *
     * @param source the raw text of the resource
     * @return the escaped text with line breaks and tabs converted for HTML display
     */
    private static String toDisplayableSource(String source) {
        return source.replaceAll("&","&amp;")
                     .replaceAll("<","&lt;")
                     .replaceAll(">","&gt;")
                     .replaceAll("\"","&quot;")
                     .replaceAll("\n","<br>")
                     .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 B

@ -101,7 +101,7 @@ from 'late' peers.
<p><b>#[description]#</b><br>
#(snippet)#::<i>#[text]#</i><br>#(/snippet)#
<a href="#[url]#">#[urlname]#</a><br>
#[date]#<br></p>
#[date]# | <a href="ViewFile.html?urlHash=#[urlhash]#">Info</a><br></p>
<!-- link end -->
#{/results}#

@ -61,7 +61,7 @@ import de.anomic.yacy.yacyVersion;
public final class hello {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null || yacyCore.seedDB == null || yacyCore.seedDB.mySeed == null) { return null; }
// return variable that accumulates replacements
@ -71,9 +71,9 @@ public final class hello {
// final String iam = (String) post.get("iam", ""); // complete seed of the requesting peer
// final String pattern = (String) post.get("pattern", ""); //
// final String mytime = (String) post.get(MYTIME, ""); //
final String key = (String) post.get("key", ""); // transmission key for response
final String seed = (String) post.get(yacySeed.SEED, "");
final String countStr = (String) post.get("count", "0");
final String key = post.get("key", ""); // transmission key for response
final String seed = post.get(yacySeed.SEED, "");
final String countStr = post.get("count", "0");
int i;
int count = 0;
try {count = (countStr == null) ? 0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;}
@ -93,7 +93,9 @@ public final class hello {
// if the remote client has reported its own IP address and the client supports
// the port forwarding feature (if client version >= 0.383) then we try to
// connect to the reported IP address first
if (reportedip.length() > 0 && !clientip.equals(reportedip) && clientversion >= yacyVersion.YACY_SUPPORTS_PORT_FORWARDING) {
if (reportedip.length() > 0 && !clientip.equals(reportedip) && clientversion >= yacyVersion.YACY_SUPPORTS_PORT_FORWARDING) {
serverCore.checkInterruption();
// try first the reportedip, since this may be a connect from a port-forwarding host
prop.put(yacySeed.YOURIP, reportedip);
remoteSeed.put(yacySeed.IP, reportedip);
@ -123,6 +125,8 @@ public final class hello {
// we are only allowed to connect to the client IP address if it's not our own address
if (!isLocalIP) {
serverCore.checkInterruption();
prop.put(yacySeed.YOURIP, clientip);
remoteSeed.put(yacySeed.IP, clientip);
urls = yacyClient.queryUrlCount(remoteSeed);
@ -162,6 +166,7 @@ public final class hello {
"' to '" + prop.get(yacySeed.YOURTYPE) + "'.");
}
serverCore.checkInterruption();
final StringBuffer seeds = new StringBuffer(768);
// attach some more seeds, as requested
if ((yacyCore.seedDB != null) && (yacyCore.seedDB.sizeConnected() > 0)) {

@ -46,13 +46,15 @@
// javac -classpath .:../classes transferRWI.java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@ -61,7 +63,7 @@ import de.anomic.yacy.yacyDHTAction;
public final class transferRWI {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null) { return null; }
long start = System.currentTimeMillis();
@ -77,7 +79,7 @@ public final class transferRWI {
// final String key = (String) post.get("key", ""); // transmission key
final int wordc = Integer.parseInt((String) post.get("wordc", "")); // number of different words
final int entryc = Integer.parseInt((String) post.get("entryc", "")); // number of entries in indexes
final byte[] indexes = ((String) post.get("indexes", "")).getBytes(); // the indexes, as list of word entries
byte[] indexes = ((String) post.get("indexes", "")).getBytes(); // the indexes, as list of word entries
final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
// response values
@ -93,7 +95,7 @@ public final class transferRWI {
final long startProcess = System.currentTimeMillis();
// decode request
ArrayList v = new ArrayList();
final LinkedList v = new LinkedList();
int s = 0;
int e;
while (s < indexes.length) {
@ -101,6 +103,9 @@ public final class transferRWI {
if ((e - s) > 0) v.add(new String(indexes, s, e - s));
s = e; while (s < indexes.length) if (indexes[s++] >= 32) {s--; break;}
}
// free memory
indexes = null;
// the value-vector should now have the same length as entryc
if (v.size() != entryc) sb.getLog().logSevere("ERROR WITH ENTRY COUNTER: v=" + v.size() + ", entryc=" + entryc);
@ -114,13 +119,17 @@ public final class transferRWI {
String[] wordhashes = new String[v.size()];
int received = 0;
for (int i = 0; i < v.size(); i++) {
estring = (String) v.get(i);
serverCore.checkInterruption();
estring = (String) v.removeFirst();
p = estring.indexOf("{");
if (p > 0) {
wordHash = estring.substring(0, p);
wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p));
sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true);
serverCore.checkInterruption();
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
@ -155,5 +164,4 @@ public final class transferRWI {
// return rewrite properties
return prop;
}
}

@ -48,6 +48,7 @@
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@ -55,7 +56,7 @@ import de.anomic.yacy.yacySeed;
public final class transferURL {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null) { return null; }
long start = System.currentTimeMillis();
@ -69,7 +70,7 @@ public final class transferURL {
final String iam = (String) post.get("iam", ""); // seed hash of requester
// final String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
// final String key = (String) post.get("key", ""); // transmission key
final int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls
final int urlc = Integer.parseInt(post.get("urlc", "")); // number of transported urls
final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
final boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
@ -87,13 +88,14 @@ public final class transferURL {
String urls;
plasmaCrawlLURL.Entry lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
urls = (String) post.get("url" + i);
if (urls == null) {
yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
} else {
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
if (lEntry != null && blockBlacklist &&
sb.urlBlacklist.isListed(lEntry.url().getHost().toLowerCase(), lEntry.url().getPath())) {
plasmaSwitchboard.urlBlacklist.isListed(lEntry.url().getHost().toLowerCase(), lEntry.url().getPath())) {
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName);
lEntry = null;
}
@ -122,5 +124,4 @@ public final class transferURL {
prop.put("result", result);
return prop;
}
}

@ -197,7 +197,10 @@ public final class robotsParser{
robotsTxt = (byte[])result[1];
eTag = (String) result[2];
modDate = (Date) result[3];
}
} else if (robotsTxt4Host != null) {
robotsTxt4Host.setLoadedDate(new Date());
plasmaSwitchboard.robots.addEntry(robotsTxt4Host);
}
} catch (Exception e) {
serverLog.logSevere("ROBOTS","Unable to download the robots.txt file from URL '" + robotsURL + "'. " + e.getMessage());
}
@ -218,7 +221,7 @@ public final class robotsParser{
// storing the data into the robots DB
robotsTxt4Host = plasmaSwitchboard.robots.addEntry(urlHostPort,denyPath,new Date(),modDate,eTag);
}
}
}
if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
@ -229,7 +232,7 @@ public final class robotsParser{
private static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception {
if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null};
if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};
redirectionCount--;
boolean accessCompletelyRestricted = false;
@ -253,7 +256,7 @@ public final class robotsParser{
oldEtag = entry.getETag();
reqHeaders = new httpHeader();
Date modDate = entry.getModDate();
if (modDate != null) reqHeaders.put(httpHeader.IF_MODIFIED_SINCE,entry.getModDate());
if (modDate != null) reqHeaders.put(httpHeader.IF_MODIFIED_SINCE,httpc.dateString(entry.getModDate()));
}
httpc.response res = con.GET(robotsURL.getPath(), reqHeaders);

@ -100,7 +100,7 @@ public class wikiCode {
}
}
public String replaceHTML(String result) {
public static String replaceHTML(String result) {
int p0;
// avoide html inside

@ -80,6 +80,13 @@ import de.anomic.yacy.yacyCore;
public final class httpHeader extends TreeMap implements Map {
/* =============================================================
* Constants defining http versions
* ============================================================= */
public static final String HTTP_VERSION_0_9 = "HTTP/0.9";
public static final String HTTP_VERSION_1_0 = "HTTP/1.0";
public static final String HTTP_VERSION_1_1 = "HTTP/1.1";
/* =============================================================
* Constants defining http header names
* ============================================================= */

@ -1222,12 +1222,13 @@ public final class httpd implements serverHandler {
if (respond == null) throw new NullPointerException("The outputstream must not be null.");
if (conProp == null) throw new NullPointerException("The connection property structure must not be null.");
if (httpVersion == null) httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER,"HTTP/1.1");
if (header == null) header = new httpHeader();
try {
if ((httpStatusText == null)||(httpStatusText.length()==0)) {
if (httpVersion.equals("HTTP/1.0") && httpHeader.http1_0.containsKey(Integer.toString(httpStatusCode)))
if (httpVersion.equals(httpHeader.HTTP_VERSION_1_0) && httpHeader.http1_0.containsKey(Integer.toString(httpStatusCode)))
httpStatusText = (String) httpHeader.http1_0.get(Integer.toString(httpStatusCode));
else if (httpVersion.equals("HTTP/1.1") && httpHeader.http1_1.containsKey(Integer.toString(httpStatusCode)))
else if (httpVersion.equals(httpHeader.HTTP_VERSION_1_1) && httpHeader.http1_1.containsKey(Integer.toString(httpStatusCode)))
httpStatusText = (String) httpHeader.http1_1.get(Integer.toString(httpStatusCode));
else httpStatusText = "Unknown";
}
@ -1389,45 +1390,5 @@ public final class httpd implements serverHandler {
}
} catch (Exception e) {}
return false;
}
// public static boolean isTextMime(String mime, Set whitelist) {
// if (whitelist.contains(mime)) return true;
// // some mime-types are given as "text/html; charset=...", so look for ";"
// if (mime.length() == 0) return false;
// int pos = mime.indexOf(';');
// if (pos < 0) return false;
// return whitelist.contains(mime.substring(0, pos));
// }
}
}
/*
###
### Messages of the Server
###
# success Messages
HTTPStatus200 = OK; The URL was found. It contents follows.
HTTPStatus201 = Created; A URL was created in response to a POST.
HTTPStatus202 = Accepted; The request was accepted for processing later.
HTTPStatus203 = Non-Authoritative; The information here is unofficial.
HTTPStatus204 = No Response; The request is successful, but there is no data to send.
# redirection
HTTPStatus300 = Moved; The URL has permanently moved to a new location.
HTTPStatus301 = Found; The URL can be temporarily found at a new location.
# client errors
HTTPStatus400 = Bad Request; Syntax error in the request.
HTTPStatus401 = Unauthorized; The client is not authorized to access this web page.
HTTPStatus402 = Payment Required; A payment is required to access this web page.
HTTPStatus403 = Forbidden; This URL is forbidden. No authorization is required, it won't help.
HTTPStatus404 = Not Found; This page is not on the server.
# server errors
HTTPStatus500 = Internal Error; The server encountered an unexpected error.
HTTPStatus501 = Not Implemented; The client requested an unimplemented feature.
HTTPStatus502 = Service Overloaded; The server reached the maximum number of connections.
HTTPStatus503 = Gateway timeout; Fetching data from remote service failed.
*/

@ -534,6 +534,10 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
tp.put("clientname", switchboard.getConfig("peerName", "anomic"));
//System.out.println("respond props: " + ((tp == null) ? "null" : tp.toString())); // debug
} catch (InvocationTargetException e) {
if (e.getCause() instanceof InterruptedException) {
throw new InterruptedException(e.getCause().getMessage());
}
this.theLogger.logSevere("INTERNAL ERROR: " + e.toString() + ":" +
e.getMessage() +
" target exception at " + targetClass + ": " +

@ -60,7 +60,7 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
public class plasmaCrawlRobotsTxt {
private kelondroMap robotsTable;
kelondroMap robotsTable;
private final File robotsTableFile;
private int bufferkb;
@ -221,6 +221,12 @@ public class plasmaCrawlRobotsTxt {
return null;
}
public void setLoadedDate(Date newLoadedDate) {
if (newLoadedDate != null) {
this.mem.put(LOADED_DATE,Long.toString(newLoadedDate.getTime()));
}
}
public Date getModDate() {
if (this.mem.containsKey(MOD_DATE)) {
return new Date(Long.valueOf((String) this.mem.get(MOD_DATE)).longValue());

@ -84,7 +84,7 @@ public final class plasmaHTCache {
public long currCacheSize;
public long maxCacheSize;
public final File cachePath;
public static serverLog log;
public final serverLog log;
public static final HashSet filesInUse = new HashSet(); // can we delete this file
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
@ -100,33 +100,33 @@ public final class plasmaHTCache {
}
if (!(htCachePath.isDirectory())) {
// if the cache does not exists or is a file and not a directory, panic
log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
this.log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
System.exit(0);
}
// open the response header database
File dbfile = new File(cachePath, "responseHeader.db");
File dbfile = new File(this.cachePath, "responseHeader.db");
try {
if (dbfile.exists())
responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
else
responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaCrawlLURL.urlHashLength, 150));
this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaURL.urlHashLength, 150));
} catch (IOException e) {
log.logSevere("the request header database could not be opened: " + e.getMessage());
this.log.logSevere("the request header database could not be opened: " + e.getMessage());
System.exit(0);
}
// init stack
cacheStack = new LinkedList();
this.cacheStack = new LinkedList();
// init cache age and size management
cacheAge = new TreeMap();
currCacheSize = 0;
this.cacheAge = new TreeMap();
this.currCacheSize = 0;
this.maxCacheSize = maxCacheSize;
// start the cache startup thread
// this will collect information about the current cache size and elements
serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000);
serverInstantThread.oneTimeJob(this, "cacheScan", this.log, 5000);
}
public int size() {
@ -136,15 +136,15 @@ public final class plasmaHTCache {
}
public int dbSize() {
return responseHeaderDB.size();
return this.responseHeaderDB.size();
}
public int[] dbCacheChunkSize() {
return responseHeaderDB.cacheChunkSize();
return this.responseHeaderDB.cacheChunkSize();
}
public int[] dbCacheFillStatus() {
return responseHeaderDB.cacheFillStatus();
return this.responseHeaderDB.cacheFillStatus();
}
public void push(Entry entry) {
@ -157,17 +157,16 @@ public final class plasmaHTCache {
synchronized (this.cacheStack) {
if (this.cacheStack.size() > 0)
return (Entry) this.cacheStack.removeFirst();
else
return null;
return null;
}
}
public void storeHeader(String urlHash, httpHeader responseHeader) throws IOException {
responseHeaderDB.set(urlHash, responseHeader);
this.responseHeaderDB.set(urlHash, responseHeader);
}
public long getFreeSize() {
return (currCacheSize > maxCacheSize) ? 0 : maxCacheSize - currCacheSize;
return (this.currCacheSize > this.maxCacheSize) ? 0 : this.maxCacheSize - this.currCacheSize;
}
public boolean writeFile(URL url, byte[] array) {
@ -181,10 +180,10 @@ public final class plasmaHTCache {
// this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs
// In this case do nothing.
log.logSevere("File storage failed (not a directory): " + e.getMessage());
this.log.logSevere("File storage failed (not a directory): " + e.getMessage());
return false;
} catch (IOException e) {
log.logSevere("File storage failed (IO error): " + e.getMessage());
this.log.logSevere("File storage failed (IO error): " + e.getMessage());
return false;
}
writeFileAnnouncement(file);
@ -192,10 +191,10 @@ public final class plasmaHTCache {
}
public void writeFileAnnouncement(File file) {
synchronized (cacheAge) {
synchronized (this.cacheAge) {
if (file.exists()) {
currCacheSize += file.length();
cacheAge.put(ageString(file.lastModified(), file), file);
this.currCacheSize += file.length();
this.cacheAge.put(ageString(file.lastModified(), file), file);
cleanup();
}
}
@ -209,22 +208,21 @@ public final class plasmaHTCache {
if (deleteFileandDirs(getCachePath(url), msg)) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
log.logFinest("Trying to remove responseHeader from URL: " + url.toString());
responseHeaderDB.remove(plasmaURL.urlHash(url));
this.log.logFinest("Trying to remove responseHeader from URL: " + url.toString());
this.responseHeaderDB.remove(plasmaURL.urlHash(url));
} catch (IOException e) {
log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
this.log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
return true;
} else {
return false;
}
return false;
}
private boolean deleteFile(File obj) {
if (obj.exists() && !filesInUse.contains(obj)) {
long size = obj.length();
if (obj.delete()) {
currCacheSize -= size;
this.currCacheSize -= size;
return true;
}
}
@ -233,39 +231,38 @@ public final class plasmaHTCache {
/**
 * Deletes a cache file through deleteFile() and afterwards removes every parent
 * directory that has become empty, walking upwards until cachePath is reached.
 *
 * @param obj the file to delete
 * @param msg label that is inserted into the "DELETED ... CACHE" log line
 * @return true if the file itself was deleted, false if deleteFile() refused or failed
 */
private boolean deleteFileandDirs (File obj, String msg) {
if (deleteFile(obj)) {
log.logInfo("DELETED " + msg + " CACHE : " + obj.toString());
this.log.logInfo("DELETED " + msg + " CACHE : " + obj.toString());
obj = obj.getParentFile();
// If the directory has been emptied, remove it
// Loop as long as we produce empty directories, but stop at HTCACHE
// NOTE(review): File.list() may return null on an I/O error, which would throw here - confirm whether that can happen
while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
if (obj.delete()) log.logInfo("DELETED EMPTY DIRECTORY : " + obj.toString());
while ((!(obj.equals(this.cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
if (obj.delete()) this.log.logInfo("DELETED EMPTY DIRECTORY : " + obj.toString());
obj = obj.getParentFile();
}
return true;
} else {
return false;
}
return false;
}
private void cleanupDoIt(long newCacheSize) {
if (cacheAge.size() == 0) return;
if (this.cacheAge.size() == 0) return;
File obj;
Iterator iter = cacheAge.keySet().iterator();
while (iter.hasNext() && (currCacheSize >= newCacheSize)) {
Iterator iter = this.cacheAge.keySet().iterator();
while (iter.hasNext() && (this.currCacheSize >= newCacheSize)) {
Object key = iter.next();
obj = (File) cacheAge.get(key);
obj = (File) this.cacheAge.get(key);
if (obj != null) {
if (filesInUse.contains(obj)) continue;
log.logFinest("Trying to delete old file: " + obj.toString());
this.log.logFinest("Trying to delete old file: " + obj.toString());
if (deleteFileandDirs (obj, "OLD")) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
log.logFinest("Trying to remove responseHeader for URL: " +
getURL(cachePath ,obj).toString());
responseHeaderDB.remove(plasmaURL.urlHash(getURL(cachePath ,obj)));
this.log.logFinest("Trying to remove responseHeader for URL: " +
getURL(this.cachePath ,obj).toString());
this.responseHeaderDB.remove(plasmaURL.urlHash(getURL(this.cachePath ,obj)));
} catch (IOException e) {
log.logInfo("IOExeption removing response header from DB: " +
this.log.logInfo("IOExeption removing response header from DB: " +
e.getMessage(), e);
}
}
@ -275,13 +272,13 @@ public final class plasmaHTCache {
/**
 * Shrinks the cache once it has reached its size limit.
 * Nothing happens while currCacheSize is below maxCacheSize, while cacheAge is
 * empty, or when maxCacheSize is not positive.
 */
private void cleanup() {
// clean up cache to have 4% (enough) space for next entries
// the target size handed to cleanupDoIt() is maxCacheSize minus (maxCacheSize / 100) * 4
if ((currCacheSize >= maxCacheSize) && (cacheAge.size() > 0)) {
if (maxCacheSize > 0) cleanupDoIt(maxCacheSize - ((maxCacheSize / 100) * 4));
if ((this.currCacheSize >= this.maxCacheSize) && (this.cacheAge.size() > 0)) {
if (this.maxCacheSize > 0) cleanupDoIt(this.maxCacheSize - ((this.maxCacheSize / 100) * 4));
}
}
/**
 * Closes the response header database.
 *
 * @throws IOException if responseHeaderDB.close() fails
 */
public void close() throws IOException {
responseHeaderDB.close();
this.responseHeaderDB.close();
}
private String ageString(long date, File f) {
@ -299,7 +296,7 @@ public final class plasmaHTCache {
//log.logSystem("STARTING CACHE SCANNING");
kelondroMScoreCluster doms = new kelondroMScoreCluster();
int c = 0;
enumerateFiles ef = new enumerateFiles(cachePath, true, false, true, true);
enumerateFiles ef = new enumerateFiles(this.cachePath, true, false, true, true);
File f;
while (ef.hasMoreElements()) {
c++;
@ -307,19 +304,19 @@ public final class plasmaHTCache {
long d = f.lastModified();
//System.out.println("Cache: " + dom(f));
doms.incScore(dom(f));
currCacheSize += f.length();
cacheAge.put(ageString(d, f), f);
this.currCacheSize += f.length();
this.cacheAge.put(ageString(d, f), f);
}
//System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
long ageHours = 0;
try {
ageHours = (System.currentTimeMillis() -
Long.parseLong(((String) cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
Long.parseLong(((String) this.cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
} catch (NumberFormatException e) {
//e.printStackTrace();
}
log.logConfig("CACHE SCANNED, CONTAINS " + c +
" FILES = " + currCacheSize/1048576 + "MB, OLDEST IS " +
this.log.logConfig("CACHE SCANNED, CONTAINS " + c +
" FILES = " + this.currCacheSize/1048576 + "MB, OLDEST IS " +
((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) + " OLD");
cleanup();
@ -333,18 +330,18 @@ public final class plasmaHTCache {
ip = httpc.dnsResolve(dom);
if (ip == null) continue;
result += ", " + dom + "=" + ip;
log.logConfig("PRE-FILLED " + dom + "=" + ip);
this.log.logConfig("PRE-FILLED " + dom + "=" + ip);
c++;
doms.deleteScore(dom);
// wait a short while to prevent that this looks like a DoS
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
if (result.length() > 2) log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + c +
if (result.length() > 2) this.log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + c +
" ADDRESSES: " + result.substring(2));
}
private String dom(File f) {
String s = f.toString().substring(cachePath.toString().length() + 1);
String s = f.toString().substring(this.cachePath.toString().length() + 1);
int p = s.indexOf("/");
if (p < 0) p = s.indexOf("\\");
if (p < 0) return null;
@ -352,17 +349,17 @@ public final class plasmaHTCache {
}
/**
 * Loads the response header that is stored for a URL hash.
 *
 * @param urlHash key of the entry in responseHeaderDB
 * @return a new httpHeader built from the stored map, or null if the database has no entry for the hash
 * @throws IOException if reading from responseHeaderDB fails
 */
public httpHeader getCachedResponse(String urlHash) throws IOException {
Map hdb = responseHeaderDB.get(urlHash);
Map hdb = this.responseHeaderDB.get(urlHash);
if (hdb == null) return null;
// NOTE(review): the meaning of the first constructor argument (passed as null) is not visible here - confirm
return new httpHeader(null, hdb);
}
/**
 * @return true if cacheStack holds more than stackLimit elements
 */
public boolean full() {
return (cacheStack.size() > stackLimit);
return (this.cacheStack.size() > stackLimit);
}
/**
 * @return true if cacheStack contains no elements
 */
public boolean empty() {
return (cacheStack.size() == 0);
return (this.cacheStack.size() == 0);
}
public static boolean isPicture(httpHeader response) {
@ -476,9 +473,8 @@ public final class plasmaHTCache {
return serverFileUtils.read(f);
} catch (IOException e) {
return null;
} else {
return null;
}
return null;
}
public static boolean isPOST(String urlString) {
@ -534,14 +530,14 @@ public final class plasmaHTCache {
serverLog.logFine("PLASMA", "Entry: URL=" + url.toString());
this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url);
try {
this.url = new URL(nomalizedURLString);
this.url = new URL(this.nomalizedURLString);
} catch (MalformedURLException e) {
System.out.println("internal error at httpdProxyCache.Entry: " + e);
System.exit(-1);
}
this.name = name;
this.cacheFile = getCachePath(this.url);
this.nomalizedURLHash = plasmaCrawlLURL.urlHash(nomalizedURLString);
this.nomalizedURLHash = plasmaURL.urlHash(this.nomalizedURLString);
// assigned:
this.initDate = initDate;
@ -562,10 +558,10 @@ public final class plasmaHTCache {
System.exit(0);
}
lastModified = new Date(serverDate.correctedUTCTime());
this.lastModified = new Date(serverDate.correctedUTCTime());
} else {
lastModified = responseHeader.lastModified();
if (lastModified == null) lastModified = new Date(serverDate.correctedUTCTime()); // does not exist in header
this.lastModified = responseHeader.lastModified();
if (this.lastModified == null) this.lastModified = new Date(serverDate.correctedUTCTime()); // does not exist in header
}
this.doctype = plasmaWordIndexEntry.docType(responseHeader.mime());
if (this.doctype == plasmaWordIndexEntry.DT_UNKNOWN) this.doctype = plasmaWordIndexEntry.docType(url);
@ -576,22 +572,23 @@ public final class plasmaHTCache {
}
/**
 * @return the value of the name field of this entry
 */
public String name() {
return name;
return this.name;
}
/**
 * @return the value of the initiator field of this entry; proxy() treats null as a distinct case
 */
public String initiator() {
return initiator;
return this.initiator;
}
/**
 * Tells whether this entry has no initiator.
 * NOTE(review): presumably this marks entries that were loaded through the proxy - confirm.
 *
 * @return true if initiator() yields null, false otherwise
 */
public boolean proxy() {
    final String origin = initiator();
    return origin == null;
}
/**
 * Returns the size of the content held in memory for this entry.
 *
 * @return the length of cacheArray, or 0 if cacheArray is null
 */
public long size() {
// The first statement is the pre-change form; the two statements after it are its replacement without the else branch.
if (cacheArray == null) return 0; else return cacheArray.length;
if (this.cacheArray == null) return 0;
return this.cacheArray.length;
}
public URL referrerURL() {
if (requestHeader == null) return null;
if (this.requestHeader == null) return null;
try {
return new URL((String) requestHeader.get(httpHeader.REFERER, ""));
return new URL((String) this.requestHeader.get(httpHeader.REFERER, ""));
} catch (Exception e) {
return null;
}
@ -611,35 +608,35 @@ public final class plasmaHTCache {
// in case of FALSE, the reason as String is returned
// check profile
if (!(profile.storeHTCache())) return "storage_not_wanted";
if (!(this.profile.storeHTCache())) return "storage_not_wanted";
// decide upon header information if a specific file should be stored to the cache or not
// if the storage was requested by prefetching, the request map is null
// check status code
if (!((responseStatus.startsWith("200")) || (responseStatus.startsWith("203")))) return "bad_status_" + responseStatus.substring(0,3);
if (!((this.responseStatus.startsWith("200")) || (this.responseStatus.startsWith("203")))) return "bad_status_" + this.responseStatus.substring(0,3);
// check storage location
// sometimes a file name is equal to a path name in the same directory;
// or sometimes a file name is equal a directory name created earlier;
// we cannot match that here in the cache file path and therefore omit writing into the cache
if ((cacheFile.getParentFile().isFile()) || (cacheFile.isDirectory())) return "path_ambiguous";
if (cacheFile.toString().indexOf("..") >= 0) return "path_dangerous";
if ((this.cacheFile.getParentFile().isFile()) || (this.cacheFile.isDirectory())) return "path_ambiguous";
if (this.cacheFile.toString().indexOf("..") >= 0) return "path_dangerous";
// -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches
if ((isPOST(nomalizedURLString)) && (!(profile.crawlingQ()))) return "dynamic_post";
if (isCGI(nomalizedURLString)) return "dynamic_cgi";
if ((isPOST(this.nomalizedURLString)) && (!(this.profile.crawlingQ()))) return "dynamic_post";
if (isCGI(this.nomalizedURLString)) return "dynamic_cgi";
// -authorization cases in request
// authorization makes pages very individual, and therefore we cannot use the
// content in the cache
if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.AUTHORIZATION))) return "personalized";
if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.AUTHORIZATION))) return "personalized";
// -ranges in request and response
// we do not cache partial content
if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.RANGE))) return "partial";
if ((responseHeader != null) && (responseHeader.containsKey(httpHeader.CONTENT_RANGE))) return "partial";
if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.RANGE))) return "partial";
if ((this.responseHeader != null) && (this.responseHeader.containsKey(httpHeader.CONTENT_RANGE))) return "partial";
// -if-modified-since in request
// we do not care about if-modified-since, because this case only occurres if the
@ -657,8 +654,8 @@ public final class plasmaHTCache {
// -pragma in response
// if we have a pragma non-cache, we don't cache. usually if this is wanted from
// the server, it makes sense
if ((responseHeader.containsKey(httpHeader.PRAGMA)) &&
(((String) responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return "controlled_no_cache";
if ((this.responseHeader.containsKey(httpHeader.PRAGMA)) &&
(((String) this.responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return "controlled_no_cache";
// -expires in response
// we do not care about expires, because at the time this is called the data is
@ -666,12 +663,12 @@ public final class plasmaHTCache {
// -cache-control in response
// the cache-control has many value options.
String cacheControl = (String) responseHeader.get(httpHeader.CACHE_CONTROL);
String cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
if (cacheControl != null) {
cacheControl = cacheControl.trim().toUpperCase();
if (cacheControl.startsWith("MAX-AGE=")) {
// we need also the load date
Date date = responseHeader.date();
Date date = this.responseHeader.date();
if (date == null) return "stale_no_date_given_in_response";
try {
long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
@ -696,57 +693,57 @@ public final class plasmaHTCache {
// -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches
if (isPOST(nomalizedURLString)) return false;
if (isCGI(nomalizedURLString)) return false;
if (isPOST(this.nomalizedURLString)) return false;
if (isCGI(this.nomalizedURLString)) return false;
// -authorization cases in request
if (requestHeader.containsKey(httpHeader.AUTHORIZATION)) return false;
if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) return false;
// -ranges in request
// we do not cache partial content
if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.RANGE))) return false;
if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.RANGE))) return false;
//Date d1, d2;
// -if-modified-since in request
// The entity has to be transferred only if it has
// been modified since the date given by the If-Modified-Since header.
if (requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
if (this.requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
// checking this makes only sense if the cached response contains
// a Last-Modified field. If the field does not exist, we go the safe way
if (!(responseHeader.containsKey(httpHeader.LAST_MODIFIED))) return false;
if (!(this.responseHeader.containsKey(httpHeader.LAST_MODIFIED))) return false;
// parse date
Date d1, d2;
d2 = responseHeader.lastModified(); if (d2 == null) d2 = new Date(serverDate.correctedUTCTime());
d1 = requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date(serverDate.correctedUTCTime());
d2 = this.responseHeader.lastModified(); if (d2 == null) d2 = new Date(serverDate.correctedUTCTime());
d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date(serverDate.correctedUTCTime());
// finally, we shall treat the cache as stale if the modification time is after the if-.. time
if (d2.after(d1)) return false;
}
boolean isNotPicture = !isPicture(responseHeader);
boolean isNotPicture = !isPicture(this.responseHeader);
// -cookies in request
// unfortunately, we should reload in case of a cookie
// but we think that pictures can still be considered as fresh
if ((requestHeader.containsKey(httpHeader.COOKIE)) && (isNotPicture)) return false;
if ((this.requestHeader.containsKey(httpHeader.COOKIE)) && (isNotPicture)) return false;
// -set-cookie in cached response
// this is a similar case as for COOKIE.
if ((responseHeader.containsKey(httpHeader.SET_COOKIE)) && (isNotPicture)) return false; // too strong
if ((responseHeader.containsKey(httpHeader.SET_COOKIE2)) && (isNotPicture)) return false; // too strong
if ((this.responseHeader.containsKey(httpHeader.SET_COOKIE)) && (isNotPicture)) return false; // too strong
if ((this.responseHeader.containsKey(httpHeader.SET_COOKIE2)) && (isNotPicture)) return false; // too strong
// -pragma in cached response
// logically, we would not need to care about no-cache pragmas in cached response headers,
// because they cannot exist since they are not written to the cache.
// So this IF should always fail..
if ((responseHeader.containsKey(httpHeader.PRAGMA)) &&
(((String) responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return false;
if ((this.responseHeader.containsKey(httpHeader.PRAGMA)) &&
(((String) this.responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return false;
// calculate often needed values for freshness attributes
Date date = responseHeader.date();
Date expires = responseHeader.expires();
Date lastModified = responseHeader.lastModified();
String cacheControl = (String) responseHeader.get(httpHeader.CACHE_CONTROL);
Date date = this.responseHeader.date();
Date expires = this.responseHeader.expires();
Date lastModified = this.responseHeader.lastModified();
String cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
// see for documentation also:

@ -358,7 +358,7 @@ public class plasmaSnippetCache {
}
}
private void loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
public void loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
plasmaCrawlWorker.load(
url,
"",

@ -1476,7 +1476,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int p;
URL url;
plasmaCrawlLURL.Entry urlentry;
String urlstring, urlname, filename;
String urlstring, urlname, filename, urlhash;
String host, hash, address, descr = "";
yacySeed seed;
plasmaSnippetCache.result snippet;
@ -1484,6 +1484,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
while ((acc.hasMoreElements()) && (i < query.wantedResults)) {
urlentry = acc.nextElement();
url = urlentry.url();
urlhash = urlentry.hash();
host = url.getHost();
if (host.endsWith(".yacyh")) {
// translate host into current IP
@ -1525,6 +1526,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} else {
prop.put("results_" + i + "_description", descr);
prop.put("results_" + i + "_url", urlstring);
prop.put("results_" + i + "_urlhash", urlhash);
prop.put("results_" + i + "_urlname", urlname);
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));

@ -578,8 +578,7 @@ public final class serverCore extends serverAbstractThread implements serverThre
}
super.close();
}
}
}
public final class SessionFactory implements org.apache.commons.pool.PoolableObjectFactory {
@ -707,6 +706,10 @@ public final class serverCore extends serverAbstractThread implements serverThre
this.stopped = stopped;
}
/**
 * @return the current value of the stopped flag; checkInterruption() reads it to decide
 *         whether work running on this thread should be aborted
 */
public boolean isStopped() {
return this.stopped;
}
public void close() {
if (this.isAlive()) {
try {
@ -817,7 +820,7 @@ public final class serverCore extends serverAbstractThread implements serverThre
this.running = true;
// The thread keeps running.
while (!this.stopped && !Thread.interrupted()) {
while (!this.stopped && !this.isInterrupted()) {
if (this.done) {
// We are waiting for a task now.
synchronized (this) {
@ -1114,5 +1117,11 @@ public final class serverCore extends serverAbstractThread implements serverThre
if (!this.theSessionPool.isClosed) this.theSessionPool.close();
super.finalize();
}
/**
 * Aborts the caller if the current thread has been asked to stop.
 * The interrupt flag is only inspected via isInterrupted(), so it is not cleared.
 *
 * @throws InterruptedException if the current thread is interrupted, or if it is a
 *         serverCore.Session whose isStopped() returns true
 */
public static final void checkInterruption() throws InterruptedException {
    final Thread self = Thread.currentThread();

    // the interrupt flag is checked first; the session state is consulted only if it is not set
    boolean abort = self.isInterrupted();
    if (!abort && (self instanceof serverCore.Session)) {
        abort = ((serverCore.Session) self).isStopped();
    }

    if (abort) throw new InterruptedException();
}
}

@ -696,19 +696,19 @@ public final class yacyClient {
try {
final plasmaSwitchboard sb = new plasmaSwitchboard(args[0], "httpProxy.init", "DATA/SETTINGS/httpProxy.conf");
final yacyCore core = new yacyCore(sb);
core.peerActions.loadSeedLists();
final yacySeed target = core.seedDB.getConnected(args[1]);
yacyCore.peerActions.loadSeedLists();
final yacySeed target = yacyCore.seedDB.getConnected(args[1]);
final String wordhashe = plasmaWordIndexEntry.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
final HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/search.html?myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"/yacy/search.html?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&count=10&resource=global" +
"&query=" + wordhashe),
5000, null, null, core.seedDB.sb.remoteProxyHost, core.seedDB.sb.remoteProxyPort));
5000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
System.out.println("Result=" + result.toString());
} catch (Exception e) {
e.printStackTrace();

Loading…
Cancel
Save