From 40777556c546176b6f993316583f39b0a076c6b2 Mon Sep 17 00:00:00 2001
From: theli
Date: Tue, 18 Oct 2005 07:45:27 +0000
Subject: [PATCH] *) Connection Tracking - adding automatic refresh -
accepts new parameter nameLookup which can be used to deactivate
yacy-peer name lookup (because we have problems with this on large seed-dbs)
*) ViewFile
New page that can be used to view
- original content
- plain text content
- parsed content
- parsed sentences
of a webpage specified by its url hash
Mainly for debugging purposes at the moment
*) Robots.txt
Bugfix for if-modified-since usage
TODO: synchronization of downloads to avoid loading the same robots-file
multiple times in parallel by different threads
*) Shutdown
Better abortion of transferRWI and transferURL sessions on server shutdown
*) Status Page
Adding icon to start/stop crawling via status page
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@950 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/Connections_p.html | 1 +
htroot/Connections_p.java | 26 ++-
htroot/IndexCreateIndexingQueue_p.java | 2 +-
htroot/Status.html | 1 +
htroot/Status.java | 12 ++
htroot/Status_p.inc | 2 +-
htroot/ViewFile.html | 85 ++++++++
htroot/ViewFile.java | 173 +++++++++++++++
htroot/env/grafics/start.gif | Bin 0 -> 88 bytes
htroot/env/grafics/stop.gif | Bin 0 -> 90 bytes
htroot/index.html | 2 +-
htroot/yacy/hello.java | 15 +-
htroot/yacy/transferRWI.java | 20 +-
htroot/yacy/transferURL.java | 9 +-
source/de/anomic/data/robotsParser.java | 11 +-
source/de/anomic/data/wikiCode.java | 2 +-
source/de/anomic/http/httpHeader.java | 7 +
source/de/anomic/http/httpd.java | 47 +----
source/de/anomic/http/httpdFileHandler.java | 4 +
.../anomic/plasma/plasmaCrawlRobotsTxt.java | 8 +-
source/de/anomic/plasma/plasmaHTCache.java | 199 +++++++++---------
.../de/anomic/plasma/plasmaSnippetCache.java | 2 +-
.../de/anomic/plasma/plasmaSwitchboard.java | 4 +-
source/de/anomic/server/serverCore.java | 15 +-
source/de/anomic/yacy/yacyClient.java | 10 +-
25 files changed, 473 insertions(+), 184 deletions(-)
create mode 100644 htroot/ViewFile.html
create mode 100644 htroot/ViewFile.java
create mode 100644 htroot/env/grafics/start.gif
create mode 100644 htroot/env/grafics/stop.gif
diff --git a/htroot/Connections_p.html b/htroot/Connections_p.html
index c3941206f..f13af8523 100644
--- a/htroot/Connections_p.html
+++ b/htroot/Connections_p.html
@@ -3,6 +3,7 @@
YaCy '#[clientname]#': Connection Tracking
#[metas]#
+
#[header]#
diff --git a/htroot/Connections_p.java b/htroot/Connections_p.java
index 40abf88b2..e611411f7 100644
--- a/htroot/Connections_p.java
+++ b/htroot/Connections_p.java
@@ -64,18 +64,29 @@ import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
-public class Connections_p {
+public final class Connections_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
serverObjects prop = new serverObjects();
+ // determines if name lookup should be done or not
+ boolean doNameLookup = true;
+ if (post.containsKey("nameLookup") && post.get("nameLookup","true").equals("false")) {
+ doNameLookup = false;
+ }
+
+ // getting the virtualHost string
String virtualHost = switchboard.getConfig("fileHost","localhost");
+ // getting the serverCore thread
serverThread httpd = switchboard.getThread("10_httpd");
+
+ // getting the session threadgroup
ThreadGroup httpSessions = ((serverCore)httpd).getSessionThreadGroup();
+ // getting the server core pool configuration
GenericObjectPool.Config httpdPoolConfig = ((serverCore)httpd).getPoolConfig();
/* waiting for all threads to finish */
@@ -122,11 +133,14 @@ public class Connections_p {
// determining if the source is a yacy host
- yacySeed seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
- if (seed != null) {
- if ((seed.hash == yacyCore.seedDB.mySeed.hash) &&
- (!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
- seed = null;
+ yacySeed seed = null;
+ if (doNameLookup) {
+ seed = yacyCore.seedDB.lookupByIP(userAddress,true,false,false);
+ if (seed != null) {
+ if ((seed.hash.equals(yacyCore.seedDB.mySeed.hash)) &&
+ (!seed.get(yacySeed.PORT,"").equals(Integer.toString(userPort)))) {
+ seed = null;
+ }
}
}
diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java
index 82290feb3..691e0061d 100644
--- a/htroot/IndexCreateIndexingQueue_p.java
+++ b/htroot/IndexCreateIndexingQueue_p.java
@@ -166,7 +166,7 @@ public class IndexCreateIndexingQueue_p {
} catch (IOException e) {}
prop.put("indexing-queue_num", entryCount);//num entries in queue
- prop.put("indexing-queue_totalSize", Status.bytesToString(totalSize));//num entries in queue
+ prop.put("indexing-queue_totalSize", bytesToString(totalSize));//num entries in queue
prop.put("indexing-queue_list", entryCount);
}
diff --git a/htroot/Status.html b/htroot/Status.html
index dd6efdbe5..474fa400f 100644
--- a/htroot/Status.html
+++ b/htroot/Status.html
@@ -96,6 +96,7 @@ You are in permanent mode. Attention: If you don't have a flatrate or are
#%[privateStatusTable]%#
+Last Refresh: #[date]#
#[footer]#
diff --git a/htroot/Status.java b/htroot/Status.java
index 5e7fb3f77..0a73738d9 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -48,6 +48,7 @@
import java.lang.Math;
import java.text.DecimalFormat;
+import java.util.Date;
import java.io.File;
import de.anomic.http.httpHeader;
@@ -71,6 +72,16 @@ public class Status {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
+ if (post != null) {
+ if (post.containsKey("pausecrawlqueue")) {
+ ((plasmaSwitchboard)env).pauseCrawling();
+ } else if (post.containsKey("continuecrawlqueue")) {
+ ((plasmaSwitchboard)env).continueCrawling();
+ }
+ prop.put("LOCATION","");
+ return prop;
+ }
+
/*
versionProbe=http://www.anomic.de/AnomicHTTPProxy/release.txt
superseedFile=superseed.txt
@@ -252,6 +263,7 @@ public class Status {
// return rewrite properties
+ prop.put("date",(new Date()).toString());
return prop;
}
diff --git a/htroot/Status_p.inc b/htroot/Status_p.inc
index 3268dbb28..d516f709e 100644
--- a/htroot/Status_p.inc
+++ b/htroot/Status_p.inc
@@ -70,7 +70,7 @@
Loader Queue |
- #[loaderQueueSize]# | #[loaderQueueMax]# #(loaderPaused)#::(paused)#(/loaderPaused)# |
+ #[loaderQueueSize]# | #[loaderQueueMax]# #(loaderPaused)#::(paused)#(/loaderPaused)# #stop.gif::start.gif#(/loaderPaused)#) |
[Details] |
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
new file mode 100644
index 000000000..07be4b480
--- /dev/null
+++ b/htroot/ViewFile.html
@@ -0,0 +1,85 @@
+
+
+
+YaCy '#[clientname]#': View URL Content
+#[metas]#
+
+
+#[header]#
+
+View URL Content
+
+
+#(error)#
+
+::
+No URL hash submitted.
+::
+Unable to find URL Entry in DB
+::
+Invalid URL
+::
+Unable to download resource content.
+::
+Unable to parse resource content.
+#(/error)#
+
+
+
+#(viewMode)#
+::
+
Plain Resource Content
+ #[plainText]#
+::
+ Parsed Resource Content
+ #[parsedText]#
+::
+ Parsed Resource Sentences
+
+ #{sentences}#
+
+ #[nr]# |
+ #[text]# |
+
+ #{/sentences}#
+
+::
+ Original Resource Content
+
+#(/viewMode)#
+
+
+#[footer]#
+
+
\ No newline at end of file
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
new file mode 100644
index 000000000..ce94aa971
--- /dev/null
+++ b/htroot/ViewFile.java
@@ -0,0 +1,173 @@
+//ViewFile.java
+//-----------------------
+//part of YaCy
+//(C) by Michael Peter Christen; mc@anomic.de
+//first published on http://www.anomic.de
+//Frankfurt, Germany, 2004
+//
+//last major change: 12.07.2004
+//
+//This program is free software; you can redistribute it and/or modify
+//it under the terms of the GNU General Public License as published by
+//the Free Software Foundation; either version 2 of the License, or
+//(at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+//along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+//Using this software in any meaning (reading, learning, copying, compiling,
+//running) means that you agree that the Author(s) is (are) not responsible
+//for cost, loss of data or any harm that may be caused directly or indirectly
+//by usage of this softare or this documentation. The usage of this software
+//is on your own risk. The installation and usage (starting/running) of this
+//software may allow other people or application to access your computer and
+//any attached devices and is highly dependent on the configuration of the
+//software which must be done by the user of the software; the author(s) is
+//(are) also not responsible for proper configuration and usage of the
+//software, even if provoked by documentation provided together with
+//the software.
+//
+//Any changes to this file according to the GPL as documented in the file
+//gpl.txt aside this file in the shipment you received can be done to the
+//lines that follows this copyright notice here, but changes must not be
+//done inside the copyright notive above. A re-distribution must contain
+//the intact and unchanged copyright notice.
+//Contributions and changes to the program code must be marked as such.
+
+//you must compile this file with
+//javac -classpath .:../Classes ViewFile.java
+//if the shell's current path is HTROOT
+
+import java.io.IOException;
+import java.net.URL;
+
+import de.anomic.http.httpHeader;
+import de.anomic.plasma.plasmaParserDocument;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaCrawlLURL.Entry;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class ViewFile {
+
+ public static final int VIEW_MODE_NO_TEXT = 0;
+ public static final int VIEW_MODE_AS_PLAIN_TEXT = 1;
+ public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
+ public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
+ public static final int VIEW_MODE_AS_IFRAME = 4;
+
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+
+ serverObjects prop = new serverObjects();
+ plasmaSwitchboard sb = (plasmaSwitchboard)env;
+
+ if (post != null) {
+ // getting the url hash from which the content should be loaded
+ String urlHash = post.get("urlHash","");
+ if (urlHash.equals("")) {
+ prop.put("error",1);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+
+ String viewMode = post.get("viewMode","plain");
+
+ // getting the urlEntry that belongs to the url hash
+ Entry urlEntry = sb.urlPool.loadedURL.getEntry(urlHash);
+ if (urlEntry == null) {
+ prop.put("error",2);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+
+ // gettin the url that belongs to the entry
+ URL url = urlEntry.url();
+ if (url == null) {
+ prop.put("error",3);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+
+ // loading the resource content as byte array
+ byte[] resource = null;
+ try {
+ resource = sb.cacheManager.loadResource(url);
+ if (resource == null) {
+ sb.snippetCache.loadResourceFromWeb(url, 5000);
+
+ resource = sb.cacheManager.loadResource(url);
+ if (resource == null) {
+ prop.put("error",4);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+ }
+ } catch (IOException e) {
+ if (url == null) {
+ prop.put("error",4);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+ }
+ if (viewMode.equals("plain")) {
+ String content = new String(resource);
+ content = content.replaceAll("<","<")
+ .replaceAll(">",">")
+ .replaceAll("\"",""")
+ .replaceAll("\n","
")
+ .replaceAll("\t"," ");
+
+ prop.put("error",0);
+ prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
+ prop.put("viewMode_plainText",content);
+ } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
+ // parsing the resource content
+ plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource);
+ if (document == null) {
+ prop.put("error",5);
+ prop.put("viewMode",VIEW_MODE_NO_TEXT);
+ return prop;
+ }
+
+ if (viewMode.equals("parsed")) {
+ String content = new String(document.getText());
+ content = content.replaceAll("\n","
")
+ .replaceAll("\t"," ");
+
+ prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
+ prop.put("viewMode_parsedText",content);
+ } else if (viewMode.equals("iframe")) {
+ prop.put("viewMode",VIEW_MODE_AS_IFRAME);
+ prop.put("viewMode_url",url.toString());
+ } else {
+ prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
+ String[] sentences = document.getSentences();
+
+ boolean dark = true;
+ for (int i=0; i < sentences.length; i++) {
+ prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1));
+ prop.put("viewMode_sentences_" + i + "_text",sentences[i]);
+ prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark;
+ }
+ prop.put("viewMode_sentences",sentences.length);
+
+ }
+ }
+ prop.put("error",0);
+ prop.put("error_url",url.toString());
+ prop.put("error_hash",urlHash);
+ prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
+ prop.put("error_desc",urlEntry.descr());
+ prop.put("error_size",urlEntry.size());
+ }
+
+ return prop;
+ }
+
+}
diff --git a/htroot/env/grafics/start.gif b/htroot/env/grafics/start.gif
new file mode 100644
index 0000000000000000000000000000000000000000..619bc2c5c8f0b6da59aa7dffe3bc7b503980ec35
GIT binary patch
literal 88
zcmZ?wbhEHbOV
literal 0
HcmV?d00001
diff --git a/htroot/env/grafics/stop.gif b/htroot/env/grafics/stop.gif
new file mode 100644
index 0000000000000000000000000000000000000000..5a5836b927724dddfc48f0d0087013fde88e6491
GIT binary patch
literal 90
zcmZ?wbhEHb&J0BG|gt^fc4
literal 0
HcmV?d00001
diff --git a/htroot/index.html b/htroot/index.html
index b14449595..5d7e8efd6 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -101,7 +101,7 @@ from 'late' peers.
#[description]#
#(snippet)#::#[text]#
#(/snippet)#
#[urlname]#
-#[date]#
+#[date]# | Info
#{/results}#
diff --git a/htroot/yacy/hello.java b/htroot/yacy/hello.java
index a9cbe6fd6..6cb611a17 100644
--- a/htroot/yacy/hello.java
+++ b/htroot/yacy/hello.java
@@ -61,7 +61,7 @@ import de.anomic.yacy.yacyVersion;
public final class hello {
- public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null || yacyCore.seedDB == null || yacyCore.seedDB.mySeed == null) { return null; }
// return variable that accumulates replacements
@@ -71,9 +71,9 @@ public final class hello {
// final String iam = (String) post.get("iam", ""); // complete seed of the requesting peer
// final String pattern = (String) post.get("pattern", ""); //
// final String mytime = (String) post.get(MYTIME, ""); //
- final String key = (String) post.get("key", ""); // transmission key for response
- final String seed = (String) post.get(yacySeed.SEED, "");
- final String countStr = (String) post.get("count", "0");
+ final String key = post.get("key", ""); // transmission key for response
+ final String seed = post.get(yacySeed.SEED, "");
+ final String countStr = post.get("count", "0");
int i;
int count = 0;
try {count = (countStr == null) ? 0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;}
@@ -93,7 +93,9 @@ public final class hello {
// if the remote client has reported its own IP address and the client supports
// the port forwarding feature (if client version >= 0.383) then we try to
// connect to the reported IP address first
- if (reportedip.length() > 0 && !clientip.equals(reportedip) && clientversion >= yacyVersion.YACY_SUPPORTS_PORT_FORWARDING) {
+ if (reportedip.length() > 0 && !clientip.equals(reportedip) && clientversion >= yacyVersion.YACY_SUPPORTS_PORT_FORWARDING) {
+ serverCore.checkInterruption();
+
// try first the reportedip, since this may be a connect from a port-forwarding host
prop.put(yacySeed.YOURIP, reportedip);
remoteSeed.put(yacySeed.IP, reportedip);
@@ -123,6 +125,8 @@ public final class hello {
// we are only allowed to connect to the client IP address if it's not our own address
if (!isLocalIP) {
+ serverCore.checkInterruption();
+
prop.put(yacySeed.YOURIP, clientip);
remoteSeed.put(yacySeed.IP, clientip);
urls = yacyClient.queryUrlCount(remoteSeed);
@@ -162,6 +166,7 @@ public final class hello {
"' to '" + prop.get(yacySeed.YOURTYPE) + "'.");
}
+ serverCore.checkInterruption();
final StringBuffer seeds = new StringBuffer(768);
// attach some more seeds, as requested
if ((yacyCore.seedDB != null) && (yacyCore.seedDB.sizeConnected() > 0)) {
diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java
index 782b50518..323d74b6b 100644
--- a/htroot/yacy/transferRWI.java
+++ b/htroot/yacy/transferRWI.java
@@ -46,13 +46,15 @@
// javac -classpath .:../classes transferRWI.java
-import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedList;
+
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
+import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@@ -61,7 +63,7 @@ import de.anomic.yacy.yacyDHTAction;
public final class transferRWI {
- public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null) { return null; }
long start = System.currentTimeMillis();
@@ -77,7 +79,7 @@ public final class transferRWI {
// final String key = (String) post.get("key", ""); // transmission key
final int wordc = Integer.parseInt((String) post.get("wordc", "")); // number of different words
final int entryc = Integer.parseInt((String) post.get("entryc", "")); // number of entries in indexes
- final byte[] indexes = ((String) post.get("indexes", "")).getBytes(); // the indexes, as list of word entries
+ byte[] indexes = ((String) post.get("indexes", "")).getBytes(); // the indexes, as list of word entries
final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
// response values
@@ -93,7 +95,7 @@ public final class transferRWI {
final long startProcess = System.currentTimeMillis();
// decode request
- ArrayList v = new ArrayList();
+ final LinkedList v = new LinkedList();
int s = 0;
int e;
while (s < indexes.length) {
@@ -101,6 +103,9 @@ public final class transferRWI {
if ((e - s) > 0) v.add(new String(indexes, s, e - s));
s = e; while (s < indexes.length) if (indexes[s++] >= 32) {s--; break;}
}
+ // free memory
+ indexes = null;
+
// the value-vector should now have the same length as entryc
if (v.size() != entryc) sb.getLog().logSevere("ERROR WITH ENTRY COUNTER: v=" + v.size() + ", entryc=" + entryc);
@@ -114,13 +119,17 @@ public final class transferRWI {
String[] wordhashes = new String[v.size()];
int received = 0;
for (int i = 0; i < v.size(); i++) {
- estring = (String) v.get(i);
+ serverCore.checkInterruption();
+
+ estring = (String) v.removeFirst();
p = estring.indexOf("{");
if (p > 0) {
wordHash = estring.substring(0, p);
wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p));
sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true);
+ serverCore.checkInterruption();
+
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
@@ -155,5 +164,4 @@ public final class transferRWI {
// return rewrite properties
return prop;
}
-
}
\ No newline at end of file
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
index c344e99cc..f99a2ef54 100644
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@@ -48,6 +48,7 @@
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaCrawlLURL;
+import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@@ -55,7 +56,7 @@ import de.anomic.yacy.yacySeed;
public final class transferURL {
- public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) {
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch ss) throws InterruptedException {
if (post == null || ss == null) { return null; }
long start = System.currentTimeMillis();
@@ -69,7 +70,7 @@ public final class transferURL {
final String iam = (String) post.get("iam", ""); // seed hash of requester
// final String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
// final String key = (String) post.get("key", ""); // transmission key
- final int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls
+ final int urlc = Integer.parseInt(post.get("urlc", "")); // number of transported urls
final boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
final boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
@@ -87,13 +88,14 @@ public final class transferURL {
String urls;
plasmaCrawlLURL.Entry lEntry;
for (int i = 0; i < urlc; i++) {
+ serverCore.checkInterruption();
urls = (String) post.get("url" + i);
if (urls == null) {
yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
} else {
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
if (lEntry != null && blockBlacklist &&
- sb.urlBlacklist.isListed(lEntry.url().getHost().toLowerCase(), lEntry.url().getPath())) {
+ plasmaSwitchboard.urlBlacklist.isListed(lEntry.url().getHost().toLowerCase(), lEntry.url().getPath())) {
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName);
lEntry = null;
}
@@ -122,5 +124,4 @@ public final class transferURL {
prop.put("result", result);
return prop;
}
-
}
\ No newline at end of file
diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java
index 3d6b11802..22a19e4a8 100644
--- a/source/de/anomic/data/robotsParser.java
+++ b/source/de/anomic/data/robotsParser.java
@@ -197,7 +197,10 @@ public final class robotsParser{
robotsTxt = (byte[])result[1];
eTag = (String) result[2];
modDate = (Date) result[3];
- }
+ } else if (robotsTxt4Host != null) {
+ robotsTxt4Host.setLoadedDate(new Date());
+ plasmaSwitchboard.robots.addEntry(robotsTxt4Host);
+ }
} catch (Exception e) {
serverLog.logSevere("ROBOTS","Unable to download the robots.txt file from URL '" + robotsURL + "'. " + e.getMessage());
}
@@ -218,7 +221,7 @@ public final class robotsParser{
// storing the data into the robots DB
robotsTxt4Host = plasmaSwitchboard.robots.addEntry(urlHostPort,denyPath,new Date(),modDate,eTag);
- }
+ }
}
if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
@@ -229,7 +232,7 @@ public final class robotsParser{
private static Object[] downloadRobotsTxt(URL robotsURL, int redirectionCount, plasmaCrawlRobotsTxt.Entry entry) throws Exception {
- if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null};
+ if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};
redirectionCount--;
boolean accessCompletelyRestricted = false;
@@ -253,7 +256,7 @@ public final class robotsParser{
oldEtag = entry.getETag();
reqHeaders = new httpHeader();
Date modDate = entry.getModDate();
- if (modDate != null) reqHeaders.put(httpHeader.IF_MODIFIED_SINCE,entry.getModDate());
+ if (modDate != null) reqHeaders.put(httpHeader.IF_MODIFIED_SINCE,httpc.dateString(entry.getModDate()));
}
httpc.response res = con.GET(robotsURL.getPath(), reqHeaders);
diff --git a/source/de/anomic/data/wikiCode.java b/source/de/anomic/data/wikiCode.java
index 899ff8fdf..7ead416a8 100644
--- a/source/de/anomic/data/wikiCode.java
+++ b/source/de/anomic/data/wikiCode.java
@@ -100,7 +100,7 @@ public class wikiCode {
}
}
- public String replaceHTML(String result) {
+ public static String replaceHTML(String result) {
int p0;
// avoide html inside
diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java
index e45d0c533..19f5e5236 100644
--- a/source/de/anomic/http/httpHeader.java
+++ b/source/de/anomic/http/httpHeader.java
@@ -80,6 +80,13 @@ import de.anomic.yacy.yacyCore;
public final class httpHeader extends TreeMap implements Map {
+ /* =============================================================
+ * Constants defining http versions
+ * ============================================================= */
+ public static final String HTTP_VERSION_0_9 = "HTTP/0.9";
+ public static final String HTTP_VERSION_1_0 = "HTTP/1.0";
+ public static final String HTTP_VERSION_1_1 = "HTTP/1.1";
+
/* =============================================================
* Constants defining http header names
* ============================================================= */
diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java
index eb1f3acbb..d9a325ec6 100644
--- a/source/de/anomic/http/httpd.java
+++ b/source/de/anomic/http/httpd.java
@@ -1222,12 +1222,13 @@ public final class httpd implements serverHandler {
if (respond == null) throw new NullPointerException("The outputstream must not be null.");
if (conProp == null) throw new NullPointerException("The connection property structure must not be null.");
if (httpVersion == null) httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER,"HTTP/1.1");
+ if (header == null) header = new httpHeader();
try {
if ((httpStatusText == null)||(httpStatusText.length()==0)) {
- if (httpVersion.equals("HTTP/1.0") && httpHeader.http1_0.containsKey(Integer.toString(httpStatusCode)))
+ if (httpVersion.equals(httpHeader.HTTP_VERSION_1_0) && httpHeader.http1_0.containsKey(Integer.toString(httpStatusCode)))
httpStatusText = (String) httpHeader.http1_0.get(Integer.toString(httpStatusCode));
- else if (httpVersion.equals("HTTP/1.1") && httpHeader.http1_1.containsKey(Integer.toString(httpStatusCode)))
+ else if (httpVersion.equals(httpHeader.HTTP_VERSION_1_1) && httpHeader.http1_1.containsKey(Integer.toString(httpStatusCode)))
httpStatusText = (String) httpHeader.http1_1.get(Integer.toString(httpStatusCode));
else httpStatusText = "Unknown";
}
@@ -1389,45 +1390,5 @@ public final class httpd implements serverHandler {
}
} catch (Exception e) {}
return false;
- }
-
-
-// public static boolean isTextMime(String mime, Set whitelist) {
-// if (whitelist.contains(mime)) return true;
-// // some mime-types are given as "text/html; charset=...", so look for ";"
-// if (mime.length() == 0) return false;
-// int pos = mime.indexOf(';');
-// if (pos < 0) return false;
-// return whitelist.contains(mime.substring(0, pos));
-// }
+ }
}
-
-/*
- ###
- ### Messages of the Server
- ###
-
- # success Messages
- HTTPStatus200 = OK; The URL was found. It contents follows.
- HTTPStatus201 = Created; A URL was created in response to a POST.
- HTTPStatus202 = Accepted; The request was accepted for processing later.
- HTTPStatus203 = Non-Authoritative; The information here is unofficial.
- HTTPStatus204 = No Response; The request is successful, but there is no data to send.
-
- # redirection
- HTTPStatus300 = Moved; The URL has permanently moved to a new location.
- HTTPStatus301 = Found; The URL can be temporarily found at a new location.
-
- # client errors
- HTTPStatus400 = Bad Request; Syntax error in the request.
- HTTPStatus401 = Unauthorized; The client is not authorized to access this web page.
- HTTPStatus402 = Payment Required; A payment is required to access this web page.
- HTTPStatus403 = Forbidden; This URL is forbidden. No authorization is required, it won't help.
- HTTPStatus404 = Not Found; This page is not on the server.
-
- # server errors
- HTTPStatus500 = Internal Error; The server encountered an unexpected error.
- HTTPStatus501 = Not Implemented; The client requested an unimplemented feature.
- HTTPStatus502 = Service Overloaded; The server reached the maximum number of connections.
- HTTPStatus503 = Gateway timeout; Fetching data from remote service failed.
- */
diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java
index 1e59ef301..39a15b792 100644
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@@ -534,6 +534,10 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
tp.put("clientname", switchboard.getConfig("peerName", "anomic"));
//System.out.println("respond props: " + ((tp == null) ? "null" : tp.toString())); // debug
} catch (InvocationTargetException e) {
+ if (e.getCause() instanceof InterruptedException) {
+ throw new InterruptedException(e.getCause().getMessage());
+ }
+
this.theLogger.logSevere("INTERNAL ERROR: " + e.toString() + ":" +
e.getMessage() +
" target exception at " + targetClass + ": " +
diff --git a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
index 86ab710a7..33fe17c39 100644
--- a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
+++ b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
@@ -60,7 +60,7 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
public class plasmaCrawlRobotsTxt {
- private kelondroMap robotsTable;
+ kelondroMap robotsTable;
private final File robotsTableFile;
private int bufferkb;
@@ -221,6 +221,12 @@ public class plasmaCrawlRobotsTxt {
return null;
}
+    // stores the time of the given date (in milliseconds) under LOADED_DATE; null is ignored
+    public void setLoadedDate(Date newLoadedDate) {
+        if (newLoadedDate == null) return;
+        this.mem.put(LOADED_DATE, String.valueOf(newLoadedDate.getTime()));
+    }
+
public Date getModDate() {
if (this.mem.containsKey(MOD_DATE)) {
return new Date(Long.valueOf((String) this.mem.get(MOD_DATE)).longValue());
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 2b8bc4d5d..09bfc984b 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -84,7 +84,7 @@ public final class plasmaHTCache {
public long currCacheSize;
public long maxCacheSize;
public final File cachePath;
- public static serverLog log;
+ public final serverLog log;
public static final HashSet filesInUse = new HashSet(); // can we delete this file
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
@@ -100,33 +100,33 @@ public final class plasmaHTCache {
}
if (!(htCachePath.isDirectory())) {
// if the cache does not exists or is a file and not a directory, panic
- log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
+ this.log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
System.exit(0);
}
// open the response header database
- File dbfile = new File(cachePath, "responseHeader.db");
+ File dbfile = new File(this.cachePath, "responseHeader.db");
try {
if (dbfile.exists())
- responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
+ this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
else
- responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaCrawlLURL.urlHashLength, 150));
+ this.responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaURL.urlHashLength, 150));
} catch (IOException e) {
- log.logSevere("the request header database could not be opened: " + e.getMessage());
+ this.log.logSevere("the request header database could not be opened: " + e.getMessage());
System.exit(0);
}
// init stack
- cacheStack = new LinkedList();
+ this.cacheStack = new LinkedList();
// init cache age and size management
- cacheAge = new TreeMap();
- currCacheSize = 0;
+ this.cacheAge = new TreeMap();
+ this.currCacheSize = 0;
this.maxCacheSize = maxCacheSize;
// start the cache startup thread
// this will collect information about the current cache size and elements
- serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000);
+ serverInstantThread.oneTimeJob(this, "cacheScan", this.log, 5000);
}
public int size() {
@@ -136,15 +136,15 @@ public final class plasmaHTCache {
}
public int dbSize() {
- return responseHeaderDB.size();
+ return this.responseHeaderDB.size();
}
public int[] dbCacheChunkSize() {
- return responseHeaderDB.cacheChunkSize();
+ return this.responseHeaderDB.cacheChunkSize();
}
public int[] dbCacheFillStatus() {
- return responseHeaderDB.cacheFillStatus();
+ return this.responseHeaderDB.cacheFillStatus();
}
public void push(Entry entry) {
@@ -157,17 +157,16 @@ public final class plasmaHTCache {
synchronized (this.cacheStack) {
if (this.cacheStack.size() > 0)
return (Entry) this.cacheStack.removeFirst();
- else
- return null;
+ return null;
}
}
public void storeHeader(String urlHash, httpHeader responseHeader) throws IOException {
- responseHeaderDB.set(urlHash, responseHeader);
+ this.responseHeaderDB.set(urlHash, responseHeader);
}
public long getFreeSize() {
- return (currCacheSize > maxCacheSize) ? 0 : maxCacheSize - currCacheSize;
+ return (this.currCacheSize > this.maxCacheSize) ? 0 : this.maxCacheSize - this.currCacheSize;
}
public boolean writeFile(URL url, byte[] array) {
@@ -181,10 +180,10 @@ public final class plasmaHTCache {
// this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs
// In this case do nothing.
- log.logSevere("File storage failed (not a directory): " + e.getMessage());
+ this.log.logSevere("File storage failed (not a directory): " + e.getMessage());
return false;
} catch (IOException e) {
- log.logSevere("File storage failed (IO error): " + e.getMessage());
+ this.log.logSevere("File storage failed (IO error): " + e.getMessage());
return false;
}
writeFileAnnouncement(file);
@@ -192,10 +191,10 @@ public final class plasmaHTCache {
}
public void writeFileAnnouncement(File file) {
- synchronized (cacheAge) {
+ synchronized (this.cacheAge) {
if (file.exists()) {
- currCacheSize += file.length();
- cacheAge.put(ageString(file.lastModified(), file), file);
+ this.currCacheSize += file.length();
+ this.cacheAge.put(ageString(file.lastModified(), file), file);
cleanup();
}
}
@@ -209,22 +208,21 @@ public final class plasmaHTCache {
if (deleteFileandDirs(getCachePath(url), msg)) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
- log.logFinest("Trying to remove responseHeader from URL: " + url.toString());
- responseHeaderDB.remove(plasmaURL.urlHash(url));
+ this.log.logFinest("Trying to remove responseHeader from URL: " + url.toString());
+ this.responseHeaderDB.remove(plasmaURL.urlHash(url));
} catch (IOException e) {
- log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
+ this.log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
return true;
- } else {
- return false;
}
+ return false;
}
private boolean deleteFile(File obj) {
if (obj.exists() && !filesInUse.contains(obj)) {
long size = obj.length();
if (obj.delete()) {
- currCacheSize -= size;
+ this.currCacheSize -= size;
return true;
}
}
@@ -233,39 +231,38 @@ public final class plasmaHTCache {
private boolean deleteFileandDirs (File obj, String msg) {
if (deleteFile(obj)) {
- log.logInfo("DELETED " + msg + " CACHE : " + obj.toString());
+ this.log.logInfo("DELETED " + msg + " CACHE : " + obj.toString());
obj = obj.getParentFile();
// If the has been emptied, remove it
// Loop as long as we produce empty driectoriers, but stop at HTCACHE
- while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
- if (obj.delete()) log.logInfo("DELETED EMPTY DIRECTORY : " + obj.toString());
+ while ((!(obj.equals(this.cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
+ if (obj.delete()) this.log.logInfo("DELETED EMPTY DIRECTORY : " + obj.toString());
obj = obj.getParentFile();
}
return true;
- } else {
- return false;
}
+ return false;
}
private void cleanupDoIt(long newCacheSize) {
- if (cacheAge.size() == 0) return;
+ if (this.cacheAge.size() == 0) return;
File obj;
- Iterator iter = cacheAge.keySet().iterator();
- while (iter.hasNext() && (currCacheSize >= newCacheSize)) {
+ Iterator iter = this.cacheAge.keySet().iterator();
+ while (iter.hasNext() && (this.currCacheSize >= newCacheSize)) {
Object key = iter.next();
- obj = (File) cacheAge.get(key);
+ obj = (File) this.cacheAge.get(key);
if (obj != null) {
if (filesInUse.contains(obj)) continue;
- log.logFinest("Trying to delete old file: " + obj.toString());
+ this.log.logFinest("Trying to delete old file: " + obj.toString());
if (deleteFileandDirs (obj, "OLD")) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
- log.logFinest("Trying to remove responseHeader for URL: " +
- getURL(cachePath ,obj).toString());
- responseHeaderDB.remove(plasmaURL.urlHash(getURL(cachePath ,obj)));
+ this.log.logFinest("Trying to remove responseHeader for URL: " +
+ getURL(this.cachePath ,obj).toString());
+ this.responseHeaderDB.remove(plasmaURL.urlHash(getURL(this.cachePath ,obj)));
} catch (IOException e) {
- log.logInfo("IOExeption removing response header from DB: " +
+ this.log.logInfo("IOExeption removing response header from DB: " +
e.getMessage(), e);
}
}
@@ -275,13 +272,13 @@ public final class plasmaHTCache {
private void cleanup() {
// clean up cache to have 4% (enough) space for next entries
- if ((currCacheSize >= maxCacheSize) && (cacheAge.size() > 0)) {
- if (maxCacheSize > 0) cleanupDoIt(maxCacheSize - ((maxCacheSize / 100) * 4));
+ if ((this.currCacheSize >= this.maxCacheSize) && (this.cacheAge.size() > 0)) {
+ if (this.maxCacheSize > 0) cleanupDoIt(this.maxCacheSize - ((this.maxCacheSize / 100) * 4));
}
}
public void close() throws IOException {
- responseHeaderDB.close();
+ this.responseHeaderDB.close();
}
private String ageString(long date, File f) {
@@ -299,7 +296,7 @@ public final class plasmaHTCache {
//log.logSystem("STARTING CACHE SCANNING");
kelondroMScoreCluster doms = new kelondroMScoreCluster();
int c = 0;
- enumerateFiles ef = new enumerateFiles(cachePath, true, false, true, true);
+ enumerateFiles ef = new enumerateFiles(this.cachePath, true, false, true, true);
File f;
while (ef.hasMoreElements()) {
c++;
@@ -307,19 +304,19 @@ public final class plasmaHTCache {
long d = f.lastModified();
//System.out.println("Cache: " + dom(f));
doms.incScore(dom(f));
- currCacheSize += f.length();
- cacheAge.put(ageString(d, f), f);
+ this.currCacheSize += f.length();
+ this.cacheAge.put(ageString(d, f), f);
}
//System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
long ageHours = 0;
try {
ageHours = (System.currentTimeMillis() -
- Long.parseLong(((String) cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
+ Long.parseLong(((String) this.cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
} catch (NumberFormatException e) {
//e.printStackTrace();
}
- log.logConfig("CACHE SCANNED, CONTAINS " + c +
- " FILES = " + currCacheSize/1048576 + "MB, OLDEST IS " +
+ this.log.logConfig("CACHE SCANNED, CONTAINS " + c +
+ " FILES = " + this.currCacheSize/1048576 + "MB, OLDEST IS " +
((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) + " OLD");
cleanup();
@@ -333,18 +330,18 @@ public final class plasmaHTCache {
ip = httpc.dnsResolve(dom);
if (ip == null) continue;
result += ", " + dom + "=" + ip;
- log.logConfig("PRE-FILLED " + dom + "=" + ip);
+ this.log.logConfig("PRE-FILLED " + dom + "=" + ip);
c++;
doms.deleteScore(dom);
// wait a short while to prevent that this looks like a DoS
- try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
+ try {Thread.sleep(100);} catch (InterruptedException e) {}
}
- if (result.length() > 2) log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + c +
+ if (result.length() > 2) this.log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + c +
" ADDRESSES: " + result.substring(2));
}
private String dom(File f) {
- String s = f.toString().substring(cachePath.toString().length() + 1);
+ String s = f.toString().substring(this.cachePath.toString().length() + 1);
int p = s.indexOf("/");
if (p < 0) p = s.indexOf("\\");
if (p < 0) return null;
@@ -352,17 +349,17 @@ public final class plasmaHTCache {
}
public httpHeader getCachedResponse(String urlHash) throws IOException {
- Map hdb = responseHeaderDB.get(urlHash);
+ Map hdb = this.responseHeaderDB.get(urlHash);
if (hdb == null) return null;
return new httpHeader(null, hdb);
}
public boolean full() {
- return (cacheStack.size() > stackLimit);
+ return (this.cacheStack.size() > stackLimit);
}
public boolean empty() {
- return (cacheStack.size() == 0);
+ return (this.cacheStack.size() == 0);
}
public static boolean isPicture(httpHeader response) {
@@ -476,9 +473,8 @@ public final class plasmaHTCache {
return serverFileUtils.read(f);
} catch (IOException e) {
return null;
- } else {
- return null;
}
+ return null;
}
public static boolean isPOST(String urlString) {
@@ -534,14 +530,14 @@ public final class plasmaHTCache {
serverLog.logFine("PLASMA", "Entry: URL=" + url.toString());
this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url);
try {
- this.url = new URL(nomalizedURLString);
+ this.url = new URL(this.nomalizedURLString);
} catch (MalformedURLException e) {
System.out.println("internal error at httpdProxyCache.Entry: " + e);
System.exit(-1);
}
this.name = name;
this.cacheFile = getCachePath(this.url);
- this.nomalizedURLHash = plasmaCrawlLURL.urlHash(nomalizedURLString);
+ this.nomalizedURLHash = plasmaURL.urlHash(this.nomalizedURLString);
// assigned:
this.initDate = initDate;
@@ -562,10 +558,10 @@ public final class plasmaHTCache {
System.exit(0);
}
- lastModified = new Date(serverDate.correctedUTCTime());
+ this.lastModified = new Date(serverDate.correctedUTCTime());
} else {
- lastModified = responseHeader.lastModified();
- if (lastModified == null) lastModified = new Date(serverDate.correctedUTCTime()); // does not exist in header
+ this.lastModified = responseHeader.lastModified();
+ if (this.lastModified == null) this.lastModified = new Date(serverDate.correctedUTCTime()); // does not exist in header
}
this.doctype = plasmaWordIndexEntry.docType(responseHeader.mime());
if (this.doctype == plasmaWordIndexEntry.DT_UNKNOWN) this.doctype = plasmaWordIndexEntry.docType(url);
@@ -576,22 +572,23 @@ public final class plasmaHTCache {
}
public String name() {
- return name;
+ return this.name;
}
public String initiator() {
- return initiator;
+ return this.initiator;
}
public boolean proxy() {
return initiator() == null;
}
public long size() {
- if (cacheArray == null) return 0; else return cacheArray.length;
+ if (this.cacheArray == null) return 0;
+ return this.cacheArray.length;
}
public URL referrerURL() {
- if (requestHeader == null) return null;
+ if (this.requestHeader == null) return null;
try {
- return new URL((String) requestHeader.get(httpHeader.REFERER, ""));
+ return new URL((String) this.requestHeader.get(httpHeader.REFERER, ""));
} catch (Exception e) {
return null;
}
@@ -611,35 +608,35 @@ public final class plasmaHTCache {
// in case of FALSE, the reason as String is returned
// check profile
- if (!(profile.storeHTCache())) return "storage_not_wanted";
+ if (!(this.profile.storeHTCache())) return "storage_not_wanted";
// decide upon header information if a specific file should be stored to the cache or not
// if the storage was requested by prefetching, the request map is null
// check status code
- if (!((responseStatus.startsWith("200")) || (responseStatus.startsWith("203")))) return "bad_status_" + responseStatus.substring(0,3);
+ if (!((this.responseStatus.startsWith("200")) || (this.responseStatus.startsWith("203")))) return "bad_status_" + this.responseStatus.substring(0,3);
// check storage location
// sometimes a file name is equal to a path name in the same directory;
// or sometimes a file name is equal a directory name created earlier;
// we cannot match that here in the cache file path and therefore omit writing into the cache
- if ((cacheFile.getParentFile().isFile()) || (cacheFile.isDirectory())) return "path_ambiguous";
- if (cacheFile.toString().indexOf("..") >= 0) return "path_dangerous";
+ if ((this.cacheFile.getParentFile().isFile()) || (this.cacheFile.isDirectory())) return "path_ambiguous";
+ if (this.cacheFile.toString().indexOf("..") >= 0) return "path_dangerous";
// -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches
- if ((isPOST(nomalizedURLString)) && (!(profile.crawlingQ()))) return "dynamic_post";
- if (isCGI(nomalizedURLString)) return "dynamic_cgi";
+ if ((isPOST(this.nomalizedURLString)) && (!(this.profile.crawlingQ()))) return "dynamic_post";
+ if (isCGI(this.nomalizedURLString)) return "dynamic_cgi";
// -authorization cases in request
// authorization makes pages very individual, and therefore we cannot use the
// content in the cache
- if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.AUTHORIZATION))) return "personalized";
+ if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.AUTHORIZATION))) return "personalized";
// -ranges in request and response
// we do not cache partial content
- if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.RANGE))) return "partial";
- if ((responseHeader != null) && (responseHeader.containsKey(httpHeader.CONTENT_RANGE))) return "partial";
+ if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.RANGE))) return "partial";
+ if ((this.responseHeader != null) && (this.responseHeader.containsKey(httpHeader.CONTENT_RANGE))) return "partial";
// -if-modified-since in request
// we do not care about if-modified-since, because this case only occurres if the
@@ -657,8 +654,8 @@ public final class plasmaHTCache {
// -pragma in response
// if we have a pragma non-cache, we don't cache. usually if this is wanted from
// the server, it makes sense
- if ((responseHeader.containsKey(httpHeader.PRAGMA)) &&
- (((String) responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return "controlled_no_cache";
+ if ((this.responseHeader.containsKey(httpHeader.PRAGMA)) &&
+ (((String) this.responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return "controlled_no_cache";
// -expires in response
// we do not care about expires, because at the time this is called the data is
@@ -666,12 +663,12 @@ public final class plasmaHTCache {
// -cache-control in response
// the cache-control has many value options.
- String cacheControl = (String) responseHeader.get(httpHeader.CACHE_CONTROL);
+ String cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
if (cacheControl != null) {
cacheControl = cacheControl.trim().toUpperCase();
if (cacheControl.startsWith("MAX-AGE=")) {
// we need also the load date
- Date date = responseHeader.date();
+ Date date = this.responseHeader.date();
if (date == null) return "stale_no_date_given_in_response";
try {
long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
@@ -696,57 +693,57 @@ public final class plasmaHTCache {
// -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches
- if (isPOST(nomalizedURLString)) return false;
- if (isCGI(nomalizedURLString)) return false;
+ if (isPOST(this.nomalizedURLString)) return false;
+ if (isCGI(this.nomalizedURLString)) return false;
// -authorization cases in request
- if (requestHeader.containsKey(httpHeader.AUTHORIZATION)) return false;
+ if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) return false;
// -ranges in request
// we do not cache partial content
- if ((requestHeader != null) && (requestHeader.containsKey(httpHeader.RANGE))) return false;
+ if ((this.requestHeader != null) && (this.requestHeader.containsKey(httpHeader.RANGE))) return false;
//Date d1, d2;
// -if-modified-since in request
// The entity has to be transferred only if it has
// been modified since the date given by the If-Modified-Since header.
- if (requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
+ if (this.requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
// checking this makes only sense if the cached response contains
// a Last-Modified field. If the field does not exist, we go the safe way
- if (!(responseHeader.containsKey(httpHeader.LAST_MODIFIED))) return false;
+ if (!(this.responseHeader.containsKey(httpHeader.LAST_MODIFIED))) return false;
// parse date
Date d1, d2;
- d2 = responseHeader.lastModified(); if (d2 == null) d2 = new Date(serverDate.correctedUTCTime());
- d1 = requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date(serverDate.correctedUTCTime());
+ d2 = this.responseHeader.lastModified(); if (d2 == null) d2 = new Date(serverDate.correctedUTCTime());
+ d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date(serverDate.correctedUTCTime());
// finally, we shall treat the cache as stale if the modification time is after the if-.. time
if (d2.after(d1)) return false;
}
- boolean isNotPicture = !isPicture(responseHeader);
+ boolean isNotPicture = !isPicture(this.responseHeader);
// -cookies in request
// unfortunately, we should reload in case of a cookie
// but we think that pictures can still be considered as fresh
- if ((requestHeader.containsKey(httpHeader.COOKIE)) && (isNotPicture)) return false;
+ if ((this.requestHeader.containsKey(httpHeader.COOKIE)) && (isNotPicture)) return false;
// -set-cookie in cached response
// this is a similar case as for COOKIE.
- if ((responseHeader.containsKey(httpHeader.SET_COOKIE)) && (isNotPicture)) return false; // too strong
- if ((responseHeader.containsKey(httpHeader.SET_COOKIE2)) && (isNotPicture)) return false; // too strong
+ if ((this.responseHeader.containsKey(httpHeader.SET_COOKIE)) && (isNotPicture)) return false; // too strong
+ if ((this.responseHeader.containsKey(httpHeader.SET_COOKIE2)) && (isNotPicture)) return false; // too strong
// -pragma in cached response
// logically, we would not need to care about no-cache pragmas in cached response headers,
// because they cannot exist since they are not written to the cache.
// So this IF should always fail..
- if ((responseHeader.containsKey(httpHeader.PRAGMA)) &&
- (((String) responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return false;
+ if ((this.responseHeader.containsKey(httpHeader.PRAGMA)) &&
+ (((String) this.responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE"))) return false;
// calculate often needed values for freshness attributes
- Date date = responseHeader.date();
- Date expires = responseHeader.expires();
- Date lastModified = responseHeader.lastModified();
- String cacheControl = (String) responseHeader.get(httpHeader.CACHE_CONTROL);
+ Date date = this.responseHeader.date();
+ Date expires = this.responseHeader.expires();
+ Date lastModified = this.responseHeader.lastModified();
+ String cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
// see for documentation also:
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 0c4736152..4f2e98159 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -358,7 +358,7 @@ public class plasmaSnippetCache {
}
}
- private void loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
+ public void loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
plasmaCrawlWorker.load(
url,
"",
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 8d4b811e2..34af738ff 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1476,7 +1476,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int p;
URL url;
plasmaCrawlLURL.Entry urlentry;
- String urlstring, urlname, filename;
+ String urlstring, urlname, filename, urlhash;
String host, hash, address, descr = "";
yacySeed seed;
plasmaSnippetCache.result snippet;
@@ -1484,6 +1484,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
while ((acc.hasMoreElements()) && (i < query.wantedResults)) {
urlentry = acc.nextElement();
url = urlentry.url();
+ urlhash = urlentry.hash();
host = url.getHost();
if (host.endsWith(".yacyh")) {
// translate host into current IP
@@ -1525,6 +1526,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} else {
prop.put("results_" + i + "_description", descr);
prop.put("results_" + i + "_url", urlstring);
+ prop.put("results_" + i + "_urlhash", urlhash);
prop.put("results_" + i + "_urlname", urlname);
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));
diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java
index 5d522ca2f..39a3ea817 100644
--- a/source/de/anomic/server/serverCore.java
+++ b/source/de/anomic/server/serverCore.java
@@ -578,8 +578,7 @@ public final class serverCore extends serverAbstractThread implements serverThre
}
super.close();
- }
-
+ }
}
public final class SessionFactory implements org.apache.commons.pool.PoolableObjectFactory {
@@ -707,6 +706,10 @@ public final class serverCore extends serverAbstractThread implements serverThre
this.stopped = stopped;
}
+ public boolean isStopped() {
+ return this.stopped;
+ }
+
public void close() {
if (this.isAlive()) {
try {
@@ -817,7 +820,7 @@ public final class serverCore extends serverAbstractThread implements serverThre
this.running = true;
// The thread keeps running.
- while (!this.stopped && !Thread.interrupted()) {
+ while (!this.stopped && !this.isInterrupted()) {
if (this.done) {
// We are waiting for a task now.
synchronized (this) {
@@ -1114,5 +1117,11 @@ public final class serverCore extends serverAbstractThread implements serverThre
if (!this.theSessionPool.isClosed) this.theSessionPool.close();
super.finalize();
}
+
+ public static final void checkInterruption() throws InterruptedException {
+ Thread currentThread = Thread.currentThread();
+ if (currentThread.isInterrupted()) throw new InterruptedException();
+ if ((currentThread instanceof serverCore.Session) && ((serverCore.Session)currentThread).isStopped()) throw new InterruptedException();
+ }
}
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 864f134f0..bf7e03e4c 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -696,19 +696,19 @@ public final class yacyClient {
try {
final plasmaSwitchboard sb = new plasmaSwitchboard(args[0], "httpProxy.init", "DATA/SETTINGS/httpProxy.conf");
final yacyCore core = new yacyCore(sb);
- core.peerActions.loadSeedLists();
- final yacySeed target = core.seedDB.getConnected(args[1]);
+ yacyCore.peerActions.loadSeedLists();
+ final yacySeed target = yacyCore.seedDB.getConnected(args[1]);
final String wordhashe = plasmaWordIndexEntry.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
final HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
- "/yacy/search.html?myseed=" + core.seedDB.mySeed.genSeedStr(null) +
+ "/yacy/search.html?myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
- "&myseed=" + core.seedDB.mySeed.genSeedStr(null) +
+ "&myseed=" + yacyCore.seedDB.mySeed.genSeedStr(null) +
"&count=10&resource=global" +
"&query=" + wordhashe),
- 5000, null, null, core.seedDB.sb.remoteProxyHost, core.seedDB.sb.remoteProxyPort));
+ 5000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
System.out.println("Result=" + result.toString());
} catch (Exception e) {
e.printStackTrace();