diff --git a/htroot/ProxyIndexingMonitor_p.java b/htroot/ProxyIndexingMonitor_p.java
index ca7cc7c03..91badc907 100644
--- a/htroot/ProxyIndexingMonitor_p.java
+++ b/htroot/ProxyIndexingMonitor_p.java
@@ -50,7 +50,6 @@ import java.io.File;
import java.io.IOException;
import de.anomic.http.httpHeader;
-import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@@ -110,14 +109,13 @@ public class ProxyIndexingMonitor_p {
sb.setCacheSize(Long.parseLong(newProxyCacheSize));
// implant these settings also into the crawling profile for the proxy
- plasmaCrawlProfile.entry profile = sb.profiles.getEntry(sb.getConfig("defaultProxyProfile", ""));
- if (profile == null) {
+ if (sb.defaultProxyProfile == null) {
prop.put("info", 1); //delete DATA/PLASMADB/crawlProfiles0.db
} else {
try {
- profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth));
- profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false");
- profile.changeEntry("remoteIndexing",proxyCrawlOrder ? "true":"false");
+ sb.defaultProxyProfile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth));
+ sb.defaultProxyProfile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false");
+ sb.defaultProxyProfile.changeEntry("remoteIndexing",proxyCrawlOrder ? "true":"false");
prop.put("info", 2);//new proxyPrefetchdepth
prop.put("info_message", newProxyPrefetchDepth);
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index 441bdef2c..3d2f87f61 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -455,10 +455,10 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
requestDate, // init date
0, // crawling depth
url, // url
- "", // name of the url is unknown
- //requestHeader, // request headers
+ "", // name of the url is unknown
+ //requestHeader, // request headers
"200 OK", // request status
- //cachedResponseHeader, // response headers
+ //cachedResponseHeader, // response headers
cachedResInfo,
null, // initiator
switchboard.defaultProxyProfile // profile
diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java
index c05f08e5b..8d9918354 100644
--- a/source/de/anomic/plasma/plasmaCrawlEURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlEURL.java
@@ -205,7 +205,7 @@ public class plasmaCrawlEURL extends indexURL {
private String hash; // the url's hash
private String referrer; // the url's referrer hash
private String initiator; // the crawling initiator
- private String executor; // the crawling initiator
+    private String executor;          // the crawling executor
private URL url; // the url as string
private String name; // the name of the url, from anchor tag name
private Date initdate; // the time when the url was first time appeared
diff --git a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java
index 60929d606..cd6eb1cd8 100644
--- a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java
+++ b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java
@@ -65,7 +65,7 @@ public final class plasmaCrawlLoaderMessage {
// loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) {
public plasmaCrawlLoaderMessage(
URL url,
- String name,
+ String name, // the name of the url, from anchor tag name
String referer,
String initiator,
int depth,
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 138baf738..6ee1f2de8 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -65,6 +65,7 @@ import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySearch;
+import de.anomic.yacy.yacyCore;
public class plasmaSnippetCache {
@@ -209,10 +210,6 @@ public class plasmaSnippetCache {
if (resContent != null) {
// if the content was found
resContentLength = this.cacheManager.getResourceContentLength(url);
-
- // getting resource metadata
- resInfo = this.cacheManager.loadResourceInfo(url);
-
} else if (fetchOnline) {
// if not found try to download it
@@ -616,12 +613,12 @@ public class plasmaSnippetCache {
) throws plasmaCrawlerException {
plasmaHTCache.Entry result = this.sb.cacheLoader.loadSync(
- url,
- "",
- null,
- null,
- 0,
- null,
+ url, // the url
+ "", // name of the url, from anchor tag name
+ null, // referer
+ yacyCore.seedDB.mySeed.hash, // initiator
+ 0, // depth
+ sb.defaultSnippetProfile, // crawl profile
socketTimeout,
keepInMemory
);
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index e64c00f61..f5990ebaf 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -219,6 +219,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public plasmaCrawlProfile profiles;
public plasmaCrawlProfile.entry defaultProxyProfile;
public plasmaCrawlProfile.entry defaultRemoteProfile;
+ public plasmaCrawlProfile.entry defaultSnippetProfile;
public boolean rankingOn;
public plasmaRankingDistribution rankingOwnDistribution;
public plasmaRankingDistribution rankingOtherDistribution;
@@ -251,8 +252,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
/*
* Some constants
*/
- private static final String STR_PROXYPROFILE = "defaultProxyProfile";
- private static final String STR_REMOTEPROFILE = "defaultRemoteProfile";
private static final String STR_REMOTECRAWLTRIGGER = "REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER ";
private serverSemaphore shutdownSync = new serverSemaphore(0);
@@ -744,23 +743,35 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
private void initProfiles() {
- if ((this.profiles.size() == 0) ||
- (getConfig(STR_PROXYPROFILE, "").length() == 0) ||
- (this.profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) {
+ this.defaultProxyProfile = null;
+ this.defaultRemoteProfile = null;
+ this.defaultSnippetProfile = null;
+ Iterator i = this.profiles.profiles(true);
+ plasmaCrawlProfile.entry profile;
+ String name;
+ while (i.hasNext()) {
+ profile = (plasmaCrawlProfile.entry) i.next();
+ name = profile.name();
+ if (name.equals("proxy")) this.defaultProxyProfile = profile;
+ if (name.equals("remote")) this.defaultRemoteProfile = profile;
+ if (name.equals("snippet")) this.defaultSnippetProfile = profile;
+ }
+ if (this.defaultProxyProfile == null) {
// generate new default entry for proxy crawling
- this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), 60 * 24 * 30, -1, -1, false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
- setConfig(STR_PROXYPROFILE, this.defaultProxyProfile.handle());
- } else {
- this.defaultProxyProfile = this.profiles.getEntry(getConfig(STR_PROXYPROFILE, ""));
+ this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*",
+ Integer.parseInt(getConfig("proxyPrefetchDepth", "0")),
+ Integer.parseInt(getConfig("proxyPrefetchDepth", "0")),
+ 60 * 24, -1, -1, false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
}
- if ((profiles.size() == 1) ||
- (getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
- (profiles.getEntry(getConfig(STR_REMOTEPROFILE, "")) == null)) {
+ if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
- defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, 60 * 24 * 30, -1, -1, true, false, true, true, false, true, true, false);
- setConfig(STR_REMOTEPROFILE, defaultRemoteProfile.handle());
- } else {
- defaultRemoteProfile = profiles.getEntry(getConfig(STR_REMOTEPROFILE, ""));
+ defaultRemoteProfile = this.profiles.newEntry("remote", "", ".*", ".*", 0, 0,
+ -1, -1, -1, true, false, true, true, false, true, true, false);
+ }
+ if (this.defaultSnippetProfile == null) {
+ // generate new default entry for snippet fetch and optional crawling
+ defaultSnippetProfile = this.profiles.newEntry("snippet", "", ".*", ".*", 0, 0,
+ 60 * 24 * 30, -1, -1, true, true, true, true, false, true, true, false);
}
}
@@ -785,7 +796,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// getting next profile
entry = (plasmaCrawlProfile.entry) iter.next();
- if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) {
+ if (!((entry.name().equals("proxy")) ||
+ (entry.name().equals("remote")) ||
+ (entry.name().equals("snippet")))) {
iter.remove();
hasDoneSomething = true;
}
diff --git a/yacy.init b/yacy.init
index 6033bb0ba..5171fa40d 100644
--- a/yacy.init
+++ b/yacy.init
@@ -439,11 +439,6 @@ crawlingQ=false
storeHTCache=false
storeTXCache=true
-# default crawl profile entries
-# if these entries are empty, then a new entry will be generated
-defaultProxyProfile=
-defaultRemoteProfile=
-
# peers may initiate remote crawling tasks.
# every peer may allow or disallow to be used as crawling-peer;
# you can also set a maximum crawl depth that can be requested or accepted