Re-crawl known URLs:
diff --git a/htroot/CrawlStart_p.java b/htroot/CrawlStart_p.java
index 15077ecde..05a889303 100644
--- a/htroot/CrawlStart_p.java
+++ b/htroot/CrawlStart_p.java
@@ -24,6 +24,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+import de.anomic.crawler.CrawlProfile;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
@@ -44,7 +45,8 @@ public class CrawlStart_p {
prop.put("starturl", (intranet) ? repository : "http://");
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
- prop.put("crawlingFilter", (intranet) ? repository + ".*" : ".*");
+ prop.put("mustmatch", (intranet) ? repository + ".*" : CrawlProfile.MATCH_ALL);
+ prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER);
prop.put("crawlingIfOlderCheck", "0");
prop.put("crawlingIfOlderUnitYearCheck", "0");
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index dfa10b9f4..5af704396 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -91,7 +91,8 @@ public class QuickCrawlLink_p {
final String title = post.get("title",null);
// getting other parameters if set
- final String crawlingFilter = post.get("crawlingFilter", ".*");
+ final String crawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
+ final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
final int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0"));
final boolean crawlDynamic = post.get("crawlingQ", "").equals("on");
final boolean indexText = post.get("indexText", "on").equals("on");
@@ -129,11 +130,11 @@ public class QuickCrawlLink_p {
try {
pe = sb.webIndex.profilesActiveCrawls.newEntry(
crawlingStartURL.getHost(),
- crawlingStartURL,
- crawlingFilter,
- crawlingFilter,
- CrawlingDepth,
- CrawlingDepth,
+ crawlingStartURL,
+ CrawlProfile.KEYWORDS_USER,
+ crawlingMustMatch,
+ crawlingMustNotMatch,
+ CrawlingDepth,
60 * 24 * 30, // recrawlIfOlder (minutes); here: one month
-1, // domFilterDepth, if negative: no auto-filter
-1, // domMaxPages, if negative: no count restriction
diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java
index ea3a3651d..831a5a0de 100644
--- a/htroot/WatchCrawler_p.java
+++ b/htroot/WatchCrawler_p.java
@@ -123,16 +123,16 @@ public class WatchCrawler_p {
crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true);
// set the crawling filter
- String newcrawlingfilter = post.get("crawlingFilter", ".*");
- if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
-
+ String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
+ String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
+ if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted
+ // special cases:
if (crawlingStartURL!= null && fullDomain) {
- newcrawlingfilter = ".*" + crawlingStartURL.getHost() + ".*";
+ newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*";
}
if (crawlingStart!= null && subPath && (pos = crawlingStart.lastIndexOf("/")) > 0) {
- newcrawlingfilter = crawlingStart.substring(0, pos + 1) + ".*";
+ newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
}
- env.setConfig("crawlingFilter", newcrawlingfilter);
final boolean crawlOrder = post.get("crawlOrder", "off").equals("on");
env.setConfig("crawlOrder", (crawlOrder) ? "true" : "false");
@@ -183,12 +183,12 @@ public class WatchCrawler_p {
if ((crawlingStart == null || crawlingStartURL == null) /* || (!(crawlingStart.matches(newcrawlingfilter))) */) {
// print error message
prop.put("info", "4"); //crawlfilter does not match url
- prop.putHTML("info_newcrawlingfilter", newcrawlingfilter);
+ prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
prop.putHTML("info_crawlingStart", crawlingStart);
} else try {
// check if the crawl filter works correctly
- Pattern.compile(newcrawlingfilter);
+ Pattern.compile(newcrawlingMustMatch);
// stack request
// first delete old entry, if exists
@@ -201,8 +201,12 @@ public class WatchCrawler_p {
// stack url
sb.webIndex.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it
final CrawlProfile.entry pe = sb.webIndex.profilesActiveCrawls.newEntry(
- crawlingStartURL.getHost(), crawlingStartURL, newcrawlingfilter, newcrawlingfilter,
- newcrawlingdepth, newcrawlingdepth,
+ crawlingStartURL.getHost(),
+ crawlingStartURL,
+ CrawlProfile.KEYWORDS_USER,
+ newcrawlingMustMatch,
+ newcrawlingMustNotMatch,
+ newcrawlingdepth,
crawlingIfOlder, crawlingDomFilterDepth, crawlingDomMaxPages,
crawlingQ,
indexText, indexMedia,
@@ -270,7 +274,7 @@ public class WatchCrawler_p {
}
} catch (final PatternSyntaxException e) {
prop.put("info", "4"); //crawlfilter does not match url
- prop.putHTML("info_newcrawlingfilter", newcrawlingfilter);
+ prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
prop.putHTML("info_error", e.getMessage());
} catch (final Exception e) {
// mist
@@ -286,7 +290,7 @@ public class WatchCrawler_p {
final String fileName = post.get("crawlingFile");
try {
// check if the crawl filter works correctly
- Pattern.compile(newcrawlingfilter);
+ Pattern.compile(newcrawlingMustMatch);
// loading the file content
final File file = new File(fileName);
@@ -306,7 +310,21 @@ public class WatchCrawler_p {
// creating a crawler profile
final yacyURL crawlURL = new yacyURL("file://" + file.toString(), null);
- final CrawlProfile.entry profile = sb.webIndex.profilesActiveCrawls.newEntry(fileName, crawlURL, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, crawlingDomFilterDepth, crawlingDomMaxPages, crawlingQ, indexText, indexMedia, storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw);
+ final CrawlProfile.entry profile = sb.webIndex.profilesActiveCrawls.newEntry(
+ fileName, crawlURL, CrawlProfile.KEYWORDS_USER,
+ newcrawlingMustMatch,
+ CrawlProfile.MATCH_NEVER,
+ newcrawlingdepth,
+ crawlingIfOlder,
+ crawlingDomFilterDepth,
+ crawlingDomMaxPages,
+ crawlingQ,
+ indexText,
+ indexMedia,
+ storeHTCache,
+ true,
+ crawlOrder,
+ xsstopw, xdstopw, xpstopw);
// pause local crawl here
sb.pauseCrawlJob(plasmaSwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
@@ -333,7 +351,7 @@ public class WatchCrawler_p {
} catch (final PatternSyntaxException e) {
// print error message
prop.put("info", "4"); //crawlfilter does not match url
- prop.putHTML("info_newcrawlingfilter", newcrawlingfilter);
+ prop.putHTML("info_newcrawlingfilter", newcrawlingMustMatch);
prop.putHTML("info_error", e.getMessage());
} catch (final Exception e) {
// mist
@@ -353,8 +371,10 @@ public class WatchCrawler_p {
// create a new profile
final CrawlProfile.entry pe = sb.webIndex.profilesActiveCrawls.newEntry(
- sitemapURLStr, sitemapURL, newcrawlingfilter, newcrawlingfilter,
- newcrawlingdepth, newcrawlingdepth,
+ sitemapURLStr, sitemapURL, CrawlProfile.KEYWORDS_USER,
+ newcrawlingMustMatch,
+ CrawlProfile.MATCH_NEVER,
+ newcrawlingdepth,
crawlingIfOlder, crawlingDomFilterDepth, crawlingDomMaxPages,
crawlingQ,
indexText, indexMedia,
diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java
index c750e1228..684a160fd 100644
--- a/htroot/sharedBlacklist_p.java
+++ b/htroot/sharedBlacklist_p.java
@@ -104,7 +104,6 @@ public class sharedBlacklist_p {
final String Hash = post.get("hash");
// generate the download URL
- String downloadURL = null;
String downloadURLOld = null;
if( sb.webIndex.seedDB != null ){ //no nullpointer error..
final yacySeed seed = sb.webIndex.seedDB.getConnected(Hash);
@@ -113,8 +112,6 @@ public class sharedBlacklist_p {
final String Port = seed.get(yacySeed.PORT, "8080");
final String peerName = seed.get(yacySeed.NAME, "<" + IP + ":" + Port + ">");
prop.putHTML("page_source", peerName);
-
- downloadURL = "http://" + IP + ":" + Port + "/xml/blacklists.xml";
downloadURLOld = "http://" + IP + ":" + Port + "/yacy/list.html?col=black";
} else {
prop.put("status", STATUS_PEER_UNKNOWN);//YaCy-Peer not found
diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java
index d2cfbe625..99ae67397 100644
--- a/source/de/anomic/crawler/CrawlProfile.java
+++ b/source/de/anomic/crawler/CrawlProfile.java
@@ -28,6 +28,8 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
import de.anomic.kelondro.kelondroBLOB;
import de.anomic.kelondro.kelondroBLOBHeap;
@@ -43,6 +45,20 @@ import de.anomic.yacy.yacyURL;
public class CrawlProfile {
+ public static final String MATCH_ALL = ".*";
+ public static final String MATCH_NEVER = "";
+ public static final HashSet<String> NO_KEYWORDS = new HashSet<String>(0);
+ public static final HashSet<String> KEYWORDS_PROXY = word2set("xproxy");
+ public static final HashSet<String> KEYWORDS_REMOTE = word2set("xremote");
+ public static final HashSet<String> KEYWORDS_USER = word2set("xuser");
+ public static final HashSet<String> KEYWORDS_SNIPPET = word2set("xsnippet");
+
+ private static final HashSet<String> word2set(String word) {
+ HashSet<String> s = new HashSet<String>(1);
+ s.add(word);
+ return s;
+ }
+
static HashMap<String, Map<String, DomProfile>> domsCache = new HashMap<String, Map<String, DomProfile>>();
kelondroMap profileTable;
@@ -145,8 +161,11 @@ public class CrawlProfile {
return ne;
}
- public entry newEntry(final String name, final yacyURL startURL, final String generalFilter, final String specificFilter,
- final int generalDepth, final int specificDepth,
+ public entry newEntry( final String name,
+ final yacyURL startURL,
+ final Set<String> keywords,
+ final String mustmatch, final String mustnotmatch,
+ final int generalDepth,
final long recrawlIfOlder /*date*/, final int domFilterDepth, final int domMaxPages,
final boolean crawlingQ,
final boolean indexText, final boolean indexMedia,
@@ -154,8 +173,11 @@ public class CrawlProfile {
final boolean remoteIndexing,
final boolean xsstopw, final boolean xdstopw, final boolean xpstopw) {
- final entry ne = new entry(name, startURL, generalFilter, specificFilter,
- generalDepth, specificDepth,
+ final entry ne = new entry(
+ name, startURL,
+ keywords,
+ mustmatch, mustnotmatch,
+ generalDepth,
recrawlIfOlder, domFilterDepth, domMaxPages,
crawlingQ,
indexText, indexMedia,
@@ -235,10 +257,9 @@ public class CrawlProfile {
public static final String HANDLE = "handle";
public static final String NAME = "name";
public static final String START_URL = "startURL";
- public static final String GENERAL_FILTER = "generalFilter";
- public static final String SPECIFIC_FILTER = "specificFilter";
- public static final String GENERAL_DEPTH = "generalDepth";
- public static final String SPECIFIC_DEPTH = "specificDepth";
+ public static final String FILTER_MUSTMATCH = "generalFilter";
+ public static final String FILTER_MUSTNOTMATCH = "nevermatch";
+ public static final String DEPTH = "generalDepth";
public static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
public static final String DOM_FILTER_DEPTH = "domFilterDepth";
public static final String DOM_MAX_PAGES = "domMaxPages";
@@ -254,10 +275,16 @@ public class CrawlProfile {
Map mem;
private Map doms;
+ private Pattern mustmatch = null, mustnotmatch = null;
+
- public entry(final String name, final yacyURL startURL, final String generalFilter, final String specificFilter,
- final int generalDepth, final int specificDepth,
- final long recrawlIfOlder /*date*/, final int domFilterDepth, final int domMaxPages,
+ public entry(final String name, final yacyURL startURL,
+ final Set<String> keywords,
+ final String mustmatch,
+ final String mustnotmatch,
+ final int depth,
+ final long recrawlIfOlder /*date*/,
+ final int domFilterDepth, final int domMaxPages,
final boolean crawlingQ,
final boolean indexText, final boolean indexMedia,
final boolean storeHTCache, final boolean storeTXCache,
@@ -269,10 +296,9 @@ public class CrawlProfile {
mem.put(HANDLE, handle);
mem.put(NAME, name);
mem.put(START_URL, (startURL == null) ? "" : startURL.toNormalform(true, false));
- mem.put(GENERAL_FILTER, (generalFilter == null) ? ".*" : generalFilter);
- mem.put(SPECIFIC_FILTER, (specificFilter == null) ? ".*" : specificFilter);
- mem.put(GENERAL_DEPTH, Integer.toString(generalDepth));
- mem.put(SPECIFIC_DEPTH, Integer.toString(specificDepth));
+ mem.put(FILTER_MUSTMATCH, (mustmatch == null) ? MATCH_ALL : mustmatch);
+ mem.put(FILTER_MUSTNOTMATCH, (mustnotmatch == null) ? MATCH_NEVER : mustnotmatch);
+ mem.put(DEPTH, Integer.toString(depth));
mem.put(RECRAWL_IF_OLDER, Long.toString(recrawlIfOlder));
mem.put(DOM_FILTER_DEPTH, Integer.toString(domFilterDepth));
mem.put(DOM_MAX_PAGES, Integer.toString(domMaxPages));
@@ -322,27 +348,24 @@ public class CrawlProfile {
final String r = mem.get(START_URL);
return r;
}
- public String generalFilter() {
- final String r = mem.get(GENERAL_FILTER);
- if (r == null) return ".*";
- return r;
- }
- public String specificFilter() {
- final String r = mem.get(SPECIFIC_FILTER);
- if (r == null) return ".*";
- return r;
+ public Pattern mustMatchPattern() {
+ if (this.mustmatch == null) {
+ String r = mem.get(FILTER_MUSTMATCH);
+ if (r == null) r = MATCH_ALL;
+ this.mustmatch = Pattern.compile(r);
+ }
+ return this.mustmatch;
}
- public int generalDepth() {
- final String r = mem.get(GENERAL_DEPTH);
- if (r == null) return 0;
- try {
- return Integer.parseInt(r);
- } catch (final NumberFormatException e) {
- return 0;
+ public Pattern mustNotMatchPattern() {
+ if (this.mustnotmatch == null) {
+ String r = mem.get(FILTER_MUSTNOTMATCH);
+ if (r == null) r = MATCH_NEVER;
+ this.mustnotmatch = Pattern.compile(r);
}
+ return this.mustnotmatch;
}
- public int specificDepth() {
- final String r = mem.get(SPECIFIC_DEPTH);
+ public int depth() {
+ final String r = mem.get(DEPTH);
if (r == null) return 0;
try {
return Integer.parseInt(r);
@@ -497,4 +520,5 @@ public class CrawlProfile {
return domname;
}
}
+
}
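For readers who want to see the new filter pair in isolation: the stand-alone sketch below (plain JDK regex; the class name `FilterSketch` and the method `accepts` are illustrative and not part of the patch) mirrors the lazily compiled mustMatchPattern()/mustNotMatchPattern() accessors above and the accept/reject decision that CrawlStacker applies further down.

```java
import java.util.regex.Pattern;

// Stand-alone sketch of the must-match / must-not-match pair introduced by this
// patch; the class name "FilterSketch" and method "accepts" are illustrative only.
public class FilterSketch {

    static final String MATCH_ALL = ".*"; // default must-match: accepts every URL
    static final String MATCH_NEVER = ""; // default must-not-match: matches only the empty string

    private final String mustmatchStr, mustnotmatchStr;
    private Pattern mustmatch = null, mustnotmatch = null;

    FilterSketch(final String mustmatch, final String mustnotmatch) {
        this.mustmatchStr = (mustmatch == null) ? MATCH_ALL : mustmatch;
        this.mustnotmatchStr = (mustnotmatch == null) ? MATCH_NEVER : mustnotmatch;
    }

    // compiled lazily on first use, as in CrawlProfile.entry
    Pattern mustMatchPattern() {
        if (this.mustmatch == null) this.mustmatch = Pattern.compile(this.mustmatchStr);
        return this.mustmatch;
    }

    Pattern mustNotMatchPattern() {
        if (this.mustnotmatch == null) this.mustnotmatch = Pattern.compile(this.mustnotmatchStr);
        return this.mustnotmatch;
    }

    // a URL passes only if it matches the first filter and does not match the second
    boolean accepts(final String url) {
        return mustMatchPattern().matcher(url).matches()
            && !mustNotMatchPattern().matcher(url).matches();
    }

    public static void main(final String[] args) {
        final FilterSketch f = new FilterSketch(".*example\\.org.*", ".*\\.pdf");
        System.out.println(f.accepts("http://example.org/page.html")); // true
        System.out.println(f.accepts("http://example.org/paper.pdf")); // false: rejected by must-not-match
        System.out.println(f.accepts("http://other.net/page.html"));   // false: fails must-match
    }
}
```

With both fields left at their defaults (MATCH_ALL / MATCH_NEVER) every URL passes, which reproduces the old behaviour of a single ".*" filter.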
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 36450e566..2be78d90f 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -232,8 +232,9 @@ public class CrawlQueues {
+ ", initiator=" + urlEntry.initiator()
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
+ ", depth=" + urlEntry.depth()
- + ", crawlDepth=" + profile.generalDepth()
- + ", filter=" + profile.generalFilter()
+ + ", crawlDepth=" + profile.depth()
+ + ", must-match=" + profile.mustMatchPattern().toString()
+ + ", must-not-match=" + profile.mustNotMatchPattern().toString()
+ ", permission=" + ((sb.webIndex.seedDB == null) ? "undefined" : (((sb.webIndex.seedDB.mySeed().isSenior()) || (sb.webIndex.seedDB.mySeed().isPrincipal())) ? "true" : "false")));
processLocalCrawling(urlEntry, stats);
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 8fea71b6e..660ed7fe7 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -409,14 +409,22 @@ public final class CrawlStacker extends Thread {
return errorMsg;
}
- // filter deny
- if ((entry.depth() > 0) && (!(entry.url().toString().matches(profile.generalFilter())))) {
- reason = "url does not match general filter";
- if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' does not match crawling filter '" + profile.generalFilter() + "'. " +
+ // filter with must-match
+ if ((entry.depth() > 0) && !profile.mustMatchPattern().matcher(entry.url().toString()).matches()) {
+ reason = "url does not match must-match filter";
+ if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' does not match must-match crawling filter '" + profile.mustMatchPattern().toString() + "'. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
}
-
+
+ // filter with must-not-match
+ if ((entry.depth() > 0) && profile.mustNotMatchPattern().matcher(entry.url().toString()).matches()) {
+ reason = "url matches must-not-match filter";
+ if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' matches must-not-match crawling filter '" + profile.mustNotMatchPattern().toString() + "'. " +
+ "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
+ return reason;
+ }
+
// deny cgi
if (entry.url().isCGI()) {
reason = "cgi url not allowed";
@@ -486,7 +494,7 @@ public final class CrawlStacker extends Thread {
final boolean remote = profile.handle().equals(this.sb.webIndex.defaultRemoteProfile.handle());
final boolean global =
(profile.remoteIndexing()) /* granted */ &&
- (entry.depth() == profile.generalDepth()) /* leaf node */ &&
+ (entry.depth() == profile.depth()) /* leaf node */ &&
//(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
(
(sb.webIndex.seedDB.mySeed().isSenior()) ||
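A note on the MATCH_NEVER default used above: Pattern.matches() requires the whole input to match, so the empty pattern only matches an empty string and therefore never rejects a real URL. A quick check (hypothetical test URL, plain JDK):

```java
import java.util.regex.Pattern;

public class MatchNeverDemo {
    public static void main(final String[] args) {
        final Pattern never = Pattern.compile("");   // CrawlProfile.MATCH_NEVER
        final Pattern all   = Pattern.compile(".*"); // CrawlProfile.MATCH_ALL
        final String url = "http://example.org/index.html"; // hypothetical test URL

        System.out.println(all.matcher(url).matches());   // true  -> passes the must-match default
        System.out.println(never.matcher(url).matches()); // false -> never rejected by the must-not-match default
        System.out.println(never.matcher("").matches());  // true  -> only the empty string matches ""
    }
}
```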
diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java
index e67c9d7b7..ab91f2ec7 100644
--- a/source/de/anomic/data/SitemapParser.java
+++ b/source/de/anomic/data/SitemapParser.java
@@ -330,26 +330,27 @@ public class SitemapParser extends DefaultHandler {
}
private CrawlProfile.entry createProfile(final String domainName, final yacyURL sitemapURL) {
- return this.sb.webIndex.profilesActiveCrawls.newEntry(domainName, sitemapURL,
- // crawlingFilter
- ".*", ".*",
- // Depth
- 0, 0,
- // force recrawling
- 0,
- // disable Auto-Dom-Filter
- -1, -1,
- // allow crawling of dynamic URLs
- true,
- // index text + media
- true, true,
- // don't store downloaded pages to Web Cache
- false,
- // store to TX cache
- true,
- // remote Indexing disabled
- false,
- // exclude stop-words
- true, true, true);
+ return this.sb.webIndex.profilesActiveCrawls.newEntry(
+ domainName, sitemapURL, CrawlProfile.KEYWORDS_USER,
+ // crawling Filter
+ CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
+ // Depth
+ 0,
+ // force recrawling
+ 0,
+ // disable Auto-Dom-Filter
+ -1, -1,
+ // allow crawling of dynamic URLs
+ true,
+ // index text + media
+ true, true,
+ // don't store downloaded pages to Web Cache
+ false,
+ // store to TX cache
+ true,
+ // remote Indexing disabled
+ false,
+ // exclude stop-words
+ true, true, true);
}
}
diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java
index 7f89cd545..6cbb6578e 100644
--- a/source/de/anomic/data/bookmarksDB.java
+++ b/source/de/anomic/data/bookmarksDB.java
@@ -226,22 +226,22 @@ public class bookmarksDB {
int pos = 0;
// set crawlingStart to BookmarkUrl
String crawlingStart = bm.getUrl();
- String newcrawlingfilter = crawlingfilter;
+ String newcrawlingMustMatch = crawlingfilter;
yacyURL crawlingStartURL = new yacyURL(crawlingStart, null);
// set the crawling filter
- if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
+ if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = ".*"; // avoid that all urls are filtered out if bad value was submitted
- if (crawlingStartURL!= null && newcrawlingfilter.equals("dom")) {
- newcrawlingfilter = ".*" + crawlingStartURL.getHost() + ".*";
+ if (crawlingStartURL!= null && newcrawlingMustMatch.equals("dom")) {
+ newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*";
}
- if (crawlingStart!= null && newcrawlingfilter.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) {
- newcrawlingfilter = crawlingStart.substring(0, pos + 1) + ".*";
+ if (crawlingStart!= null && newcrawlingMustMatch.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) {
+ newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
}
// check if the crawl filter works correctly
- Pattern.compile(newcrawlingfilter);
+ Pattern.compile(newcrawlingMustMatch);
String urlhash = crawlingStartURL.hash();
sb.webIndex.removeURL(urlhash);
@@ -251,8 +251,10 @@ public class bookmarksDB {
// stack url
sb.webIndex.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it
CrawlProfile.entry pe = sb.webIndex.profilesActiveCrawls.newEntry(
- folder+"/"+crawlingStartURL, crawlingStartURL, newcrawlingfilter, newcrawlingfilter,
- newcrawlingdepth, newcrawlingdepth,
+ folder+"/"+crawlingStartURL, crawlingStartURL, CrawlProfile.KEYWORDS_USER,
+ newcrawlingMustMatch,
+ CrawlProfile.MATCH_NEVER,
+ newcrawlingdepth,
sb.webIndex.profilesActiveCrawls.getRecrawlDate(crawlingIfOlder), crawlingDomFilterDepth, crawlingDomMaxPages,
crawlingQ,
indexText, indexMedia,
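Both WatchCrawler_p and bookmarksDB compile the user-supplied expression with Pattern.compile() before creating the profile, so a broken filter is reported instead of silently accepted. A minimal validation sketch along those lines (the helper name and error handling are illustrative, not taken from the patch):

```java
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class FilterValidation {

    // returns the compiled must-match filter, or null if the expression does not compile
    static Pattern validateMustMatch(String mustMatch) {
        // same fallback as the patch: very short values default to "match all"
        if (mustMatch == null || mustMatch.length() < 2) mustMatch = ".*";
        try {
            return Pattern.compile(mustMatch);
        } catch (final PatternSyntaxException e) {
            System.err.println("crawl filter does not compile: " + e.getMessage());
            return null;
        }
    }

    public static void main(final String[] args) {
        System.out.println(validateMustMatch(".*example\\.org.*")); // compiles
        System.out.println(validateMustMatch("*.broken["));         // reported as invalid, returns null
    }
}
```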
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index e5ae12e4d..90c9b0a7a 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1558,8 +1558,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch