diff --git a/htroot/CrawlProfileEditor_p.html b/htroot/CrawlProfileEditor_p.html
index e52bf2b8e..42a331f28 100644
--- a/htroot/CrawlProfileEditor_p.html
+++ b/htroot/CrawlProfileEditor_p.html
@@ -32,11 +32,12 @@
#[name]# |
+ #[collections]# |
#(terminateButton)#::
Running
|
#[depth]# |
- #[mustmatch]# |
- #[mustnotmatch]# |
- #[crawlingIfOlder]# |
+ #[crawlerURLMustMatch]# |
+ #[crawlerURLMustNotMatch]# |
+ #[recrawlIfOlder]# |
#{crawlingDomFilterContent}##[item]# #{/crawlingDomFilterContent}# |
- #[crawlingDomMaxPages]# |
+ #[domMaxPages]# |
#(withQuery)#no::yes#(/withQuery)# |
- #(storeCache)#no::yes#(/storeCache)# |
+ #(storeHTCache)#no::yes#(/storeHTCache)# |
#(indexText)#no::yes#(/indexText)# |
#(indexMedia)#no::yes#(/indexMedia)# |
#(remoteIndexing)#no::yes#(/remoteIndexing)# |
diff --git a/htroot/CrawlProfileEditor_p.xml b/htroot/CrawlProfileEditor_p.xml
index 8d23bec3f..9ee5cf28c 100644
--- a/htroot/CrawlProfileEditor_p.xml
+++ b/htroot/CrawlProfileEditor_p.xml
@@ -2,23 +2,40 @@
#{crawlProfiles}#
+ #[handle]#
#[name]#
- #(status)#terminated::active::system#(/status)#
+ #[collections]#
+ #[agentName]#
+ #[userAgent]#
#[depth]#
- #[mustmatch]#
- #[mustnotmatch]#
- #[crawlingIfOlder]#
+ #(directDocByURL)#false::true#(/directDocByURL)#
+ #[recrawlIfOlder]#
+ #[domMaxPages]#
+ #(crawlingQ)#false::true#(/crawlingQ)#
+ #(followFrames)#false::true#(/followFrames)#
+ #(obeyHtmlRobotsNoindex)#false::true#(/obeyHtmlRobotsNoindex)#
+ #(obeyHtmlRobotsNofollow)#false::true#(/obeyHtmlRobotsNofollow)#
+ #(indexText)#false::true#(/indexText)#
+ #(indexMedia)#false::true#(/indexMedia)#
+ #(storeHTCache)#false::true#(/storeHTCache)#
+ #(remoteIndexing)#false::true#(/remoteIndexing)#
+ #[cacheStrategy]#
+ #[crawlerURLMustMatch]#
+ #[crawlerURLMustNotMatch]#
+ #[crawlerIPMustMatch]#
+ #[crawlerIPMustNotMatch]#
+ #[crawlerCountryMustMatch]#
+ #[crawlerNoLimitURLMustMatch]#
+ #[indexURLMustMatch]#
+ #[indexURLMustNotMatch]#
+ #[indexContentMustMatch]#
+ #[indexContentMustNotMatch]#
+ #(status)#terminated::active::system#(/status)#
#{crawlingDomFilterContent}#
- #[item]#
#{/crawlingDomFilterContent}#
- #[crawlingDomMaxPages]#
- #(withQuery)#no::yes#(/withQuery)#
- #(storeCache)#no::yes#(/storeCache)#
- #(indexText)#no::yes#(/indexText)#
- #(indexMedia)#no::yes#(/indexMedia)#
- #(remoteIndexing)#no::yes#(/remoteIndexing)#
#{/crawlProfiles}#
diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java
index 4fb782f60..909427d57 100644
--- a/source/net/yacy/crawler/data/CrawlProfile.java
+++ b/source/net/yacy/crawler/data/CrawlProfile.java
@@ -25,7 +25,6 @@
package net.yacy.crawler.data;
-import java.text.DateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
@@ -625,39 +624,58 @@ public class CrawlProfile extends ConcurrentHashMap implements M
boolean terminateButton = active && !CrawlSwitchboard.DEFAULT_PROFILES.contains(this.name());
boolean deleteButton = !active;
prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0");
- prop.put(CRAWL_PROFILE_PREFIX + count + "_name", this.collectionName());
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_handle", this.handle());
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_name", this.name());
+ //prop.putXML(CRAWL_PROFILE_PREFIX + count + "_collection", this.get(COLLECTIONS)); // TODO: remove, replace with 'collections'
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_collections", this.get(COLLECTIONS));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_agentName", this.get(AGENT_NAME));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_userAgent", this.getAgent().userAgent);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_depth", this.depth());
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_directDocByURL", this.directDocByURL() ? 1 : 0);
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_recrawlIfOlder", this.recrawlIfOlder() == Long.MAX_VALUE ? "eternity" : (new Date(this.recrawlIfOlder()).toString()));
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_domMaxPages", this.domMaxPages());
+ //prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomMaxPages", (this.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : Integer.toString(this.domMaxPages())); // TODO: remove, replace with 'domMaxPages'
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingQ", this.crawlingQ() ? 1 : 0);
+ //prop.put(CRAWL_PROFILE_PREFIX + count + "_withQuery", (this.crawlingQ()) ? "1" : "0"); // TODO: remove, replace with crawlingQ
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_followFrames", this.followFrames() ? 1 : 0);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_obeyHtmlRobotsNoindex", this.obeyHtmlRobotsNoindex() ? 1 : 0);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_obeyHtmlRobotsNofollow", this.obeyHtmlRobotsNofollow() ? 1 : 0);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_indexText", this.indexText() ? 1 : 0);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_indexMedia", this.indexMedia() ? 1 : 0);
+ //prop.put(CRAWL_PROFILE_PREFIX + count + "_storeCache", this.storeHTCache() ? 1 : 0); // TODO: remove, replace with 'storeHTCache'
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_storeHTCache", this.storeHTCache() ? 1 : 0);
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_remoteIndexing", this.remoteIndexing() ? 1 : 0);
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_cacheStrategy", this.get(CACHE_STRAGEGY));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerURLMustMatch", this.get(CRAWLER_URL_MUSTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerURLMustNotMatch", this.get(CRAWLER_URL_MUSTNOTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerIPMustMatch", this.get(CRAWLER_IP_MUSTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerIPMustNotMatch", this.get(CRAWLER_IP_MUSTNOTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerCountryMustMatch", this.get(CRAWLER_COUNTRY_MUSTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_crawlerNoLimitURLMustMatch", this.get(CRAWLER_URL_NODEPTHLIMITMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_indexURLMustMatch", this.get(INDEXING_URL_MUSTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_indexURLMustNotMatch", this.get(INDEXING_URL_MUSTNOTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_indexContentMustMatch", this.get(INDEXING_CONTENT_MUSTMATCH));
+ prop.putXML(CRAWL_PROFILE_PREFIX + count + "_indexContentMustNotMatch", this.get(INDEXING_CONTENT_MUSTNOTMATCH));
+ //prop.putXML(CRAWL_PROFILE_PREFIX + count + "_mustmatch", this.urlMustMatchPattern().toString()); // TODO: remove, replace with crawlerURLMustMatch
+ //prop.putXML(CRAWL_PROFILE_PREFIX + count + "_mustnotmatch", this.urlMustNotMatchPattern().toString()); // TODO: remove, replace with crawlerURLMustNotMatch
+ //prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingIfOlder", (this.recrawlIfOlder() == 0L) ? "no re-crawl" : DateFormat.getDateTimeInstance().format(this.recrawlIfOlder())); // TODO: remove, replace with recrawlIfOlder
+ prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterDepth", "inactive");
prop.put(CRAWL_PROFILE_PREFIX + count + "_status", terminateButton ? 1 : deleteButton ? 0 : 2);
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", terminateButton);
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", this.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", deleteButton);
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton_handle", this.handle());
- prop.put(CRAWL_PROFILE_PREFIX + count + "_handle", this.handle());
- prop.put(CRAWL_PROFILE_PREFIX + count + "_depth", this.depth());
- prop.put(CRAWL_PROFILE_PREFIX + count + "_mustmatch", this.urlMustMatchPattern().toString());
- prop.put(CRAWL_PROFILE_PREFIX + count + "_mustnotmatch", this.urlMustNotMatchPattern().toString());
- prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingIfOlder", (this.recrawlIfOlder() == 0L) ? "no re-crawl" : DateFormat.getDateTimeInstance().format(this.recrawlIfOlder()));
- prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterDepth", "inactive");
-
+
int i = 0;
- if (active && this.domMaxPages() > 0
- && this.domMaxPages() != Integer.MAX_VALUE) {
- String item;
- while (i <= domlistlength && !(item = this.domName(true, i)).isEmpty()){
- if (i == domlistlength) {
- item += " ...";
+ if (active && this.domMaxPages() > 0 && this.domMaxPages() != Integer.MAX_VALUE) {
+ String item;
+ while (i <= domlistlength && !(item = this.domName(true, i)).isEmpty()) {
+ if (i == domlistlength) item += " ...";
+ prop.putHTML(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterContent_" + i + "_item", item);
+ i++;
}
- prop.putHTML(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterContent_" + i + "_item", item);
- i++;
}
- }
-
prop.put(CRAWL_PROFILE_PREFIX+count+"_crawlingDomFilterContent", i);
- prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomMaxPages", (this.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : Integer.toString(this.domMaxPages()));
- prop.put(CRAWL_PROFILE_PREFIX + count + "_withQuery", (this.crawlingQ()) ? "1" : "0");
- prop.put(CRAWL_PROFILE_PREFIX + count + "_storeCache", (this.storeHTCache()) ? "1" : "0");
- prop.put(CRAWL_PROFILE_PREFIX + count + "_indexText", (this.indexText()) ? "1" : "0");
- prop.put(CRAWL_PROFILE_PREFIX + count + "_indexMedia", (this.indexMedia()) ? "1" : "0");
- prop.put(CRAWL_PROFILE_PREFIX + count + "_remoteIndexing", (this.remoteIndexing()) ? "1" : "0");
}
}
diff --git a/source/net/yacy/server/serverObjects.java b/source/net/yacy/server/serverObjects.java
index c7836e045..28f14f4a9 100644
--- a/source/net/yacy/server/serverObjects.java
+++ b/source/net/yacy/server/serverObjects.java
@@ -307,11 +307,11 @@ public class serverObjects implements Serializable, Cloneable {
* @see CharacterCoding#encodeUnicode2html(String, boolean)
*/
public void putHTML(final String key, final String value) {
- put(key, CharacterCoding.unicode2html(value, true));
+ put(key, value == null ? "" : CharacterCoding.unicode2html(value, true));
}
public void putHTML(final String key, final byte[] value) {
- putHTML(key, UTF8.String(value));
+ putHTML(key, value == null ? "" : UTF8.String(value));
}
/**
@@ -321,7 +321,7 @@ public class serverObjects implements Serializable, Cloneable {
* replaced in the returned String.
*/
public void putXML(final String key, final String value) {
- put(key, CharacterCoding.unicode2xml(value, true));
+ put(key, value == null ? "" : CharacterCoding.unicode2xml(value, true));
}
/**
@@ -332,9 +332,9 @@ public class serverObjects implements Serializable, Cloneable {
* @return
*/
public void put(final RequestHeader.FileType fileType, final String key, final String value) {
- if (fileType == FileType.JSON) putJSON(key, value);
- else if (fileType == FileType.XML) putXML(key, value);
- else putHTML(key, value);
+ if (fileType == FileType.JSON) putJSON(key, value == null ? "" : value);
+ else if (fileType == FileType.XML) putXML(key, value == null ? "" : value);
+ else putHTML(key, value == null ? "" : value);
}
/**