From ce360ef43ef57ac950e5bb97f8abfb5deaff9176 Mon Sep 17 00:00:00 2001 From: low012 Date: Fri, 2 Mar 2007 21:09:28 +0000 Subject: [PATCH] *) no more HTML in plasmaCrawlProfile.java anymore *) <br>
will not be displayed in items in Auto Filter Content on WatchCrawler_p.html anymore *) removed unnecessary replaceHTML() git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3425 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/WatchCrawler_p.html | 2 +- htroot/WatchCrawler_p.java | 74 +++++++++++-------- .../de/anomic/plasma/plasmaCrawlProfile.java | 21 +++--- 3 files changed, 57 insertions(+), 40 deletions(-) diff --git a/htroot/WatchCrawler_p.html b/htroot/WatchCrawler_p.html index 78dcbd071..d760e4673 100644 --- a/htroot/WatchCrawler_p.html +++ b/htroot/WatchCrawler_p.html @@ -179,7 +179,7 @@ #[filter]# #[crawlingIfOlder]# #[crawlingDomFilterDepth]# - #[crawlingDomFilterContent]# + #{crawlingDomFilterContent}##[item]#
#{/crawlingDomFilterContent}# #[crawlingDomMaxPages]# #(withQuery)#no::yes#(/withQuery)# #(storeCache)#no::yes#(/storeCache)# diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 94023d55e..3d6a30a14 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -1,29 +1,3 @@ -import java.io.File; -import java.io.Writer; -import java.net.MalformedURLException; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - -import de.anomic.data.wikiCode; -import de.anomic.htmlFilter.htmlFilterContentScraper; -import de.anomic.htmlFilter.htmlFilterWriter; -import de.anomic.http.httpHeader; -import de.anomic.kelondro.kelondroBitfield; -import de.anomic.net.URL; -import de.anomic.plasma.plasmaCrawlEURL; -import de.anomic.plasma.plasmaCrawlProfile; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.plasmaURL; -import de.anomic.server.serverFileUtils; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyNewsRecord; - // WatchCrawler_p.java // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany // first published 18.12.2006 on http://www.anomic.de @@ -51,6 +25,32 @@ import de.anomic.yacy.yacyNewsRecord; // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +import java.io.File; +import java.io.Writer; +import java.net.MalformedURLException; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import de.anomic.data.wikiCode; +import de.anomic.htmlFilter.htmlFilterContentScraper; +import de.anomic.htmlFilter.htmlFilterWriter; +import de.anomic.http.httpHeader; +import de.anomic.kelondro.kelondroBitfield; +import de.anomic.net.URL; +import de.anomic.plasma.plasmaCrawlEURL; +import de.anomic.plasma.plasmaCrawlProfile; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaURL; +import de.anomic.server.serverFileUtils; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacyNewsRecord; + public class WatchCrawler_p { // this servlet does NOT create the WatchCrawler page content! @@ -332,14 +332,28 @@ public class WatchCrawler_p { while (it.hasNext()) { profile = (plasmaCrawlProfile.entry) it.next(); prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 
1 : 0)); - prop.put("crawlProfiles_"+count+"_name", wikiCode.replaceHTML(profile.name())); - prop.put("crawlProfiles_"+count+"_startURL", wikiCode.replaceHTML(profile.startURL())); - prop.put("crawlProfiles_"+count+"_handle", wikiCode.replaceHTML(profile.handle())); + prop.put("crawlProfiles_"+count+"_name", profile.name()); + prop.put("crawlProfiles_"+count+"_startURL", profile.startURL()); + prop.put("crawlProfiles_"+count+"_handle", profile.handle()); prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth()); prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter()); prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder()); prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : Integer.toString(profile.domFilterDepth())); - prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", profile.domNames(true, domlistlength)); + + //start contrib [MN] + int i = 0; + String item; + while((i <= domlistlength) && !((item = profile.domName(true, i)).equals(""))){ + if(i == domlistlength){ + item = item + " ..."; + } + prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent_"+i+"_item", item); + i++; + } + + prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", i); + //end contrib [MN] + prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages()); prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0)); prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 
1 : 0)); diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java index 82009298f..d21ee5f59 100644 --- a/source/de/anomic/plasma/plasmaCrawlProfile.java +++ b/source/de/anomic/plasma/plasmaCrawlProfile.java @@ -47,6 +47,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Set; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroDyn; @@ -461,21 +462,23 @@ public class plasmaCrawlProfile { if (domFilterDepth() == Integer.MAX_VALUE) return true; return doms.containsKey(domain); } - public String domNames(boolean attr, int maxlength) { + + public String domName(boolean attr, int index){ Iterator domnamesi = doms.entrySet().iterator(); - String domnames=""; + String domname=""; Map.Entry ey; DomProfile dp; - while (domnamesi.hasNext()) { + int i = 0; + while ((domnamesi.hasNext()) && (i < index)) { + ey = (Map.Entry) domnamesi.next(); + i++; + } + if(domnamesi.hasNext()){ ey = (Map.Entry) domnamesi.next(); dp = (DomProfile) ey.getValue(); - domnames += ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count + " ") : " ") + "
"; - if ((maxlength > 0) && (domnames.length() >= maxlength)) { - domnames = domnames.substring(0, maxlength-3) + "..."; - break; - } + domname = ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count) : " "); } - return domnames; + return domname; } } }