From 208b5297f1ef6c0e3c7df5275b1412d3a979f95f Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 5 Jul 2007 22:56:37 +0000 Subject: [PATCH] enhanced handling of news records: result is a speedup of Surftips, Supporter, and Network page git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3954 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/CrawlStartSimple_p.java | 76 +++++++++---------- htroot/Network.java | 33 ++++---- htroot/News.java | 19 +++-- htroot/Supporter.html | 2 +- htroot/Supporter.java | 25 +++--- htroot/Surftips.java | 43 ++++++----- source/de/anomic/kelondro/kelondroStack.java | 51 ++++--------- .../de/anomic/plasma/plasmaCrawlBalancer.java | 2 +- .../de/anomic/plasma/plasmaSwitchboard.java | 6 ++ source/de/anomic/yacy/yacyNewsPool.java | 9 +++ source/de/anomic/yacy/yacyNewsQueue.java | 39 +++++++++- yacy.init | 5 ++ 12 files changed, 170 insertions(+), 140 deletions(-) diff --git a/htroot/CrawlStartSimple_p.java b/htroot/CrawlStartSimple_p.java index ecdf849ad..41fec93b5 100644 --- a/htroot/CrawlStartSimple_p.java +++ b/htroot/CrawlStartSimple_p.java @@ -24,8 +24,8 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.io.IOException; import java.util.Enumeration; +import java.util.Iterator; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaURL; @@ -101,56 +101,50 @@ public class CrawlStartSimple_p { boolean dark = true; // create other peer crawl table using YaCyNews - int availableNews = yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB); + Iterator recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.INCOMING_DB, true); int showedCrawl = 0; yacyNewsRecord record; yacySeed peer; String peername; - try { - for (int c = 0; c < availableNews; c++) { - record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c); - if (record == null) continue; - if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { - peer = yacyCore.seedDB.get(record.originator()); - if (peer == null) peername = record.originator(); else peername = peer.getName(); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); - showedCrawl++; - if (showedCrawl > 20) break; - } - + while (recordIterator.hasNext()) { + record = (yacyNewsRecord) recordIterator.next(); + if (record == null) continue; + if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { + peer = yacyCore.seedDB.get(record.originator()); + if (peer == null) peername = record.originator(); else peername = peer.getName(); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created()); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); + showedCrawl++; + if (showedCrawl > 20) break; } - } catch (IOException e) {} + } prop.put("otherCrawlStartInProgress", showedCrawl); // finished remote crawls - availableNews = yacyCore.newsPool.size(yacyNewsPool.PROCESSED_DB); + recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.PROCESSED_DB, true); showedCrawl = 0; - try { - for (int c = 0; c < availableNews; c++) { - record = yacyCore.newsPool.get(yacyNewsPool.PROCESSED_DB, c); - if (record == null) continue; - if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { - peer = yacyCore.seedDB.get(record.originator()); - if (peer == null) peername = record.originator(); else peername = peer.getName(); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); - showedCrawl++; - if (showedCrawl > 20) break; - } - + while (recordIterator.hasNext()) { + record = (yacyNewsRecord) recordIterator.next(); + if (record == null) continue; + if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { + peer = yacyCore.seedDB.get(record.originator()); + if (peer == null) peername = record.originator(); else peername = peer.getName(); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created()); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); + showedCrawl++; + if (showedCrawl > 20) break; } - } catch (IOException e) {} + } prop.put("otherCrawlStartFinished", showedCrawl); diff --git a/htroot/Network.java b/htroot/Network.java index 895f2a2f4..094866d13 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -46,10 +46,10 @@ // javac -classpath .:../classes Network.java // if the shell's current path is HTROOT -import java.io.IOException; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -269,25 +269,22 @@ public class Network { final HashMap updatedWiki = new HashMap(); final HashMap updatedBlog = new HashMap(); final HashMap isCrawling = new HashMap(); - int availableNews = yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB); - if (availableNews > 300) { availableNews = 300; } yacyNewsRecord record; - try { - for (int c = availableNews - 1; c >= 0; c--) { - record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c); - if (record == null) { - break; - } else if (record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) { - updatedProfile.add(record.originator()); - } else if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) { - updatedWiki.put(record.originator(), record.attributes()); - } else if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) { - updatedBlog.put(record.originator(), record.attributes()); - } else if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { - isCrawling.put(record.originator(), record.attributes().get("startURL")); - } + Iterator recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.INCOMING_DB, true); + while (recordIterator.hasNext()) { + record = (yacyNewsRecord) recordIterator.next(); + if (record == null) { + continue; + } else if (record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) { + updatedProfile.add(record.originator()); + } else if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) { + updatedWiki.put(record.originator(), record.attributes()); + } else if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) { + updatedBlog.put(record.originator(), record.attributes()); + } else if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { + isCrawling.put(record.originator(), record.attributes().get("startURL")); } - } catch (IOException e) {} + } boolean dark = true; yacySeed seed; diff --git a/htroot/News.java b/htroot/News.java index dd118fdae..8f3ef766d 100644 --- a/htroot/News.java +++ b/htroot/News.java @@ -99,7 +99,7 @@ public class News { yacyCore.newsPool.clear(tableID); } else { while (yacyCore.newsPool.size(tableID) > 0) { - record = yacyCore.newsPool.get(tableID, 0); + record = (yacyNewsRecord) yacyCore.newsPool.recordIterator(tableID, true).next(); yacyCore.newsPool.moveOff(tableID, record.id()); } } @@ -127,14 +127,15 @@ public class News { if (yacyCore.seedDB == null) { } else { - int maxCount = yacyCore.newsPool.size(tableID); - if (maxCount > 300) maxCount = 300; - + int maxCount = Math.min(1000, yacyCore.newsPool.size(tableID)); + Iterator recordIterator = yacyCore.newsPool.recordIterator(tableID, false); yacyNewsRecord record; yacySeed seed; - for (int i = 0; i < maxCount; i++) try { - record = yacyCore.newsPool.get(tableID, i); + int i = 0; + while ((recordIterator.hasNext()) && (i < maxCount)) { + record = (yacyNewsRecord) recordIterator.next(); if (record == null) continue; + seed = yacyCore.seedDB.getConnected(record.originator()); if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator()); String category = record.category(); @@ -199,8 +200,10 @@ public class News { prop.put("table_list_" + i + "_link", link); prop.put("table_list_" + i + "_title", title); prop.put("table_list_" + i + "_description", description); - } catch (IOException e) {e.printStackTrace();} - prop.put("table_list", maxCount); + + i++; + } + prop.put("table_list", i); } } diff --git a/htroot/Supporter.html b/htroot/Supporter.html index bf4005763..32383408d 100644 --- a/htroot/Supporter.html +++ b/htroot/Supporter.html @@ -48,7 +48,7 @@ #{/results}# -

+

...provided by YaCy peers with an URL in their profile. This shows only URLs from peers that are currently online.