enhanced handling of news records:

The result is a speedup of the Surftips, Supporter, and Network pages.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3954 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent f40566f9bb
commit 208b5297f1

@ -24,8 +24,8 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.IOException;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
@ -101,56 +101,50 @@ public class CrawlStartSimple_p {
boolean dark = true; boolean dark = true;
// create other peer crawl table using YaCyNews // create other peer crawl table using YaCyNews
int availableNews = yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB); Iterator recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.INCOMING_DB, true);
int showedCrawl = 0; int showedCrawl = 0;
yacyNewsRecord record; yacyNewsRecord record;
yacySeed peer; yacySeed peer;
String peername; String peername;
try { while (recordIterator.hasNext()) {
for (int c = 0; c < availableNews; c++) { record = (yacyNewsRecord) recordIterator.next();
record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c); if (record == null) continue;
if (record == null) continue; if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) {
if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { peer = yacyCore.seedDB.get(record.originator());
peer = yacyCore.seedDB.get(record.originator()); if (peer == null) peername = record.originator(); else peername = peer.getName();
if (peer == null) peername = record.originator(); else peername = peer.getName(); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created()); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername);
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); showedCrawl++;
showedCrawl++; if (showedCrawl > 20) break;
if (showedCrawl > 20) break;
}
} }
} catch (IOException e) {} }
prop.put("otherCrawlStartInProgress", showedCrawl); prop.put("otherCrawlStartInProgress", showedCrawl);
// finished remote crawls // finished remote crawls
availableNews = yacyCore.newsPool.size(yacyNewsPool.PROCESSED_DB); recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.PROCESSED_DB, true);
showedCrawl = 0; showedCrawl = 0;
try { while (recordIterator.hasNext()) {
for (int c = 0; c < availableNews; c++) { record = (yacyNewsRecord) recordIterator.next();
record = yacyCore.newsPool.get(yacyNewsPool.PROCESSED_DB, c); if (record == null) continue;
if (record == null) continue; if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) {
if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { peer = yacyCore.seedDB.get(record.originator());
peer = yacyCore.seedDB.get(record.originator()); if (peer == null) peername = record.originator(); else peername = peer.getName();
if (peer == null) peername = record.originator(); else peername = peer.getName(); prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", ((dark) ? 1 : 0)); prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created());
prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created()); prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername);
prop.put("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
prop.put("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0);
prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", (record.attributes().get("crawlingQ").equals("true")) ? 1 : 0); showedCrawl++;
showedCrawl++; if (showedCrawl > 20) break;
if (showedCrawl > 20) break;
}
} }
} catch (IOException e) {} }
prop.put("otherCrawlStartFinished", showedCrawl); prop.put("otherCrawlStartFinished", showedCrawl);

@ -46,10 +46,10 @@
// javac -classpath .:../classes Network.java // javac -classpath .:../classes Network.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.IOException;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -269,25 +269,22 @@ public class Network {
final HashMap updatedWiki = new HashMap(); final HashMap updatedWiki = new HashMap();
final HashMap updatedBlog = new HashMap(); final HashMap updatedBlog = new HashMap();
final HashMap isCrawling = new HashMap(); final HashMap isCrawling = new HashMap();
int availableNews = yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB);
if (availableNews > 300) { availableNews = 300; }
yacyNewsRecord record; yacyNewsRecord record;
try { Iterator recordIterator = yacyCore.newsPool.recordIterator(yacyNewsPool.INCOMING_DB, true);
for (int c = availableNews - 1; c >= 0; c--) { while (recordIterator.hasNext()) {
record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c); record = (yacyNewsRecord) recordIterator.next();
if (record == null) { if (record == null) {
break; continue;
} else if (record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) { } else if (record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) {
updatedProfile.add(record.originator()); updatedProfile.add(record.originator());
} else if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) { } else if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) {
updatedWiki.put(record.originator(), record.attributes()); updatedWiki.put(record.originator(), record.attributes());
} else if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) { } else if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) {
updatedBlog.put(record.originator(), record.attributes()); updatedBlog.put(record.originator(), record.attributes());
} else if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { } else if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) {
isCrawling.put(record.originator(), record.attributes().get("startURL")); isCrawling.put(record.originator(), record.attributes().get("startURL"));
}
} }
} catch (IOException e) {} }
boolean dark = true; boolean dark = true;
yacySeed seed; yacySeed seed;

@ -99,7 +99,7 @@ public class News {
yacyCore.newsPool.clear(tableID); yacyCore.newsPool.clear(tableID);
} else { } else {
while (yacyCore.newsPool.size(tableID) > 0) { while (yacyCore.newsPool.size(tableID) > 0) {
record = yacyCore.newsPool.get(tableID, 0); record = (yacyNewsRecord) yacyCore.newsPool.recordIterator(tableID, true).next();
yacyCore.newsPool.moveOff(tableID, record.id()); yacyCore.newsPool.moveOff(tableID, record.id());
} }
} }
@ -127,14 +127,15 @@ public class News {
if (yacyCore.seedDB == null) { if (yacyCore.seedDB == null) {
} else { } else {
int maxCount = yacyCore.newsPool.size(tableID); int maxCount = Math.min(1000, yacyCore.newsPool.size(tableID));
if (maxCount > 300) maxCount = 300; Iterator recordIterator = yacyCore.newsPool.recordIterator(tableID, false);
yacyNewsRecord record; yacyNewsRecord record;
yacySeed seed; yacySeed seed;
for (int i = 0; i < maxCount; i++) try { int i = 0;
record = yacyCore.newsPool.get(tableID, i); while ((recordIterator.hasNext()) && (i < maxCount)) {
record = (yacyNewsRecord) recordIterator.next();
if (record == null) continue; if (record == null) continue;
seed = yacyCore.seedDB.getConnected(record.originator()); seed = yacyCore.seedDB.getConnected(record.originator());
if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator()); if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator());
String category = record.category(); String category = record.category();
@ -199,8 +200,10 @@ public class News {
prop.put("table_list_" + i + "_link", link); prop.put("table_list_" + i + "_link", link);
prop.put("table_list_" + i + "_title", title); prop.put("table_list_" + i + "_title", title);
prop.put("table_list_" + i + "_description", description); prop.put("table_list_" + i + "_description", description);
} catch (IOException e) {e.printStackTrace();}
prop.put("table_list", maxCount); i++;
}
prop.put("table_list", i);
} }
} }

@ -48,7 +48,7 @@
</div> </div>
<!-- link end --> <!-- link end -->
#{/results}# #{/results}#
<p class="afterSupporter"> <p class="aftersurftips">
...provided by YaCy peers with an URL in their profile. This shows only URLs from peers that are currently online.<br /> ...provided by YaCy peers with an URL in their profile. This shows only URLs from peers that are currently online.<br />
</p> </p>
<script type="text/javascript"> <script type="text/javascript">

@ -176,9 +176,10 @@ public class Supporter {
private static void accumulateVotes(HashMap negativeHashes, HashMap positiveHashes, int dbtype) { private static void accumulateVotes(HashMap negativeHashes, HashMap positiveHashes, int dbtype) {
int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype)); int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype));
yacyNewsRecord record; yacyNewsRecord record;
Iterator recordIterator = yacyCore.newsPool.recordIterator(dbtype, true);
for (int j = 0; j < maxCount; j++) try { int j = 0;
record = yacyCore.newsPool.get(dbtype, j); while ((recordIterator.hasNext()) && (j++ < maxCount)) {
record = (yacyNewsRecord) recordIterator.next();
if (record == null) continue; if (record == null) continue;
if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) {
@ -196,7 +197,7 @@ public class Supporter {
else positiveHashes.put(urlhash, new Integer(i.intValue() + factor)); else positiveHashes.put(urlhash, new Integer(i.intValue() + factor));
} }
} }
} catch (IOException e) {e.printStackTrace();} }
} }
private static void accumulateSupporter( private static void accumulateSupporter(
@ -204,18 +205,20 @@ public class Supporter {
HashMap negativeHashes, HashMap positiveHashes, int dbtype) { HashMap negativeHashes, HashMap positiveHashes, int dbtype) {
int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype)); int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype));
yacyNewsRecord record; yacyNewsRecord record;
Iterator recordIterator = yacyCore.newsPool.recordIterator(dbtype, true);
int j = 0;
String url = "", urlhash; String url = "", urlhash;
kelondroRow.Entry entry; kelondroRow.Entry entry;
int score = 0; int score = 0;
Integer vote; Integer vote;
yacySeed seed; yacySeed seed;
for (int j = 0; j < maxCount; j++) try { while ((recordIterator.hasNext()) && (j++ < maxCount)) {
record = yacyCore.newsPool.get(dbtype, j); record = (yacyNewsRecord) recordIterator.next();
if (record == null) continue; if (record == null) continue;
entry = null; entry = null;
if ((record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) && if ((record.category().equals(yacyNewsPool.CATEGORY_PROFILE_UPDATE)) &&
((seed = yacyCore.seedDB.getConnected(record.originator())) != null)) { ((seed = yacyCore.seedDB.getConnected(record.originator())) != null)) try {
url = record.attribute("homepage", ""); url = record.attribute("homepage", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
entry = rowdef.newEntry(new byte[][]{ entry = rowdef.newEntry(new byte[][]{
@ -225,10 +228,10 @@ public class Supporter {
record.id().getBytes() record.id().getBytes()
}); });
score = 1 + timeFactor(record.created()); score = 1 + timeFactor(record.created());
} } catch (IOException e) {}
if ((record.category().equals(yacyNewsPool.CATEGORY_PROFILE_BROADCAST)) && if ((record.category().equals(yacyNewsPool.CATEGORY_PROFILE_BROADCAST)) &&
((seed = yacyCore.seedDB.getConnected(record.originator())) != null)) { ((seed = yacyCore.seedDB.getConnected(record.originator())) != null)) try {
url = record.attribute("homepage", ""); url = record.attribute("homepage", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
entry = rowdef.newEntry(new byte[][]{ entry = rowdef.newEntry(new byte[][]{
@ -238,7 +241,7 @@ public class Supporter {
record.id().getBytes() record.id().getBytes()
}); });
score = 1 + timeFactor(record.created()); score = 1 + timeFactor(record.created());
} } catch (IOException e) {}
// add/subtract votes and write record // add/subtract votes and write record
if (entry != null) { if (entry != null) {
@ -264,7 +267,7 @@ public class Supporter {
} }
} }
} catch (IOException e) {e.printStackTrace();} }
} }

@ -141,7 +141,7 @@ public class Surftips {
if ((url == null) || (title == null) || (description == null)) continue; if ((url == null) || (title == null) || (description == null)) continue;
refid = row.getColString(3, null); refid = row.getColString(3, null);
voted = false; voted = false;
try { try { // performance problem if published news is too full
voted = (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) || voted = (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) ||
(yacyCore.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null); (yacyCore.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null);
} catch (IOException e) { } catch (IOException e) {
@ -184,9 +184,10 @@ public class Surftips {
private static void accumulateVotes(HashMap negativeHashes, HashMap positiveHashes, int dbtype) { private static void accumulateVotes(HashMap negativeHashes, HashMap positiveHashes, int dbtype) {
int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype)); int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype));
yacyNewsRecord record; yacyNewsRecord record;
Iterator recordIterator = yacyCore.newsPool.recordIterator(dbtype, true);
for (int j = 0; j < maxCount; j++) try { int j = 0;
record = yacyCore.newsPool.get(dbtype, j); while ((recordIterator.hasNext()) && (j++ < maxCount)) {
record = (yacyNewsRecord) recordIterator.next();
if (record == null) continue; if (record == null) continue;
if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) {
@ -204,7 +205,7 @@ public class Surftips {
else positiveHashes.put(urlhash, new Integer(i.intValue() + factor)); else positiveHashes.put(urlhash, new Integer(i.intValue() + factor));
} }
} }
} catch (IOException e) {e.printStackTrace();} }
} }
private static void accumulateSurftips( private static void accumulateSurftips(
@ -212,16 +213,18 @@ public class Surftips {
HashMap negativeHashes, HashMap positiveHashes, int dbtype) { HashMap negativeHashes, HashMap positiveHashes, int dbtype) {
int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype)); int maxCount = Math.min(1000, yacyCore.newsPool.size(dbtype));
yacyNewsRecord record; yacyNewsRecord record;
Iterator recordIterator = yacyCore.newsPool.recordIterator(dbtype, true);
int j = 0;
String url = "", urlhash; String url = "", urlhash;
kelondroRow.Entry entry; kelondroRow.Entry entry;
int score = 0; int score = 0;
Integer vote; Integer vote;
for (int j = 0; j < maxCount; j++) try { while ((recordIterator.hasNext()) && (j++ < maxCount)) {
record = yacyCore.newsPool.get(dbtype, j); record = (yacyNewsRecord) recordIterator.next();
if (record == null) continue; if (record == null) continue;
entry = null; entry = null;
if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) { if (record.category().equals(yacyNewsPool.CATEGORY_CRAWL_START)) try {
String intention = record.attribute("intention", ""); String intention = record.attribute("intention", "");
url = record.attribute("startURL", ""); url = record.attribute("startURL", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
@ -232,9 +235,9 @@ public class Surftips {
record.id().getBytes() record.id().getBytes()
}); });
score = 2 + Math.min(10, intention.length() / 4) + timeFactor(record.created()); score = 2 + Math.min(10, intention.length() / 4) + timeFactor(record.created());
} } catch (IOException e) {}
if (record.category().equals(yacyNewsPool.CATEGORY_BOOKMARK_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_BOOKMARK_ADD)) try {
url = record.attribute("url", ""); url = record.attribute("url", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
entry = rowdef.newEntry(new byte[][]{ entry = rowdef.newEntry(new byte[][]{
@ -244,9 +247,9 @@ public class Surftips {
record.id().getBytes() record.id().getBytes()
}); });
score = 8 + timeFactor(record.created()); score = 8 + timeFactor(record.created());
} } catch (IOException e) {}
if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_ADD)) try {
url = record.attribute("url", ""); url = record.attribute("url", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
entry = rowdef.newEntry(new byte[][]{ entry = rowdef.newEntry(new byte[][]{
@ -256,9 +259,9 @@ public class Surftips {
record.id().getBytes() record.id().getBytes()
}); });
score = 5 + timeFactor(record.created()); score = 5 + timeFactor(record.created());
} } catch (IOException e) {}
if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD)) try {
if (!(record.attribute("vote", "negative").equals("positive"))) continue; if (!(record.attribute("vote", "negative").equals("positive"))) continue;
url = record.attribute("url", ""); url = record.attribute("url", "");
if (url.length() < 12) continue; if (url.length() < 12) continue;
@ -269,9 +272,9 @@ public class Surftips {
record.attribute("refid", "").getBytes() record.attribute("refid", "").getBytes()
}); });
score = 5 + timeFactor(record.created()); score = 5 + timeFactor(record.created());
} } catch (IOException e) {}
if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) { if (record.category().equals(yacyNewsPool.CATEGORY_WIKI_UPDATE)) try {
yacySeed seed = yacyCore.seedDB.getConnected(record.originator()); yacySeed seed = yacyCore.seedDB.getConnected(record.originator());
if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator()); if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator());
if (seed != null) { if (seed != null) {
@ -284,9 +287,9 @@ public class Surftips {
}); });
score = 4 + timeFactor(record.created()); score = 4 + timeFactor(record.created());
} }
} } catch (IOException e) {}
if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) { if (record.category().equals(yacyNewsPool.CATEGORY_BLOG_ADD)) try {
yacySeed seed = yacyCore.seedDB.getConnected(record.originator()); yacySeed seed = yacyCore.seedDB.getConnected(record.originator());
if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator()); if (seed == null) seed = yacyCore.seedDB.getDisconnected(record.originator());
if (seed != null) { if (seed != null) {
@ -299,7 +302,7 @@ public class Surftips {
}); });
score = 4 + timeFactor(record.created()); score = 4 + timeFactor(record.created());
} }
} } catch (IOException e) {}
// add/subtract votes and write record // add/subtract votes and write record
if (entry != null) { if (entry != null) {
@ -325,7 +328,7 @@ public class Surftips {
} }
} }
} catch (IOException e) {e.printStackTrace();} }
} }

@ -107,12 +107,20 @@ public final class kelondroStack extends kelondroRecords {
return open(f, row); return open(f, row);
} }
public class Counter implements Iterator { public Iterator iterator(boolean up) {
// iterates the elements in an ordered way.
// returns kelondroRecords.Node - type Objects
return new nodeIterator(up);
}
public class nodeIterator implements Iterator {
Handle nextHandle = null; Handle nextHandle = null;
Handle lastHandle = null; Handle lastHandle = null;
boolean up;
public Counter() { public nodeIterator(boolean up) {
nextHandle = getHandle(root); this.up = up;
nextHandle = getHandle((up) ? root : toor);
} }
public boolean hasNext() { public boolean hasNext() {
@ -122,7 +130,7 @@ public final class kelondroStack extends kelondroRecords {
public Object next() { public Object next() {
lastHandle = nextHandle; lastHandle = nextHandle;
try { try {
nextHandle = getNode(nextHandle, null, 0, false).getOHHandle(right); nextHandle = getNode(nextHandle, null, 0, false).getOHHandle((up) ? right : left);
return getNode(lastHandle, null, 0, true); return getNode(lastHandle, null, 0, true);
} catch (IOException e) { } catch (IOException e) {
throw new kelondroException(filename, "IO error at Counter:next()"); throw new kelondroException(filename, "IO error at Counter:next()");
@ -288,39 +296,6 @@ public final class kelondroStack extends kelondroRecords {
if (h == null) return null; else return getNode(h, true); if (h == null) return null; else return getNode(h, true);
} }
/**
 * Creates an iterator that walks the elements in an ordered way.
 *
 * @return a new Counter; the objects it yields are of type Node
 */
public Iterator iterator() {
    final Iterator nodes = new Counter();
    return nodes;
}
/**
 * Creates an iterator over byte[] objects by wrapping the iterator
 * obtained from iterator() in a keyIterator.
 *
 * @return a new keyIterator built on top of iterator()
 */
public Iterator keyIterator() {
    final Iterator nodes = iterator();
    return new keyIterator(nodes);
}
public class keyIterator implements Iterator {
    // adapter: turns an iterator of kelondroRecords.Node objects
    // into an iterator over the keys of those nodes

    Iterator ni;

    public keyIterator(Iterator i) {
        this.ni = i;
    }

    public boolean hasNext() {
        // there is a next key exactly when there is a next node
        return this.ni.hasNext();
    }

    public Object next() {
        // fetch the next node and hand out only its key
        final kelondroRecords.Node node = (kelondroRecords.Node) this.ni.next();
        return node.getKey();
    }

    public void remove() {
        // removal is delegated to the wrapped iterator
        this.ni.remove();
    }
}
public int imp(File file, String separator) throws IOException { public int imp(File file, String separator) throws IOException {
// imports a value-separated file, returns number of records that have been read // imports a value-separated file, returns number of records that have been read
RandomAccessFile f = null; RandomAccessFile f = null;
@ -368,7 +343,7 @@ public final class kelondroStack extends kelondroRecords {
super.print(false); super.print(false);
Node n; Node n;
try { try {
Iterator it = iterator(); Iterator it = iterator(true);
kelondroRow.Entry r; kelondroRow.Entry r;
while (it.hasNext()) { while (it.hasNext()) {
n = (Node) it.next(); n = (Node) it.next();

@ -166,7 +166,7 @@ public class plasmaCrawlBalancer {
// iterate through the file stack // iterate through the file stack
// in general this is a bad idea. But this can only be avoided by avoidance of this method // in general this is a bad idea. But this can only be avoided by avoidance of this method
i = urlFileStack.iterator(); i = urlFileStack.iterator(true);
while (i.hasNext()) { while (i.hasNext()) {
h = new String(((kelondroRecords.Node) i.next()).getKey()); h = new String(((kelondroRecords.Node) i.next()).getKey());
if (h.equals(urlhash)) { if (h.equals(urlhash)) {

@ -1974,6 +1974,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logFine("Cleaning Incoming News, " + yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB) + " entries on stack"); log.logFine("Cleaning Incoming News, " + yacyCore.newsPool.size(yacyNewsPool.INCOMING_DB) + " entries on stack");
if (yacyCore.newsPool.automaticProcess() > 0) hasDoneSomething = true; if (yacyCore.newsPool.automaticProcess() > 0) hasDoneSomething = true;
} catch (IOException e) {} } catch (IOException e) {}
if (getConfigBool("cleanup.deletionProcessedNews", true)) {
yacyCore.newsPool.clear(yacyNewsPool.PROCESSED_DB);
}
if (getConfigBool("cleanup.deletionPublishedNews", true)) {
yacyCore.newsPool.clear(yacyNewsPool.PUBLISHED_DB);
}
// clean up seed-dbs // clean up seed-dbs
if(getConfigBool("routing.deleteOldSeeds.permission",true)) { if(getConfigBool("routing.deleteOldSeeds.permission",true)) {

@ -47,6 +47,7 @@ package de.anomic.yacy;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import de.anomic.net.URL; import de.anomic.net.URL;
@ -286,6 +287,12 @@ public class yacyNewsPool {
return newsDB.size(); return newsDB.size();
} }
/**
 * Provides an iterator over the news records of one of the pool's queues.
 *
 * @param dbKey selects the queue; it is passed to switchQueue
 * @param up    direction flag, handed unchanged to the queue's records(boolean)
 * @return an iterator of yacyNewsRecord-type objects
 */
public Iterator recordIterator(int dbKey, boolean up) {
    return switchQueue(dbKey).records(up);
}
public void publishMyNews(yacyNewsRecord record) { public void publishMyNews(yacyNewsRecord record) {
// this shall be called if our peer generated a new news record and wants to publish it // this shall be called if our peer generated a new news record and wants to publish it
try { try {
@ -395,6 +402,7 @@ public class yacyNewsPool {
return false; return false;
} }
/*
public yacyNewsRecord get(int dbKey, int element) throws IOException { public yacyNewsRecord get(int dbKey, int element) throws IOException {
yacyNewsQueue queue = switchQueue(dbKey); yacyNewsQueue queue = switchQueue(dbKey);
yacyNewsRecord record = null; yacyNewsRecord record = null;
@ -410,6 +418,7 @@ public class yacyNewsPool {
} }
return record; return record;
} }
*/
public synchronized yacyNewsRecord getSpecific(int dbKey, String category, String key, String value) throws IOException { public synchronized yacyNewsRecord getSpecific(int dbKey, String category, String key, String value) throws IOException {
yacyNewsQueue queue = switchQueue(dbKey); yacyNewsQueue queue = switchQueue(dbKey);

@ -47,9 +47,11 @@ package de.anomic.yacy;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Date; import java.util.Date;
import java.util.Iterator;
import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroStack;
@ -62,8 +64,8 @@ public class yacyNewsQueue {
public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{ public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{
new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyNewsRecord.idLength, "id = created + originator"), new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyNewsRecord.idLength, "id = created + originator"),
new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyCore.universalDateShortPattern.length(), "") new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyCore.universalDateShortPattern.length(), "")
}, },
kelondroNaturalOrder.naturalOrder, 0 kelondroNaturalOrder.naturalOrder, 0
); );
public yacyNewsQueue(File path, yacyNewsDB newsDB) { public yacyNewsQueue(File path, yacyNewsDB newsDB) {
@ -160,5 +162,38 @@ public class yacyNewsQueue {
yacyCore.universalDateShortString(new Date()).getBytes()}); yacyCore.universalDateShortString(new Date()).getBytes()});
return b; return b;
} }
/**
 * Creates an iterator over the records of this queue.
 *
 * @param up direction flag, handed unchanged to the newsIterator constructor
 * @return a new newsIterator
 */
public Iterator records(boolean up) {
    final Iterator newsRecords = new newsIterator(up);
    return newsRecords;
}
public class newsIterator implements Iterator {
    // yields yacyNewsRecord-type objects, one for each node
    // delivered by the iterator of the queue stack

    Iterator stackNodeIterator;

    public newsIterator(boolean up) {
        // the direction flag is passed straight through to the stack iterator
        this.stackNodeIterator = queueStack.iterator(up);
    }

    public boolean hasNext() {
        return this.stackNodeIterator.hasNext();
    }

    public Object next() {
        // the stack iterator delivers kelondroRecords.Node objects
        final kelondroRecords.Node node = (kelondroRecords.Node) this.stackNodeIterator.next();
        try {
            // convert the node's value row into a row entry, then into a record
            return b2r(queueStack.row().newEntry(node.getValueRow()));
        } catch (IOException e) {
            // a record that cannot be read is reported as null
            return null;
        }
    }

    public void remove() {
        // removal is delegated to the stack iterator
        this.stackNodeIterator.remove();
    }
}
} }

@ -532,6 +532,11 @@ filterOutStopwordsFromTopwords=true
90_cleanup_busysleep=300000 90_cleanup_busysleep=300000
90_cleanup_memprereq=0 90_cleanup_memprereq=0
# cleanup-process:
# properties for tasks that are performed during cleanup
cleanup.deletionProcessedNews = true
cleanup.deletionPublishedNews = true
# multiprocessor-settings # multiprocessor-settings
# you may want to run time-consuming processes on several processors # you may want to run time-consuming processes on several processors
# the most time-consuming process is the indexing-Process # the most time-consuming process is the indexing-Process

Loading…
Cancel
Save