From 40da910f415e21ed5f44061b582a33a6bb800d55 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 2 Aug 2005 16:03:35 +0000 Subject: [PATCH] bugfixes and automatic news-cleanup git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@481 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/http/httpc.java | 2 +- .../kelondro/kelondroMScoreCluster.java | 131 ++++++++++-------- source/de/anomic/kelondro/kelondroMap.java | 1 + source/de/anomic/plasma/plasmaCrawlNURL.java | 5 +- .../de/anomic/plasma/plasmaSwitchboard.java | 22 ++- source/de/anomic/yacy/yacyNewsPool.java | 37 +++++ source/de/anomic/yacy/yacySeedDB.java | 4 +- 7 files changed, 139 insertions(+), 63 deletions(-) diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index ee54e1701..597bf0c15 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -344,7 +344,7 @@ public final class httpc { this.socket = null; this.socketOwner = null; throw new IOException("unknown host: " + server); - } + } } void reset() { diff --git a/source/de/anomic/kelondro/kelondroMScoreCluster.java b/source/de/anomic/kelondro/kelondroMScoreCluster.java index 06d2c5325..350b75495 100644 --- a/source/de/anomic/kelondro/kelondroMScoreCluster.java +++ b/source/de/anomic/kelondro/kelondroMScoreCluster.java @@ -48,8 +48,8 @@ import java.util.TreeMap; public class kelondroMScoreCluster { - private TreeMap refkeyDB; - private TreeMap keyrefDB; + private TreeMap refkeyDB; // a mapping from a reference to the cluster key + private TreeMap keyrefDB; // a mapping from the cluster key to the reference private long gcount; private int encnt; @@ -125,78 +125,92 @@ public class kelondroMScoreCluster { addScore(obj, 1); } - public synchronized void addScore(Object obj, int count) { + public synchronized void setScore(Object obj, int newScore) { if (obj == null) return; - Long cs = (Long) refkeyDB.get(obj); - long c; - int ec = count; - int en; - if (cs == null) { - // new entry - en = encnt++; + //System.out.println("setScore " + obj.getClass().getName()); + Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more + + if (usk == null) { + // set new value + usk = new Long(scoreKey(encnt++, newScore)); + + // put new value into cluster + refkeyDB.put(obj, usk); + keyrefDB.put(usk, obj); + } else { // delete old entry - keyrefDB.remove(cs); - c = cs.longValue(); - ec += (int) ((c & 0xFFFFFFFF00000000L) >> 32); - //System.out.println("Debug:" + ec); - en = (int) (c & 0xFFFFFFFFL); + keyrefDB.remove(usk); + + // get previous handle and score + long c = usk.longValue(); + int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32); + int oldHandle = (int) (c & 0xFFFFFFFFL); + gcount -= oldScore; + + // set new value + usk = new Long(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score + refkeyDB.put(obj, usk); + keyrefDB.put(usk, obj); } - // set new value - c = scoreKey(en, ec); - cs = new Long(c); - Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak - refkeyDB.put(obj, cs); - keyrefDB.put(cs, obj); - // increase overall counter - gcount += count; + gcount += newScore; } - public synchronized void setScore(Object obj, int count) { + public synchronized void addScore(Object obj, int incrementScore) { if (obj == null) return; //System.out.println("setScore " + obj.getClass().getName()); - Long cs = (Long) refkeyDB.get(obj); - long c; - int ec = count; - int en; - if (cs == null) { - // new entry - en = encnt++; + Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more + + if (usk == null) { + // set new value + usk = new Long(scoreKey(encnt++, incrementScore)); + + // put new value into cluster + refkeyDB.put(obj, usk); + keyrefDB.put(usk, obj); + } else { // delete old entry - keyrefDB.remove(cs); - c = cs.longValue(); - gcount -= (c & 0xFFFFFFFF00000000L) >> 32; - en = (int) (c & 0xFFFFFFFFL); + keyrefDB.remove(usk); + + // get previous handle and score + long c = usk.longValue(); + int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32); + int oldHandle = (int) (c & 0xFFFFFFFFL); + + // set new value + usk = new Long(scoreKey(oldHandle, oldScore + incrementScore)); // generates an unique key for a specific score + refkeyDB.put(obj, usk); + keyrefDB.put(usk, obj); + } - // set new value - c = scoreKey(en, ec); - cs = new Long(c); - Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak - refkeyDB.put(obj, cs); - keyrefDB.put(cs, obj); - // increase overall counter - gcount += count; + gcount += incrementScore; } public synchronized int deleteScore(Object obj) { - if (obj == null) return -1; - Long cs = (Long) refkeyDB.get(obj); - if (cs == null) { - return -1; + // deletes entry and returns previous score + if (obj == null) return 0; + //System.out.println("setScore " + obj.getClass().getName()); + Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more + + if (usk == null) { + return 0; } else { - // delete entry - keyrefDB.remove(cs); - refkeyDB.remove(obj); + // delete old entry + keyrefDB.remove(usk); + + // get previous handle and score + int oldScore = (int) ((usk.longValue() & 0xFFFFFFFF00000000L) >> 32); + // decrease overall counter - long oldScore = (cs.longValue() & 0xFFFFFFFF00000000L) >> 32; gcount -= oldScore; - return (int) oldScore; - } + + return oldScore; + } } public synchronized boolean existsScore(Object obj) { @@ -255,6 +269,10 @@ public class kelondroMScoreCluster { return s; } + public String toString() { + return refkeyDB + " / " + keyrefDB; + } + public synchronized Iterator scores(boolean up) { if (up) return new simpleScoreIterator(); else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE); @@ -288,8 +306,7 @@ public class kelondroMScoreCluster { int score = (max + min) / 2; while (keyrefDBcopy.size() > 0) { key = (Long) ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey()); - n = keyrefDBcopy.get(key); - keyrefDBcopy.remove(key); + n = keyrefDBcopy.remove(key); score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32); if ((score >= min) && (score <= max)) return; if (((up) && (score > max)) || ((!(up)) && (score < min))) { @@ -338,6 +355,10 @@ public class kelondroMScoreCluster { } public static void main(String[] args) { + + if (args.length > 0) System.out.println("score of " + args[0] + ": " + string2score(args[0])); + //System.exit(0); + System.out.println("Test for Score: start"); kelondroMScoreCluster s = new kelondroMScoreCluster(); int c = 0; diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java index 7704f4033..9308d0590 100644 --- a/source/de/anomic/kelondro/kelondroMap.java +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -283,6 +283,7 @@ public class kelondroMap { if (sortClusterMap == null) return null; kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field); if (cluster == null) return null; // sort field does not exist + //System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString()); return cluster.scores(up); } diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 4ca9b5ddb..a97c95f7f 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -362,7 +362,10 @@ public class plasmaCrawlNURL extends plasmaURL { this.flags = new bitfield(entry[10]); this.handle = Integer.parseInt(new String(entry[11])); return; - } + } else { + // show that we found nothing + this.url = null; + } } catch (Exception e) { } } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 391c3e3e8..cd3745e89 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -451,18 +451,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser initProfiles(); } catch (IOException e) {} } - public void cleanProfiles() { - if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return; + public boolean cleanProfiles() { + if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return false; Iterator i = profiles.profiles(true); plasmaCrawlProfile.entry entry; + boolean hasDoneSomething = false; try { while (i.hasNext()) { entry = (plasmaCrawlProfile.entry) i.next(); - if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) i.remove(); + if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) { + i.remove(); + hasDoneSomething = true; + } } } catch (kelondroException e) { resetProfiles(); + hasDoneSomething = true; } + return hasDoneSomething; } public plasmaHTCache getCacheManager() { @@ -623,7 +629,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } // clean up profiles - cleanProfiles(); + if (cleanProfiles()) hasDoneSomething = true; + + // clean up news + try { + if (yacyCore.newsPool.automaticProcess() > 0) hasDoneSomething = true; + } catch (IOException e) {} + return hasDoneSomething; } @@ -696,7 +708,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // do a local crawl plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE); String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; - if (urlEntry.url() == null) { + if ((urlEntry.url() == null) || (urlEntry.url().toString().length() < 10)) { log.logError(stats + ": urlEntry.url() == null"); return true; } diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 27d31dc7e..1d1cf02ba 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -137,6 +137,43 @@ public class yacyNewsPool { return switchQueue(dbKey).size(); } + public int automaticProcess() throws IOException { + // processes news in the incoming-db + // returns number of processes + yacyNewsRecord record; + int pc = 0; + synchronized (incomingNews) { + for (int i = incomingNews.size() - 1; i >= 0; i--) { + record = incomingNews.top(i); + if (automaticProcessP(record)) { + incomingNews.pop(i); + processedNews.push(record); + //newsDB.remove(id); + pc++; + } + } + } + return pc; + } + + private boolean automaticProcessP(yacyNewsRecord record) { + if (record == null) return false; + if ((record.category().equals("wiki_upd")) && + (yacyCore.universalTime() - record.created().getTime() > 1000 * 60 * 60 * 24 /* 1 Day */)) { + return true; + } + if ((record.category().equals("crwlstrt")) && + (yacyCore.universalTime() - record.created().getTime() > 1000 * 60 * 60 /* 1 Hour */)) { + yacySeed seed = yacyCore.seedDB.get(record.originator()); + try { + return (Integer.parseInt(seed.get("ISpeed", "-")) < 10); + } catch (NumberFormatException ee) { + return true; + } + } + return false; + } + public yacyNewsRecord get(int dbKey, int element) throws IOException { yacyNewsQueue queue = switchQueue(dbKey); yacyNewsRecord record; diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 0215438e5..5fdf56e2d 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -680,8 +680,10 @@ public class yacySeedDB { e.printStackTrace(); if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); + if (database == seedPotentialDB) seedPotentialDB = resetSeedTable(seedPotentialDB, seedPotentialDBFile); it = null; - } } + } + } public boolean hasMoreElements() { return (nextSeed != null);