bugfixes and automatic news-cleanup

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@481 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 228b04b499
commit 40da910f41

@ -48,8 +48,8 @@ import java.util.TreeMap;
public class kelondroMScoreCluster { public class kelondroMScoreCluster {
private TreeMap refkeyDB; private TreeMap refkeyDB; // a mapping from a reference to the cluster key
private TreeMap keyrefDB; private TreeMap keyrefDB; // a mapping from the cluster key to the reference
private long gcount; private long gcount;
private int encnt; private int encnt;
@ -125,77 +125,91 @@ public class kelondroMScoreCluster {
addScore(obj, 1); addScore(obj, 1);
} }
public synchronized void addScore(Object obj, int count) { public synchronized void setScore(Object obj, int newScore) {
if (obj == null) return; if (obj == null) return;
Long cs = (Long) refkeyDB.get(obj); //System.out.println("setScore " + obj.getClass().getName());
long c; Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
int ec = count;
int en; if (usk == null) {
if (cs == null) { // set new value
// new entry usk = new Long(scoreKey(encnt++, newScore));
en = encnt++;
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else { } else {
// delete old entry // delete old entry
keyrefDB.remove(cs); keyrefDB.remove(usk);
c = cs.longValue();
ec += (int) ((c & 0xFFFFFFFF00000000L) >> 32); // get previous handle and score
//System.out.println("Debug:" + ec); long c = usk.longValue();
en = (int) (c & 0xFFFFFFFFL); int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
} int oldHandle = (int) (c & 0xFFFFFFFFL);
gcount -= oldScore;
// set new value // set new value
c = scoreKey(en, ec); usk = new Long(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score
cs = new Long(c); refkeyDB.put(obj, usk);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak keyrefDB.put(usk, obj);
refkeyDB.put(obj, cs); }
keyrefDB.put(cs, obj);
// increase overall counter // increase overall counter
gcount += count; gcount += newScore;
} }
public synchronized void setScore(Object obj, int count) { public synchronized void addScore(Object obj, int incrementScore) {
if (obj == null) return; if (obj == null) return;
//System.out.println("setScore " + obj.getClass().getName()); //System.out.println("setScore " + obj.getClass().getName());
Long cs = (Long) refkeyDB.get(obj); Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
long c;
int ec = count; if (usk == null) {
int en; // set new value
if (cs == null) { usk = new Long(scoreKey(encnt++, incrementScore));
// new entry
en = encnt++; // put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else { } else {
// delete old entry // delete old entry
keyrefDB.remove(cs); keyrefDB.remove(usk);
c = cs.longValue();
gcount -= (c & 0xFFFFFFFF00000000L) >> 32; // get previous handle and score
en = (int) (c & 0xFFFFFFFFL); long c = usk.longValue();
} int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
int oldHandle = (int) (c & 0xFFFFFFFFL);
// set new value // set new value
c = scoreKey(en, ec); usk = new Long(scoreKey(oldHandle, oldScore + incrementScore)); // generates an unique key for a specific score
cs = new Long(c); refkeyDB.put(obj, usk);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak keyrefDB.put(usk, obj);
refkeyDB.put(obj, cs);
keyrefDB.put(cs, obj); }
// increase overall counter // increase overall counter
gcount += count; gcount += incrementScore;
} }
public synchronized int deleteScore(Object obj) { public synchronized int deleteScore(Object obj) {
if (obj == null) return -1; // deletes entry and returns previous score
Long cs = (Long) refkeyDB.get(obj); if (obj == null) return 0;
if (cs == null) { //System.out.println("setScore " + obj.getClass().getName());
return -1; Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
return 0;
} else { } else {
// delete entry // delete old entry
keyrefDB.remove(cs); keyrefDB.remove(usk);
refkeyDB.remove(obj);
// get previous handle and score
int oldScore = (int) ((usk.longValue() & 0xFFFFFFFF00000000L) >> 32);
// decrease overall counter // decrease overall counter
long oldScore = (cs.longValue() & 0xFFFFFFFF00000000L) >> 32;
gcount -= oldScore; gcount -= oldScore;
return (int) oldScore;
return oldScore;
} }
} }
@ -255,6 +269,10 @@ public class kelondroMScoreCluster {
return s; return s;
} }
public String toString() {
    // debugging aid: renders both index maps, reference->key first, then key->reference,
    // separated by " / "
    final String forwardIndex = String.valueOf(refkeyDB);
    final String reverseIndex = String.valueOf(keyrefDB);
    return forwardIndex + " / " + reverseIndex;
}
public synchronized Iterator scores(boolean up) { public synchronized Iterator scores(boolean up) {
if (up) return new simpleScoreIterator(); if (up) return new simpleScoreIterator();
else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE); else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE);
@ -288,8 +306,7 @@ public class kelondroMScoreCluster {
int score = (max + min) / 2; int score = (max + min) / 2;
while (keyrefDBcopy.size() > 0) { while (keyrefDBcopy.size() > 0) {
key = (Long) ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey()); key = (Long) ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey());
n = keyrefDBcopy.get(key); n = keyrefDBcopy.remove(key);
keyrefDBcopy.remove(key);
score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32); score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
if ((score >= min) && (score <= max)) return; if ((score >= min) && (score <= max)) return;
if (((up) && (score > max)) || ((!(up)) && (score < min))) { if (((up) && (score > max)) || ((!(up)) && (score < min))) {
@ -338,6 +355,10 @@ public class kelondroMScoreCluster {
} }
public static void main(String[] args) { public static void main(String[] args) {
if (args.length > 0) System.out.println("score of " + args[0] + ": " + string2score(args[0]));
//System.exit(0);
System.out.println("Test for Score: start"); System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster(); kelondroMScoreCluster s = new kelondroMScoreCluster();
int c = 0; int c = 0;

@ -283,6 +283,7 @@ public class kelondroMap {
if (sortClusterMap == null) return null; if (sortClusterMap == null) return null;
kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field); kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field);
if (cluster == null) return null; // sort field does not exist if (cluster == null) return null; // sort field does not exist
//System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString());
return cluster.scores(up); return cluster.scores(up);
} }

@ -362,6 +362,9 @@ public class plasmaCrawlNURL extends plasmaURL {
this.flags = new bitfield(entry[10]); this.flags = new bitfield(entry[10]);
this.handle = Integer.parseInt(new String(entry[11])); this.handle = Integer.parseInt(new String(entry[11]));
return; return;
} else {
// show that we found nothing
this.url = null;
} }
} catch (Exception e) { } catch (Exception e) {
} }

@ -451,18 +451,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
initProfiles(); initProfiles();
} catch (IOException e) {} } catch (IOException e) {}
} }
public void cleanProfiles() { public boolean cleanProfiles() {
if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return; if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return false;
Iterator i = profiles.profiles(true); Iterator i = profiles.profiles(true);
plasmaCrawlProfile.entry entry; plasmaCrawlProfile.entry entry;
boolean hasDoneSomething = false;
try { try {
while (i.hasNext()) { while (i.hasNext()) {
entry = (plasmaCrawlProfile.entry) i.next(); entry = (plasmaCrawlProfile.entry) i.next();
if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) i.remove(); if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) {
i.remove();
hasDoneSomething = true;
}
} }
} catch (kelondroException e) { } catch (kelondroException e) {
resetProfiles(); resetProfiles();
hasDoneSomething = true;
} }
return hasDoneSomething;
} }
public plasmaHTCache getCacheManager() { public plasmaHTCache getCacheManager() {
@ -623,7 +629,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
} }
// clean up profiles // clean up profiles
cleanProfiles(); if (cleanProfiles()) hasDoneSomething = true;
// clean up news
try {
if (yacyCore.newsPool.automaticProcess() > 0) hasDoneSomething = true;
} catch (IOException e) {}
return hasDoneSomething; return hasDoneSomething;
} }
@ -696,7 +708,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do a local crawl // do a local crawl
plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE); plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]"; String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
if (urlEntry.url() == null) { if ((urlEntry.url() == null) || (urlEntry.url().toString().length() < 10)) {
log.logError(stats + ": urlEntry.url() == null"); log.logError(stats + ": urlEntry.url() == null");
return true; return true;
} }

@ -137,6 +137,43 @@ public class yacyNewsPool {
return switchQueue(dbKey).size(); return switchQueue(dbKey).size();
} }
public int automaticProcess() throws IOException {
    // Walks the incoming-news queue from its highest index down to 0 and moves every
    // record accepted by automaticProcessP() over to the processed-news queue.
    // Returns the number of records that were moved.
    int processedCount = 0;
    synchronized (incomingNews) {
        // counting downwards; presumably so that pop(index) cannot disturb the
        // positions that still have to be visited -- confirm against yacyNewsQueue
        int index = incomingNews.size();
        while (--index >= 0) {
            final yacyNewsRecord candidate = incomingNews.top(index);
            if (!automaticProcessP(candidate)) continue; // keep this record in the incoming queue

            incomingNews.pop(index);
            processedNews.push(candidate);
            // NOTE(review): the record is only moved between queues here; a
            // newsDB.remove(id) call was left disabled in the original code
            //newsDB.remove(id);
            processedCount++;
        }
    }
    return processedCount;
}
private boolean automaticProcessP(yacyNewsRecord record) {
    // Predicate for automaticProcess(): returns true if the given record may be
    // retired from the incoming queue without user interaction.
    //   - null record                     -> false
    //   - "wiki_upd" older than one day   -> true
    //   - "crwlstrt" older than one hour  -> true if the originator's "ISpeed" is
    //                                        below 10, cannot be parsed, or the
    //                                        originator seed cannot be looked up
    //   - everything else                 -> false
    if (record == null) return false;

    final long oneHour = 1000L * 60 * 60;
    final long oneDay = oneHour * 24;

    // constant-first equals: a record without a category is simply not matched
    final String category = record.category();

    if (("wiki_upd".equals(category)) &&
        (yacyCore.universalTime() - record.created().getTime() > oneDay)) {
        return true;
    }

    if (("crwlstrt".equals(category)) &&
        (yacyCore.universalTime() - record.created().getTime() > oneHour)) {
        yacySeed seed = yacyCore.seedDB.get(record.originator());
        // NOTE(review): assumes seedDB.get() yields null for a peer that is not in the
        // seed database -- confirm. Without this guard such a record raised a
        // NullPointerException below and aborted the whole clean-up run.
        // An unknown peer is handled like an unreadable speed value: the news is retired.
        if (seed == null) return true;
        try {
            // the "-" default is not a number, so a seed without an ISpeed entry
            // ends up in the catch branch
            return (Integer.parseInt(seed.get("ISpeed", "-")) < 10);
        } catch (NumberFormatException ee) {
            return true;
        }
    }
    return false;
}
public yacyNewsRecord get(int dbKey, int element) throws IOException { public yacyNewsRecord get(int dbKey, int element) throws IOException {
yacyNewsQueue queue = switchQueue(dbKey); yacyNewsQueue queue = switchQueue(dbKey);
yacyNewsRecord record; yacyNewsRecord record;

@ -680,8 +680,10 @@ public class yacySeedDB {
e.printStackTrace(); e.printStackTrace();
if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile);
if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile);
if (database == seedPotentialDB) seedPotentialDB = resetSeedTable(seedPotentialDB, seedPotentialDBFile);
it = null; it = null;
} } }
}
public boolean hasMoreElements() { public boolean hasMoreElements() {
return (nextSeed != null); return (nextSeed != null);

Loading…
Cancel
Save