bugfixes and automatic news-cleanup

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@481 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 228b04b499
commit 40da910f41

@ -344,7 +344,7 @@ public final class httpc {
this.socket = null;
this.socketOwner = null;
throw new IOException("unknown host: " + server);
}
}
}
void reset() {

@ -48,8 +48,8 @@ import java.util.TreeMap;
public class kelondroMScoreCluster {
private TreeMap refkeyDB;
private TreeMap keyrefDB;
private TreeMap refkeyDB; // a mapping from a reference to the cluster key
private TreeMap keyrefDB; // a mapping from the cluster key to the reference
private long gcount;
private int encnt;
@ -125,78 +125,92 @@ public class kelondroMScoreCluster {
addScore(obj, 1);
}
public synchronized void addScore(Object obj, int count) {
public synchronized void setScore(Object obj, int newScore) {
if (obj == null) return;
Long cs = (Long) refkeyDB.get(obj);
long c;
int ec = count;
int en;
if (cs == null) {
// new entry
en = encnt++;
//System.out.println("setScore " + obj.getClass().getName());
Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
// set new value
usk = new Long(scoreKey(encnt++, newScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(cs);
c = cs.longValue();
ec += (int) ((c & 0xFFFFFFFF00000000L) >> 32);
//System.out.println("Debug:" + ec);
en = (int) (c & 0xFFFFFFFFL);
keyrefDB.remove(usk);
// get previous handle and score
long c = usk.longValue();
int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
int oldHandle = (int) (c & 0xFFFFFFFFL);
gcount -= oldScore;
// set new value
usk = new Long(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
}
// set new value
c = scoreKey(en, ec);
cs = new Long(c);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak
refkeyDB.put(obj, cs);
keyrefDB.put(cs, obj);
// increase overall counter
gcount += count;
gcount += newScore;
}
public synchronized void setScore(Object obj, int count) {
public synchronized void addScore(Object obj, int incrementScore) {
if (obj == null) return;
//System.out.println("setScore " + obj.getClass().getName());
Long cs = (Long) refkeyDB.get(obj);
long c;
int ec = count;
int en;
if (cs == null) {
// new entry
en = encnt++;
Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
// set new value
usk = new Long(scoreKey(encnt++, incrementScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(cs);
c = cs.longValue();
gcount -= (c & 0xFFFFFFFF00000000L) >> 32;
en = (int) (c & 0xFFFFFFFFL);
keyrefDB.remove(usk);
// get previous handle and score
long c = usk.longValue();
int oldScore = (int) ((c & 0xFFFFFFFF00000000L) >> 32);
int oldHandle = (int) (c & 0xFFFFFFFFL);
// set new value
usk = new Long(scoreKey(oldHandle, oldScore + incrementScore)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
}
// set new value
c = scoreKey(en, ec);
cs = new Long(c);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak
refkeyDB.put(obj, cs);
keyrefDB.put(cs, obj);
// increase overall counter
gcount += count;
gcount += incrementScore;
}
public synchronized int deleteScore(Object obj) {
if (obj == null) return -1;
Long cs = (Long) refkeyDB.get(obj);
if (cs == null) {
return -1;
// deletes entry and returns previous score
if (obj == null) return 0;
//System.out.println("setScore " + obj.getClass().getName());
Long usk = (Long) refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
return 0;
} else {
// delete entry
keyrefDB.remove(cs);
refkeyDB.remove(obj);
// delete old entry
keyrefDB.remove(usk);
// get previous handle and score
int oldScore = (int) ((usk.longValue() & 0xFFFFFFFF00000000L) >> 32);
// decrease overall counter
long oldScore = (cs.longValue() & 0xFFFFFFFF00000000L) >> 32;
gcount -= oldScore;
return (int) oldScore;
}
return oldScore;
}
}
public synchronized boolean existsScore(Object obj) {
@ -255,6 +269,10 @@ public class kelondroMScoreCluster {
return s;
}
/**
 * Renders both internal maps for debugging purposes.
 *
 * @return the string form of <code>refkeyDB</code>, the separator
 *         <code>" / "</code>, and the string form of <code>keyrefDB</code>
 */
public String toString() {
    // String.valueOf yields "null" for a null map, exactly like plain concatenation
    final String byReference = String.valueOf(refkeyDB);
    final String byClusterKey = String.valueOf(keyrefDB);
    return byReference + " / " + byClusterKey;
}
public synchronized Iterator scores(boolean up) {
if (up) return new simpleScoreIterator();
else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE);
@ -288,8 +306,7 @@ public class kelondroMScoreCluster {
int score = (max + min) / 2;
while (keyrefDBcopy.size() > 0) {
key = (Long) ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey());
n = keyrefDBcopy.get(key);
keyrefDBcopy.remove(key);
n = keyrefDBcopy.remove(key);
score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
if ((score >= min) && (score <= max)) return;
if (((up) && (score > max)) || ((!(up)) && (score < min))) {
@ -338,6 +355,10 @@ public class kelondroMScoreCluster {
}
public static void main(String[] args) {
if (args.length > 0) System.out.println("score of " + args[0] + ": " + string2score(args[0]));
//System.exit(0);
System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster();
int c = 0;

@ -283,6 +283,7 @@ public class kelondroMap {
if (sortClusterMap == null) return null;
kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field);
if (cluster == null) return null; // sort field does not exist
//System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString());
return cluster.scores(up);
}

@ -362,7 +362,10 @@ public class plasmaCrawlNURL extends plasmaURL {
this.flags = new bitfield(entry[10]);
this.handle = Integer.parseInt(new String(entry[11]));
return;
}
} else {
// show that we found nothing
this.url = null;
}
} catch (Exception e) {
}
}

@ -451,18 +451,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
initProfiles();
} catch (IOException e) {}
}
public void cleanProfiles() {
if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return;
public boolean cleanProfiles() {
if ((sbQueue.size() > 0) || (cacheLoader.size() > 0) || (urlPool.noticeURL.stackSize() > 0)) return false;
Iterator i = profiles.profiles(true);
plasmaCrawlProfile.entry entry;
boolean hasDoneSomething = false;
try {
while (i.hasNext()) {
entry = (plasmaCrawlProfile.entry) i.next();
if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) i.remove();
if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) {
i.remove();
hasDoneSomething = true;
}
}
} catch (kelondroException e) {
resetProfiles();
hasDoneSomething = true;
}
return hasDoneSomething;
}
public plasmaHTCache getCacheManager() {
@ -623,7 +629,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
// clean up profiles
cleanProfiles();
if (cleanProfiles()) hasDoneSomething = true;
// clean up news
try {
if (yacyCore.newsPool.automaticProcess() > 0) hasDoneSomething = true;
} catch (IOException e) {}
return hasDoneSomething;
}
@ -696,7 +708,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do a local crawl
plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
if (urlEntry.url() == null) {
if ((urlEntry.url() == null) || (urlEntry.url().toString().length() < 10)) {
log.logError(stats + ": urlEntry.url() == null");
return true;
}

@ -137,6 +137,43 @@ public class yacyNewsPool {
return switchQueue(dbKey).size();
}
/**
 * Scans the incoming-news queue and moves every record that
 * {@link #automaticProcessP} accepts over to the processed-news queue.
 * The whole scan runs while holding the monitor of <code>incomingNews</code>.
 *
 * @return the number of records that were moved
 * @throws IOException if one of the queue operations fails
 */
public int automaticProcess() throws IOException {
    int moved = 0;
    synchronized (incomingNews) {
        // Walk from the highest index down to 0; presumably this keeps the
        // indices still to be visited stable when pop(index) removes an
        // element -- confirm against yacyNewsQueue.pop.
        int index = incomingNews.size();
        while (--index >= 0) {
            final yacyNewsRecord candidate = incomingNews.top(index);
            if (!automaticProcessP(candidate)) {
                continue; // record stays in the incoming queue
            }
            incomingNews.pop(index);
            processedNews.push(candidate);
            moved++;
        }
    }
    return moved;
}
/**
 * Predicate used by the automatic news clean-up: decides whether a record
 * may be moved out of the incoming queue without user interaction.
 *
 * Rules implemented here:
 * - "wiki_upd" records qualify once they are older than one day.
 * - "crwlstrt" records older than one hour qualify when the originating
 *   seed reports an "ISpeed" below 10, or when that value is not a number.
 * - Everything else (including a null record) does not qualify.
 *
 * @param record the news record to examine; may be null
 * @return true if the record should be processed automatically
 */
private boolean automaticProcessP(yacyNewsRecord record) {
    if (record == null) return false;
    if ((record.category().equals("wiki_upd")) &&
        (yacyCore.universalTime() - record.created().getTime() > 1000 * 60 * 60 * 24 /* 1 Day */)) {
        return true;
    }
    if ((record.category().equals("crwlstrt")) &&
        (yacyCore.universalTime() - record.created().getTime() > 1000 * 60 * 60 /* 1 Hour */)) {
        yacySeed seed = yacyCore.seedDB.get(record.originator());
        // The originator may not be present in the seed database; without
        // this guard the dereference below would throw a NullPointerException,
        // which is not caught here and would abort the surrounding clean-up run.
        // NOTE(review): keeping the record (false) is the conservative choice;
        // confirm whether records from unknown peers should be purged instead.
        if (seed == null) return false;
        try {
            // "-" is the fallback when the seed has no ISpeed entry; it is not
            // a number, so a missing entry ends up in the catch branch below
            return (Integer.parseInt(seed.get("ISpeed", "-")) < 10);
        } catch (NumberFormatException ee) {
            // missing or unparseable ISpeed is treated like a low speed
            return true;
        }
    }
    return false;
}
public yacyNewsRecord get(int dbKey, int element) throws IOException {
yacyNewsQueue queue = switchQueue(dbKey);
yacyNewsRecord record;

@ -680,8 +680,10 @@ public class yacySeedDB {
e.printStackTrace();
if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile);
if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile);
if (database == seedPotentialDB) seedPotentialDB = resetSeedTable(seedPotentialDB, seedPotentialDBFile);
it = null;
} }
}
}
public boolean hasMoreElements() {
return (nextSeed != null);

Loading…
Cancel
Save