bug fixes for word ordering and dht index selection

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@521 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 41cd5e930a
commit 5716f8521d

@ -193,7 +193,7 @@ public class Network {
prop.put("table_comment",0);
}
}else {
} else {
// generate table
int page = Integer.parseInt(post.get("page", "1"));
int conCount = 0;
@ -226,7 +226,9 @@ public class Network {
try {
for (int c = availableNews - 1; c >= 0; c--) {
record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c);
if (record.category().equals("prfleupd")) {
if (record == null) {
break;
} else if (record.category().equals("prfleupd")) {
updatedProfile.add(record.originator());
} else if (record.category().equals("wiki_upd")) {
updatedWiki.put(record.originator(), record.attributes().get("page"));

@ -241,6 +241,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
}
}
// we reached a node where we must insert the new value
// the parent of this new value can be obtained by getParent()
// all values are set, just return
}
@ -737,7 +738,10 @@ public class kelondroTree extends kelondroRecords implements Comparator {
if (nn == null) {
return (new HashSet()).iterator(); // an empty iterator
} else {
return new nodeIterator(up, rotating, nn);
// the node nn may be greater or smaller than the firstKey
// depending on the ordering direction,
// we must find the next smaller or greater node
return new correctedNodeIterator(up, rotating, nn, firstKey);
}
}
} catch (IOException e) {
@ -745,6 +749,47 @@ public class kelondroTree extends kelondroRecords implements Comparator {
}
}
/**
 * Iterator over Node objects that wraps a nodeIterator and corrects its
 * start position.
 *
 * The node the underlying iterator starts at may lie on the wrong side of
 * firstKey, depending on the ordering direction. When the first delivered
 * node is smaller than firstKey on an ascending iteration, or greater than
 * firstKey on a descending iteration, that single node is skipped. At most
 * one node is skipped.
 *
 * Note: next() returns null (it does not throw) once the iteration is
 * exhausted; callers are expected to check hasNext() first.
 */
private class correctedNodeIterator implements Iterator {

    Iterator ii;   // the wrapped nodeIterator
    Node nextNode; // look-ahead element, null when nothing is left

    /**
     * @param up       true for ascending order, false for descending order
     * @param rotating handed through unchanged to the wrapped nodeIterator
     * @param start    node at which the wrapped nodeIterator starts
     * @param firstKey key the first delivered node is checked against
     * @throws IOException declared for the construction of the wrapped iterator and the key access
     */
    public correctedNodeIterator(boolean up, boolean rotating, Node start, byte[] firstKey) throws IOException {
        ii = new nodeIterator(up, rotating, start);
        nextNode = fetch();
        if (nextNode != null) {
            int c = compare(firstKey, nextNode.getKey());
            // c > 0 means firstKey > node key, c < 0 means firstKey < node key;
            // the comparison is done once, so the two directions exclude each other
            boolean wrongSide = (up) ? (c > 0) : (c < 0);
            if (wrongSide) {
                // debug output disabled: it wrote to stdout on every correction
                //System.out.println("CORRECTING ITERATOR: firstKey=" + new String(firstKey) + ", nextNode=" + new String(nextNode.getKey()));
                nextNode = fetch();
            }
        }
    }

    // pulls the next node from the wrapped iterator, or null if it is exhausted
    private Node fetch() {
        return (ii.hasNext()) ? (Node) ii.next() : null;
    }

    // drops the references held by this iterator
    public void finalize() {
        ii = null;
        nextNode = null;
    }

    public boolean hasNext() {
        return nextNode != null;
    }

    // returns the look-ahead node and advances; yields null when exhausted
    public Object next() {
        Node r = nextNode;
        nextNode = fetch();
        return r;
    }

    public void remove() {
        throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported");
    }
}
private class nodeIterator implements Iterator {
// we implement an iteration! (not a recursive function as the structure would suggest...)
// the iterator iterates Node objects

@ -1298,8 +1298,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do global fetching
int globalresults = 0;
if (global) {
int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search
int fetchpeers = ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds
int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search
int fetchpeers = 10 + ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds
long fetchtime = time * 7 / 10; // time to waste
if (fetchcount > count) fetchcount = count;
globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime);

@ -116,10 +116,58 @@ public final class plasmaWordIndex {
}
/**
 * Returns an iterator over word hashes beginning at startHash.
 *
 * The iterator of the RAM cache is not returned directly; it is wrapped in
 * a correctedWordIterator, which skips leading entries that lie on the
 * wrong side of startHash for the requested direction.
 *
 * @param startHash word hash at which the iteration starts
 * @param up        true for ascending order, false for descending order
 * @param rot       rotation flag, handed to the correctedWordIterator
 * @return an Iterator delivering word hashes as String objects
 */
public Iterator wordHashes(String startHash, boolean up, boolean rot) {
    //return ramCache.wordHashes(startHash, up);
    return new correctedWordIterator(up, rot, startHash); // use correction until bug is found
}
/**
 * Iterator over word hashes (String objects) that wraps the iterator
 * delivered by ramCache.wordHashes and corrects its start position.
 *
 * Leading words that are smaller than firstWord on an ascending iteration,
 * or greater than firstWord on a descending iteration, are skipped. The
 * number of skipped words is limited to 50 to avoid rotation loops.
 *
 * Note: next() returns null (it does not throw) once the iteration is
 * exhausted; callers are expected to check hasNext() first.
 */
private class correctedWordIterator implements Iterator {

    Iterator ii;     // the wrapped word hash iterator
    String nextWord; // look-ahead element, null when nothing is left

    /**
     * @param up        true for ascending order, false for descending order
     * @param rotating  currently not evaluated by this class
     * @param firstWord word hash at which the iteration shall start
     */
    public correctedWordIterator(boolean up, boolean rotating, String firstWord) {
        ii = ramCache.wordHashes(firstWord, up);
        nextWord = fetch();
        final int maxCorrections = 50; // to avoid rotation loops
        int cc = 0;
        while ((nextWord != null) && (cc < maxCorrections)) {
            int c = firstWord.compareTo(nextWord);
            // c > 0 means firstWord > nextWord, c < 0 means firstWord < nextWord
            boolean wrongSide = (up) ? (c > 0) : (c < 0);
            if (!wrongSide) break;
            //System.out.println("CORRECTING WORD ITERATOR: firstWord=" + firstWord + ", nextWord=" + nextWord);
            nextWord = fetch();
            cc++;
        }
    }

    // pulls the next word from the wrapped iterator, or null if it is exhausted
    private String fetch() {
        return (ii.hasNext()) ? (String) ii.next() : null;
    }

    // drops the references held by this iterator
    public void finalize() {
        ii = null;
        nextWord = null;
    }

    public boolean hasNext() {
        return nextWord != null;
    }

    // returns the look-ahead word and advances; yields null when exhausted
    public Object next() {
        String r = nextWord;
        nextWord = fetch();
        return r;
    }

    public void remove() {
        // message corrected: this iterator belongs to plasmaWordIndex, not to kelondroTree
        throw new java.lang.UnsupportedOperationException("plasmaWordIndex: remove in word hash iterator not supported");
    }
}
/**
 * Builds a new iterateFiles instance from the given arguments and hands
 * it back as an Iterator.
 *
 * @param startHash   passed through to iterateFiles
 * @param up          passed through to iterateFiles
 * @param deleteEmpty passed through to iterateFiles
 * @return the newly created iterateFiles object
 */
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
    final Iterator files = new iterateFiles(startHash, up, deleteEmpty);
    return files;
}

@ -47,6 +47,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Enumeration;
import de.anomic.kelondro.kelondroException;
@ -329,7 +330,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
return new kelondroMergeIterator(
new kelondroMergeIterator(
cache.keySet().iterator(),
cache.tailMap(startWordHash).keySet().iterator(),
assortmentCluster.hashConjunction(startWordHash, true),
true),
backend.wordHashes(startWordHash, true),

@ -131,8 +131,8 @@ public class plasmaWordIndexDistribution {
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
// collect index
//String startPointHash = yacyCore.seedCache.mySeed.hash;
String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
String startPointHash = yacyCore.seedDB.mySeed.hash;
//String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
if ((indexEntities == null) || (indexEntities.length == 0)) {
log.logDebug("No index available for index transfer, hash start-point " + startPointHash);

@ -380,7 +380,7 @@ public class yacyClient {
} catch (NumberFormatException e) {
searchtime = totalrequesttime;
}
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score " + targetPeer.selectscore + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);

@ -58,7 +58,7 @@ public class yacyNewsAction implements yacyPeerAction {
if ((recordString == null) || (recordString.length() == 0)) return;
String decodedString = de.anomic.tools.crypt.simpleDecode(recordString, "");
yacyNewsRecord record = new yacyNewsRecord(decodedString);
System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString());
//System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString());
String cre1 = (String) serverCodings.string2map(decodedString).get("cre");
String cre2 = (String) serverCodings.string2map(record.toString()).get("cre");
if ((cre1 == null) || (cre2 == null) || (!(cre1.equals(cre2)))) {

@ -169,6 +169,7 @@ public class yacyNewsPool {
if ((record.category().equals("crwlstrt")) &&
((yacyCore.universalTime() - record.created().getTime()) > (1000 * 60 * 60 * 24) /* 1 Day */)) {
yacySeed seed = yacyCore.seedDB.get(record.originator());
if (seed == null) return false;
try {
return (Integer.parseInt(seed.get("ISpeed", "-")) < 10);
} catch (NumberFormatException ee) {

@ -43,6 +43,7 @@ package de.anomic.yacy;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
@ -106,23 +107,32 @@ public class yacySearch extends Thread {
if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected();
kelondroMScoreCluster ranking = new kelondroMScoreCluster();
HashMap seeds = new HashMap();
yacySeed seed;
Enumeration dhtEnum;
Iterator i = wordhashes.iterator();
int c;
String wordhash;
while (i.hasNext()) {
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next());
wordhash = (String) i.next();
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, wordhash);
c = 0;
while ((dhtEnum.hasMoreElements()) && (c < seedcount)) {
seed = (yacySeed) dhtEnum.nextElement();
//System.out.println("Selected peer " + seed.hash + " for wordhash " + wordhash + ", score " + c);
ranking.addScore(seed.hash, c++);
seeds.put(seed.hash, seed);
}
}
if (ranking.size() < seedcount) seedcount = ranking.size();
yacySeed[] result = new yacySeed[seedcount];
Iterator e = ranking.scores(true); // lower are better
c = 0;
while ((e.hasNext()) && (c < result.length)) result[c++] = yacyCore.seedDB.getConnected((String) e.next());
while ((e.hasNext()) && (c < result.length)) {
seed = (yacySeed) seeds.get((String) e.next());
seed.selectscore = c;
result[c++] = seed;
}
//System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug
return result;
@ -162,8 +172,6 @@ public class yacySearch extends Thread {
// wait until wanted delay passed or wanted result appeared
boolean anyIdle = true;
while ((anyIdle) && ((System.currentTimeMillis() - start) < waitingtime)) {
// wait..
try {Thread.currentThread().sleep(200);} catch (InterruptedException e) {}
// check if all threads have been finished or results so far are enough
c = 0;
anyIdle = false;
@ -175,6 +183,8 @@ public class yacySearch extends Thread {
break; // we have enough
}
if (c >= count * 5) break;
// wait a little time ..
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
}
// collect results

@ -89,6 +89,7 @@ public class yacySeed {
public String hash;
private Map dna;
public int available;
public int selectscore = -1; // only for debugging
public yacySeed(String hash, Map dna) {
// create a seed with a pre-defined hash map

Loading…
Cancel
Save