From 5716f8521dd1b947ca95aee9a1fdfdf4ab4ae5d3 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 12 Aug 2005 14:06:47 +0000 Subject: [PATCH] bug fixes for word ordering and dht index selection git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@521 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Network.java | 6 ++- source/de/anomic/kelondro/kelondroTree.java | 47 ++++++++++++++++- .../de/anomic/plasma/plasmaSwitchboard.java | 4 +- source/de/anomic/plasma/plasmaWordIndex.java | 50 ++++++++++++++++++- .../anomic/plasma/plasmaWordIndexCache.java | 3 +- .../plasma/plasmaWordIndexDistribution.java | 4 +- source/de/anomic/yacy/yacyClient.java | 2 +- source/de/anomic/yacy/yacyNewsAction.java | 2 +- source/de/anomic/yacy/yacyNewsPool.java | 1 + source/de/anomic/yacy/yacySearch.java | 18 +++++-- source/de/anomic/yacy/yacySeed.java | 1 + 11 files changed, 123 insertions(+), 15 deletions(-) diff --git a/htroot/Network.java b/htroot/Network.java index cd036ff72..2d8f29365 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -193,7 +193,7 @@ public class Network { prop.put("table_comment",0); } - }else { + } else { // generate table int page = Integer.parseInt(post.get("page", "1")); int conCount = 0; @@ -226,7 +226,9 @@ public class Network { try { for (int c = availableNews - 1; c >= 0; c--) { record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c); - if (record.category().equals("prfleupd")) { + if (record == null) { + break; + } else if (record.category().equals("prfleupd")) { updatedProfile.add(record.originator()); } else if (record.category().equals("wiki_upd")) { updatedWiki.put(record.originator(), record.attributes().get("page")); diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 18950320a..ac7bd6fea 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -241,6 +241,7 @@ public class kelondroTree extends kelondroRecords implements Comparator { } } // we reached a node where we must insert the new value + // the parent of this new value can be obtained by getParent() // all values are set, just return } @@ -737,7 +738,10 @@ public class kelondroTree extends kelondroRecords implements Comparator { if (nn == null) { return (new HashSet()).iterator(); // an empty iterator } else { - return new nodeIterator(up, rotating, nn); + // the node nn may be greater or smaller than the firstKey + // depending on the ordering direction, + // we must find the next smaller or greater node + return new correctedNodeIterator(up, rotating, nn, firstKey); } } } catch (IOException e) { @@ -745,6 +749,47 @@ public class kelondroTree extends kelondroRecords implements Comparator { } } + private class correctedNodeIterator implements Iterator { + + Iterator ii; + Node nextNode; + + public correctedNodeIterator(boolean up, boolean rotating, Node start, byte[] firstKey) throws IOException { + ii = new nodeIterator(up, rotating, start); + nextNode = (ii.hasNext()) ? (Node) ii.next() : null; + if (nextNode != null) { + int c = compare(firstKey, nextNode.getKey()); + if ((c > 0) && (up)) { + // firstKey > nextNode.getKey() + System.out.println("CORRECTING ITERATOR: firstKey=" + new String(firstKey) + ", nextNode=" + new String(nextNode.getKey())); + nextNode = (ii.hasNext()) ? (Node) ii.next() : null; + } + if ((c < 0) && (!(up))) { + nextNode = (ii.hasNext()) ? (Node) ii.next() : null; + } + } + } + + public void finalize() { + ii = null; + nextNode = null; + } + + public boolean hasNext() { + return nextNode != null; + } + + public Object next() { + Node r = nextNode; + nextNode = (ii.hasNext()) ? (Node) ii.next() : null; + return r; + } + + public void remove() { + throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported"); + } + } + private class nodeIterator implements Iterator { // we implement an iteration! (not a recursive function as the structure would suggest...) // the iterator iterates Node objects diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 887562bf3..881c91766 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1298,8 +1298,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // do global fetching int globalresults = 0; if (global) { - int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search - int fetchpeers = ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds + int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search + int fetchpeers = 10 + ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds long fetchtime = time * 7 / 10; // time to waste if (fetchcount > count) fetchcount = count; globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index bfcbb2df0..1c433355b 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -116,10 +116,58 @@ public final class plasmaWordIndex { } public Iterator wordHashes(String startHash, boolean up, boolean rot) { - return ramCache.wordHashes(startHash, up); + //return ramCache.wordHashes(startHash, up); + return new correctedWordIterator(up, rot, startHash); // use correction until bug is found } + private class correctedWordIterator implements Iterator { + + Iterator ii; + String nextWord; + + public correctedWordIterator(boolean up, boolean rotating, String firstWord) { + ii = ramCache.wordHashes(firstWord, up); + nextWord = (ii.hasNext()) ? (String) ii.next() : null; + boolean corrected = true; + int cc = 0; // to avoid rotation loops + while ((nextWord != null) && (corrected) && (cc < 50)) { + int c = firstWord.compareTo(nextWord); + corrected = false; + if ((c > 0) && (up)) { + // firstKey > nextNode.getKey() + //System.out.println("CORRECTING WORD ITERATOR: firstWord=" + firstWord + ", nextWord=" + nextWord); + nextWord = (ii.hasNext()) ? (String) ii.next() : null; + corrected = true; + cc++; + } + if ((c < 0) && (!(up))) { + nextWord = (ii.hasNext()) ? (String) ii.next() : null; + corrected = true; + cc++; + } + } + } + + public void finalize() { + ii = null; + nextWord = null; + } + + public boolean hasNext() { + return nextWord != null; + } + public Object next() { + String r = nextWord; + nextWord = (ii.hasNext()) ? (String) ii.next() : null; + return r; + } + + public void remove() { + throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported"); + } + } + public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) { return new iterateFiles(startHash, up, deleteEmpty); } diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index a846b4c7f..1e763a2cf 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -47,6 +47,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; +import java.util.TreeSet; import java.util.Enumeration; import de.anomic.kelondro.kelondroException; @@ -329,7 +330,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up"); return new kelondroMergeIterator( new kelondroMergeIterator( - cache.keySet().iterator(), + cache.tailMap(startWordHash).keySet().iterator(), assortmentCluster.hashConjunction(startWordHash, true), true), backend.wordHashes(startWordHash, true), diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index 7ca979e77..90188b37f 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -131,8 +131,8 @@ public class plasmaWordIndexDistribution { if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1; // collect index - //String startPointHash = yacyCore.seedCache.mySeed.hash; - String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength); + String startPointHash = yacyCore.seedDB.mySeed.hash; + //String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength); plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount); if ((indexEntities == null) || (indexEntities.length == 0)) { log.logDebug("No index available for index transfer, hash start-point " + startPointHash); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index fa3d3ba3c..6fc172c90 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -380,7 +380,7 @@ public class yacyClient { } catch (NumberFormatException e) { searchtime = totalrequesttime; } - yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); + yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score " + targetPeer.selectscore + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); return results; } catch (Exception e) { yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e); diff --git a/source/de/anomic/yacy/yacyNewsAction.java b/source/de/anomic/yacy/yacyNewsAction.java index 1f2cd690c..a2b7418fe 100644 --- a/source/de/anomic/yacy/yacyNewsAction.java +++ b/source/de/anomic/yacy/yacyNewsAction.java @@ -58,7 +58,7 @@ public class yacyNewsAction implements yacyPeerAction { if ((recordString == null) || (recordString.length() == 0)) return; String decodedString = de.anomic.tools.crypt.simpleDecode(recordString, ""); yacyNewsRecord record = new yacyNewsRecord(decodedString); - System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString()); + //System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString()); String cre1 = (String) serverCodings.string2map(decodedString).get("cre"); String cre2 = (String) serverCodings.string2map(record.toString()).get("cre"); if ((cre1 == null) || (cre2 == null) || (!(cre1.equals(cre2)))) { diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 43d8de62a..44b1e4035 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -169,6 +169,7 @@ public class yacyNewsPool { if ((record.category().equals("crwlstrt")) && ((yacyCore.universalTime() - record.created().getTime()) > (1000 * 60 * 60 * 24) /* 1 Day */)) { yacySeed seed = yacyCore.seedDB.get(record.originator()); + if (seed == null) return false; try { return (Integer.parseInt(seed.get("ISpeed", "-")) < 10); } catch (NumberFormatException ee) { diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index 4fc9e9770..a7a06db0a 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -43,6 +43,7 @@ package de.anomic.yacy; import java.util.Enumeration; import java.util.Iterator; import java.util.Set; +import java.util.HashMap; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.plasma.plasmaCrawlLURL; @@ -106,23 +107,32 @@ public class yacySearch extends Thread { if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected(); kelondroMScoreCluster ranking = new kelondroMScoreCluster(); + HashMap seeds = new HashMap(); yacySeed seed; Enumeration dhtEnum; Iterator i = wordhashes.iterator(); int c; + String wordhash; while (i.hasNext()) { - dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next()); + wordhash = (String) i.next(); + dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, wordhash); c = 0; while ((dhtEnum.hasMoreElements()) && (c < seedcount)) { seed = (yacySeed) dhtEnum.nextElement(); + //System.out.println("Selected peer " + seed.hash + " for wordhash " + wordhash + ", score " + c); ranking.addScore(seed.hash, c++); + seeds.put(seed.hash, seed); } } if (ranking.size() < seedcount) seedcount = ranking.size(); yacySeed[] result = new yacySeed[seedcount]; Iterator e = ranking.scores(true); // lower are better c = 0; - while ((e.hasNext()) && (c < result.length)) result[c++] = yacyCore.seedDB.getConnected((String) e.next()); + while ((e.hasNext()) && (c < result.length)) { + seed = (yacySeed) seeds.get((String) e.next()); + seed.selectscore = c; + result[c++] = seed; + } //System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug return result; @@ -162,8 +172,6 @@ public class yacySearch extends Thread { // wait until wanted delay passed or wanted result appeared boolean anyIdle = true; while ((anyIdle) && ((System.currentTimeMillis() - start) < waitingtime)) { - // wait.. - try {Thread.currentThread().sleep(200);} catch (InterruptedException e) {} // check if all threads have been finished or results so far are enough c = 0; anyIdle = false; @@ -175,6 +183,8 @@ public class yacySearch extends Thread { break; // we have enough } if (c >= count * 5) break; + // wait a little time .. + try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {} } // collect results diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 799ae1b40..9311e29b4 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -89,6 +89,7 @@ public class yacySeed { public String hash; private Map dna; public int available; + public int selectscore = -1; // only for debugging public yacySeed(String hash, Map dna) { // create a seed with a pre-defined hash map