From 5683162bd3531a672b8311db8318e0993b9cfaa5 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 24 Sep 2012 12:01:09 +0200 Subject: [PATCH] simplifications in DHT Distribution class and more documentation --- source/net/yacy/cora/document/ASCII.java | 2 + .../services/federated/yacy/Distribution.java | 166 +++++++++--------- source/net/yacy/peers/DHTSelection.java | 4 +- source/net/yacy/peers/Dispatcher.java | 1 + .../net/yacy/peers/graphics/NetworkGraph.java | 7 +- .../net/yacy/server/http/TemplateEngine.java | 16 +- 6 files changed, 96 insertions(+), 100 deletions(-) diff --git a/source/net/yacy/cora/document/ASCII.java b/source/net/yacy/cora/document/ASCII.java index 1d7998225..570ef829f 100644 --- a/source/net/yacy/cora/document/ASCII.java +++ b/source/net/yacy/cora/document/ASCII.java @@ -124,6 +124,8 @@ public class ASCII implements Comparator { } public final static byte[] getBytes(final String s) { + assert s != null; + assert s.length() < 3 || s.charAt(2) != '@'; int count = s.length(); final byte[] b = new byte[count]; for (int i = 0; i < count; i++) { diff --git a/source/net/yacy/cora/services/federated/yacy/Distribution.java b/source/net/yacy/cora/services/federated/yacy/Distribution.java index 033068226..3e413c38e 100644 --- a/source/net/yacy/cora/services/federated/yacy/Distribution.java +++ b/source/net/yacy/cora/services/federated/yacy/Distribution.java @@ -30,11 +30,7 @@ import net.yacy.cora.order.Base64Order; * vertical: scale with number of references for every word * The vertical scaling is selected using the corresponding reference hash, the url hash * This has the effect that every vertical position accumulates references for the same url - * and the urls are not spread over all positions of the DHT. To use this effect, the - * horizontal DHT position must be normed to a 'rest' value of a partition size. - * @param wordHash, the hash of the RWI - * @param urlHash, the hash of a reference - * @return a double in the range 0 .. 
1.0 (including 0, excluding 1.0), the DHT position + * and the urls are not spread over all positions of the DHT. */ public class Distribution { @@ -50,36 +46,70 @@ public class Distribution { */ public Distribution(int verticalPartitionExponent) { assert verticalPartitionExponent > 0; + + // the partition exponent is the number of bits that we use for the partition this.verticalPartitionExponent = verticalPartitionExponent; - this.partitionCount = 1 << verticalPartitionExponent; + + // number of partitions that is possible for the given number of partition exponent bits + this.partitionCount = 1 << this.verticalPartitionExponent; + + // we use Long.SIZE - 1 as bitlength since we use only the 63 bits of 0..Long.MAX_VALUE this.shiftLength = Long.SIZE - 1 - this.verticalPartitionExponent; + + // the partition size is the cardinal number of possible hash positions for each segment of the DHT this.partitionSize = 1L << this.shiftLength; - this.partitionMask = (1L << shiftLength) - 1L; + + // the partition mask is a bitmask for each partition + this.partitionMask = this.partitionSize - 1L; } public int verticalPartitions() { - return 1 << verticalPartitionExponent; + return this.partitionCount; } + /** + * the horizontal DHT position uses simply the ordering on hashes, the base 64 order to assign a cardinal + * in the range of 0..Long.MAX_VALUE to the word. + * @param wordHash + * @return + */ public final static long horizontalDHTPosition(byte[] wordHash) { assert wordHash != null; + assert wordHash[2] != '@'; return Base64Order.enhancedCoder.cardinal(wordHash); } + /** + * the horizontal DHT distance is the cardinal number between the cardinal position of the hashes of two objects in the DHT + * Since the DHT is closed at the end, a cardinal at the high-end of 0..Long.MAX_VALUE can be very close to a low cardinal number. + * @param from the start DHT position as word hash + * @param to the end DHT position as word hash + * @return the distance of two positions. 
The maximal distance is Long.MAX_VALUE (the distance is measured forward from 'from' to 'to', wrapping around at the end) + */ public final static long horizontalDHTDistance(final byte[] from, final byte[] to) { // the dht distance is a positive value between 0 and 1 // if the distance is small, the word more probably belongs to the peer - assert to != null; - assert from != null; final long toPos = horizontalDHTPosition(to); final long fromPos = horizontalDHTPosition(from); return horizontalDHTDistance(fromPos, toPos); } + /** + * the horizontalDHTDistance computes the closed-at-the-end ordering of two cardinal DHT positions + * @param fromPos the start DHT position as cardinal of the word hash + * @param toPos the end DHT position as cardinal of the word hash + * @return the forward distance from fromPos to toPos, wrapping around at the end. The maximal distance is Long.MAX_VALUE + */ public final static long horizontalDHTDistance(final long fromPos, final long toPos) { return (toPos >= fromPos) ? toPos - fromPos : (Long.MAX_VALUE - fromPos) + toPos + 1; } + /** + * the reverse function to horizontalDHTPosition + * This is a bit fuzzy since the horizontalDHTPosition cannot represent all 72 bits of the word hash (Yes, it's a HASH!) + * @param l the cardinal position in the DHT + * @return the abstract/computed word of the cardinal. + */ public final static byte[] positionToHash(final long l) { // transform the position of a peer position into a close peer hash byte[] h = Base64Order.enhancedCoder.uncardinal(l); @@ -87,107 +117,69 @@ public class Distribution { return h; } + /** + * the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e) + * compute the position using a specific fragment of the word hash and the url hash: + * - from the word hash take the lower (63 - e) bits + * - from the url hash take the higher e bits + * in case that the partitionExponent is 1, only one bit is taken from the urlHash, + * which means that the partition is in two parts. + * With partitionExponent = 2 it is divided in four parts and so on.
+ * @param wordHash the hash of the word; it provides the lower bits of the position + * @param urlHash the hash of the url; it provides the top bits of the position + * @return the DHT position combined from the word hash and the url hash + */ public final long verticalDHTPosition(final byte[] wordHash, final String urlHash) { // this creates 1^^e different positions for the same word hash (according to url hash) - assert wordHash != null; - assert urlHash != null; - if (urlHash == null || verticalPartitionExponent < 1) return Distribution.horizontalDHTPosition(wordHash); - // the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e) - // compute the position using a specific fragment of the word hash and the url hash: - // - from the word hash take the 63 - lower bits - // - from the url hash take the higher bits - // in case that the partitionExpoent is 1, only one bit is taken from the urlHash, - // which means that the partition is in two parts. - // With partitionExponent = 2 it is divided in four parts and so on. return (Distribution.horizontalDHTPosition(wordHash) & partitionMask) | (Distribution.horizontalDHTPosition(ASCII.getBytes(urlHash)) & ~partitionMask); } + /** + * compute a vertical DHT position for a given word + * This is used when a word is searched and the peers holding the word must be computed + * @param wordHash, the hash of the word + * @param verticalPosition (0 <= verticalPosition < verticalPartitions()) + * @return a number that represents a DHT position and can be converted back to a word hash + */ public final long verticalDHTPosition(final byte[] wordHash, final int verticalPosition) { - assert wordHash != null; - assert wordHash[2] != '@'; + assert verticalPosition >= 0 && verticalPosition < verticalPartitions(); long verticalMask = ((long) verticalPosition) << this.shiftLength; // don't remove the cast! it will become an integer result which is wrong.
return (Distribution.horizontalDHTPosition(wordHash) & partitionMask) | verticalMask; } - public final int verticalDHTPosition(final byte[] urlHash) { - assert urlHash != null; - return (int) (Distribution.horizontalDHTPosition(urlHash) >> this.shiftLength); // take only the top- bits - } - /** - * compute all vertical DHT positions for a given word - * This is used when a word is searched and the peers holding the word must be computed - * @param wordHash, the hash of the word - * @param partitions, the number of partitions of the DHT - * @return a vector of long values, the possible DHT positions + * compute the vertical position of a url hash. That's the same value as the second parameter of verticalDHTPosition(byte[], int) + * @param urlHash the hash of the url + * @return a number in the range 0 .. verticalPartitions() - 1 */ - public final long[] verticalDHTPositions(final byte[] wordHash) { - assert wordHash != null; - long[] l = new long[this.partitionCount]; - l[0] = Distribution.horizontalDHTPosition(wordHash) & (partitionSize - 1L); // this is the lowest possible position - for (int i = 1; i < this.partitionCount; i++) { - l[i] = l[i - 1] + partitionSize; // no overflow, because we started with the lowest - } - return l; - } - - /* - public static void main(String[] args) { - long c1 = Base64Order.enhancedCoder.cardinal("AAAAAAAAAAAA".getBytes()); - System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c1))); - long c2 = Base64Order.enhancedCoder.cardinal("____________".getBytes()); - System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c2))); - Random r = new Random(System.currentTimeMillis()); - for (int i = 0; i < 10000; i++) { - long l = r.nextLong(); - byte[] h = positionToHash(l); - if (l != Base64Order.enhancedCoder.cardinal(h)) System.out.println(l); - } + public final int verticalDHTPosition(final byte[] urlHash) { + return (int) (Distribution.horizontalDHTPosition(urlHash) >> this.shiftLength); // take only the top- bits } - */ public static void main(String[]
args) { // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFn76 // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3 // test the DHT position calculation - String wordHash = "hHJBztzcFn76"; - //double dhtd; + byte[] wordHash = UTF8.getBytes("hHJBztzcFn76"); long dhtl; - int partitionExponent = 0; - Distribution partition = new Distribution(0); - if (args.length == 3) { - // the horizontal and vertical position calculation - String urlHash = args[1]; - partitionExponent = Integer.parseInt(args[2]); - dhtl = partition.verticalDHTPosition(UTF8.getBytes(wordHash), urlHash); - } else { - // only a horizontal position calculation - dhtl = Distribution.horizontalDHTPosition(UTF8.getBytes(wordHash)); - } - //System.out.println("DHT Double = " + dhtd); + int partitionExponent = 4; + Distribution partition = new Distribution(partitionExponent); + dhtl = Distribution.horizontalDHTPosition(wordHash); System.out.println("DHT Long = " + dhtl); - System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE)); - //System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd)); - //System.out.println("DHT as b64 from Double = " + positionToHash(dhtd)); System.out.println("DHT as b64 from Long = " + ASCII.String(Distribution.positionToHash(dhtl))); - System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: "); - /* - - double[] d = dhtPositionsDouble(wordHash, partitionExponent); - for (int i = 0; i < d.length; i++) { - if (i > 0) System.out.print(", "); - System.out.print(positionToHash(d[i])); - } - System.out.println(); - */ - System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : "); - long[] l = partition.verticalDHTPositions(UTF8.getBytes(wordHash)); - for (int i = 0; i < l.length; i++) { - if (i > 0) System.out.print(", "); - System.out.print(ASCII.String(Distribution.positionToHash(l[i]))); + System.out.println("all " + 
partition.verticalPartitions() + " DHT positions from long : "); + for (int i = 0; i < partition.verticalPartitions(); i++) { + long l = partition.verticalDHTPosition(wordHash, i); + System.out.println(ASCII.String(Distribution.positionToHash(l))); } System.out.println(); + + + long c1 = Base64Order.enhancedCoder.cardinal("AAAAAAAAAAAA".getBytes()); + System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c1))); + long c2 = Base64Order.enhancedCoder.cardinal("____________".getBytes()); + System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c2))); } } diff --git a/source/net/yacy/peers/DHTSelection.java b/source/net/yacy/peers/DHTSelection.java index 4eacd3930..57e90df13 100644 --- a/source/net/yacy/peers/DHTSelection.java +++ b/source/net/yacy/peers/DHTSelection.java @@ -197,9 +197,9 @@ public class DHTSelection { int redundancy, Map regularSeeds) { // this method is called from the search target computation - final long[] dhtVerticalTargets = seedDB.scheme.verticalDHTPositions(wordhash); Seed seed; - for (long dhtVerticalTarget : dhtVerticalTargets) { + for (int verticalPosition = 0; verticalPosition < seedDB.scheme.verticalPartitions(); verticalPosition++) { + long dhtVerticalTarget = seedDB.scheme.verticalDHTPosition(wordhash, verticalPosition); wordhash = Distribution.positionToHash(dhtVerticalTarget); Iterator dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, false); int c = Math.min(seedDB.sizeConnected(), redundancy); diff --git a/source/net/yacy/peers/Dispatcher.java b/source/net/yacy/peers/Dispatcher.java index d4d052624..9406ca6fb 100644 --- a/source/net/yacy/peers/Dispatcher.java +++ b/source/net/yacy/peers/Dispatcher.java @@ -273,6 +273,7 @@ public class Dispatcher { * then no additional IO is necessary. 
*/ private void enqueueContainersToCloud(final List>[] containers) { + assert (containers.length == this.seeds.scheme.verticalPartitions()); if (this.transmissionCloud == null) return; ReferenceContainer lastContainer; byte[] primaryTarget; diff --git a/source/net/yacy/peers/graphics/NetworkGraph.java b/source/net/yacy/peers/graphics/NetworkGraph.java index 917bd4797..dc09dfb09 100644 --- a/source/net/yacy/peers/graphics/NetworkGraph.java +++ b/source/net/yacy/peers/graphics/NetworkGraph.java @@ -161,9 +161,10 @@ public class NetworkGraph { final Iterator i = query.query_include_hashes.iterator(); eventPicture.setColor(RasterPlotter.GREY); while (i.hasNext()) { - final long[] positions = seedDB.scheme.verticalDHTPositions(i.next()); - for (final long position : positions) { - angle = cyc + (360.0d * ((position) / DOUBLE_LONG_MAX_VALUE)); + byte[] wordHash = i.next(); + for (int verticalPosition = 0; verticalPosition < seedDB.scheme.verticalPartitions(); verticalPosition++) { + long position = seedDB.scheme.verticalDHTPosition(wordHash, verticalPosition); + angle = cyc + (360.0d * (position / DOUBLE_LONG_MAX_VALUE)); eventPicture.arcLine(cx, cy, cr - 20, cr, angle, true, null, null, -1, -1, -1, false); } } diff --git a/source/net/yacy/server/http/TemplateEngine.java b/source/net/yacy/server/http/TemplateEngine.java index b8aee294d..29c1e3370 100644 --- a/source/net/yacy/server/http/TemplateEngine.java +++ b/source/net/yacy/server/http/TemplateEngine.java @@ -147,14 +147,14 @@ public final class TemplateEngine { private final static byte[] ul = "_".getBytes(); - private final static byte[] alternative_which = ASCII.getBytes(" type=\"alternative\" which=\""); - private final static byte[] multi_num = ASCII.getBytes(" type=\"multi\" num=\""); - private final static byte[] open_endtag = ASCII.getBytes("\n"); - private final static byte[] close_tagn = ASCII.getBytes(">\n"); - private final static byte[] PP = ASCII.getBytes("%%"); - private final static byte[] 
hash_brackopen_slash = ASCII.getBytes("#(/"); - private final static byte[] brackclose_hash = ASCII.getBytes(")#"); + private final static byte[] alternative_which = " type=\"alternative\" which=\"".getBytes(); + private final static byte[] multi_num = " type=\"multi\" num=\"".getBytes(); + private final static byte[] open_endtag = "\n".getBytes(); + private final static byte[] close_tagn = ">\n".getBytes(); + private final static byte[] PP = "%%".getBytes(); + private final static byte[] hash_brackopen_slash = "#(/".getBytes(); + private final static byte[] brackclose_hash = ")#".getBytes(); /**