|
|
|
@ -30,11 +30,7 @@ import net.yacy.cora.order.Base64Order;
|
|
|
|
|
* vertical: scale with number of references for every word
|
|
|
|
|
* The vertical scaling is selected using the corresponding reference hash, the url hash
|
|
|
|
|
* This has the effect that every vertical position accumulates references for the same url
|
|
|
|
|
* and the urls are not spread over all positions of the DHT. To use this effect, the
|
|
|
|
|
* horizontal DHT position must be normed to a 'rest' value of a partition size.
|
|
|
|
|
* @param wordHash, the hash of the RWI
|
|
|
|
|
* @param urlHash, the hash of a reference
|
|
|
|
|
* @return a double in the range 0 .. 1.0 (including 0, excluding 1.0), the DHT position
|
|
|
|
|
* and the urls are not spread over all positions of the DHT.
|
|
|
|
|
*/
|
|
|
|
|
public class Distribution {
|
|
|
|
|
|
|
|
|
@ -50,36 +46,70 @@ public class Distribution {
|
|
|
|
|
*/
|
|
|
|
|
public Distribution(int verticalPartitionExponent) {
    // a partition exponent below 1 is rejected when assertions are enabled
    assert verticalPartitionExponent > 0;

    // the partition exponent is the number of bits that we use for the partition
    this.verticalPartitionExponent = verticalPartitionExponent;

    // number of partitions that is possible for the given number of partition exponent bits
    this.partitionCount = 1 << this.verticalPartitionExponent;

    // we use Long.SIZE - 1 as bitlength since we use only the 63 bits of 0..Long.MAX_VALUE
    this.shiftLength = Long.SIZE - 1 - this.verticalPartitionExponent;

    // the partition size is the cardinal number of possible hash positions for each segment of the DHT
    this.partitionSize = 1L << this.shiftLength;

    // the partition mask is a bitmask for each partition: all bits below the partition bits are set
    this.partitionMask = this.partitionSize - 1L;
}
|
|
|
|
|
|
|
|
|
|
public int verticalPartitions() {
|
|
|
|
|
return 1 << verticalPartitionExponent;
|
|
|
|
|
return this.partitionCount;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the horizontal DHT position uses simply the ordering on hashes, the base 64 order to assign a cardinal
|
|
|
|
|
* in the range of 0..Long.MAX_VALUE to the word.
|
|
|
|
|
* @param wordHash
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public final static long horizontalDHTPosition(byte[] wordHash) {
|
|
|
|
|
assert wordHash != null;
|
|
|
|
|
assert wordHash[2] != '@';
|
|
|
|
|
return Base64Order.enhancedCoder.cardinal(wordHash);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the horizontal DHT distance is the cardinal number between the cardinal position of the hashes of two objects in the DHT
|
|
|
|
|
* Since the DHT is closed at the end, a cardinal at the high-end of 0..Long.MAX_VALUE can be very close to a low cardinal number.
|
|
|
|
|
* @param from the start DHT position as word hash
|
|
|
|
|
* @param to the end DHT position as word hash
|
|
|
|
|
* @return the distance of two positions. The maximal distance is Long.MAX_VALUE / 2
|
|
|
|
|
*/
|
|
|
|
|
public final static long horizontalDHTDistance(final byte[] from, final byte[] to) {
|
|
|
|
|
// the dht distance is a positive value between 0 and 1
|
|
|
|
|
// if the distance is small, the word more probably belongs to the peer
|
|
|
|
|
assert to != null;
|
|
|
|
|
assert from != null;
|
|
|
|
|
final long toPos = horizontalDHTPosition(to);
|
|
|
|
|
final long fromPos = horizontalDHTPosition(from);
|
|
|
|
|
return horizontalDHTDistance(fromPos, toPos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the horizontalDHTDistance computes the closed-at-the-end ordering of two cardinal DHT positions
|
|
|
|
|
* @param fromPos the start DHT position as cardinal of the word hash
|
|
|
|
|
* @param toPos the end DHT position as cardinal of the word hash
|
|
|
|
|
* @return the distance of two positions. The maximal distance is Long.MAX_VALUE / 2
|
|
|
|
|
*/
|
|
|
|
|
public final static long horizontalDHTDistance(final long fromPos, final long toPos) {
|
|
|
|
|
return (toPos >= fromPos) ? toPos - fromPos : (Long.MAX_VALUE - fromPos) + toPos + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the reverse function to horizontalDHTPosition
|
|
|
|
|
* This is a bit fuzzy since the horizontalDHTPosition cannot represent all 72 bits of the word hash (yes, it's a hash!)
|
|
|
|
|
* @param l the cardinal position in the DHT
|
|
|
|
|
* @return the abstract/computed word of the cardinal.
|
|
|
|
|
*/
|
|
|
|
|
public final static byte[] positionToHash(final long l) {
|
|
|
|
|
// transform the position of a peer position into a close peer hash
|
|
|
|
|
byte[] h = Base64Order.enhancedCoder.uncardinal(l);
|
|
|
|
@ -87,107 +117,69 @@ public class Distribution {
|
|
|
|
|
return h;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
|
|
|
|
|
* compute the position using a specific fragment of the word hash and the url hash:
|
|
|
|
|
* - from the word hash take the 63 - <partitionExponent> lower bits
|
|
|
|
|
* - from the url hash take the <partitionExponent> higher bits
|
|
|
|
|
* in case that the partitionExpoent is 1, only one bit is taken from the urlHash,
|
|
|
|
|
* which means that the partition is in two parts.
|
|
|
|
|
* With partitionExponent = 2 it is divided in four parts and so on.
|
|
|
|
|
* @param wordHash
|
|
|
|
|
* @param urlHash
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public final long verticalDHTPosition(final byte[] wordHash, final String urlHash) {
|
|
|
|
|
// this creates 1^^e different positions for the same word hash (according to url hash)
|
|
|
|
|
assert wordHash != null;
|
|
|
|
|
assert urlHash != null;
|
|
|
|
|
if (urlHash == null || verticalPartitionExponent < 1) return Distribution.horizontalDHTPosition(wordHash);
|
|
|
|
|
// the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
|
|
|
|
|
// compute the position using a specific fragment of the word hash and the url hash:
|
|
|
|
|
// - from the word hash take the 63 - <partitionExponent> lower bits
|
|
|
|
|
// - from the url hash take the <partitionExponent> higher bits
|
|
|
|
|
// in case that the partitionExpoent is 1, only one bit is taken from the urlHash,
|
|
|
|
|
// which means that the partition is in two parts.
|
|
|
|
|
// With partitionExponent = 2 it is divided in four parts and so on.
|
|
|
|
|
return (Distribution.horizontalDHTPosition(wordHash) & partitionMask) | (Distribution.horizontalDHTPosition(ASCII.getBytes(urlHash)) & ~partitionMask);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* compute a vertical DHT position for a given word
|
|
|
|
|
* This is used when a word is searched and the peers holding the word must be computed
|
|
|
|
|
* @param wordHash, the hash of the word
|
|
|
|
|
* @param verticalPosition (0 <= verticalPosition < verticalPartitions())
|
|
|
|
|
* @return a number that can represents a position and can be computed to a word hash again
|
|
|
|
|
*/
|
|
|
|
|
public final long verticalDHTPosition(final byte[] wordHash, final int verticalPosition) {
|
|
|
|
|
assert wordHash != null;
|
|
|
|
|
assert wordHash[2] != '@';
|
|
|
|
|
assert verticalPosition >= 0 && verticalPosition < verticalPartitions();
|
|
|
|
|
long verticalMask = ((long) verticalPosition) << this.shiftLength; // don't remove the cast! it will become an integer result which is wrong.
|
|
|
|
|
return (Distribution.horizontalDHTPosition(wordHash) & partitionMask) | verticalMask;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public final int verticalDHTPosition(final byte[] urlHash) {
|
|
|
|
|
assert urlHash != null;
|
|
|
|
|
return (int) (Distribution.horizontalDHTPosition(urlHash) >> this.shiftLength); // take only the top-<partitionExponent> bits
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* compute all vertical DHT positions for a given word
|
|
|
|
|
* This is used when a word is searched and the peers holding the word must be computed
|
|
|
|
|
* @param wordHash, the hash of the word
|
|
|
|
|
* @param partitions, the number of partitions of the DHT
|
|
|
|
|
* @return a vector of long values, the possible DHT positions
|
|
|
|
|
* compute the vertical position of a url hash. Thats the same value as second parameter in verticalDHTPosition/2
|
|
|
|
|
* @param urlHash
|
|
|
|
|
* @return a number from 0..verticalPartitions()
|
|
|
|
|
*/
|
|
|
|
|
public final long[] verticalDHTPositions(final byte[] wordHash) {
|
|
|
|
|
assert wordHash != null;
|
|
|
|
|
long[] l = new long[this.partitionCount];
|
|
|
|
|
l[0] = Distribution.horizontalDHTPosition(wordHash) & (partitionSize - 1L); // this is the lowest possible position
|
|
|
|
|
for (int i = 1; i < this.partitionCount; i++) {
|
|
|
|
|
l[i] = l[i - 1] + partitionSize; // no overflow, because we started with the lowest
|
|
|
|
|
}
|
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
long c1 = Base64Order.enhancedCoder.cardinal("AAAAAAAAAAAA".getBytes());
|
|
|
|
|
System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c1)));
|
|
|
|
|
long c2 = Base64Order.enhancedCoder.cardinal("____________".getBytes());
|
|
|
|
|
System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c2)));
|
|
|
|
|
Random r = new Random(System.currentTimeMillis());
|
|
|
|
|
for (int i = 0; i < 10000; i++) {
|
|
|
|
|
long l = r.nextLong();
|
|
|
|
|
byte[] h = positionToHash(l);
|
|
|
|
|
if (l != Base64Order.enhancedCoder.cardinal(h)) System.out.println(l);
|
|
|
|
|
}
|
|
|
|
|
public final int verticalDHTPosition(final byte[] urlHash) {
|
|
|
|
|
return (int) (Distribution.horizontalDHTPosition(urlHash) >> this.shiftLength); // take only the top-<partitionExponent> bits
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFn76
|
|
|
|
|
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3
|
|
|
|
|
// test the DHT position calculation
|
|
|
|
|
String wordHash = "hHJBztzcFn76";
|
|
|
|
|
//double dhtd;
|
|
|
|
|
byte[] wordHash = UTF8.getBytes("hHJBztzcFn76");
|
|
|
|
|
long dhtl;
|
|
|
|
|
int partitionExponent = 0;
|
|
|
|
|
Distribution partition = new Distribution(0);
|
|
|
|
|
if (args.length == 3) {
|
|
|
|
|
// the horizontal and vertical position calculation
|
|
|
|
|
String urlHash = args[1];
|
|
|
|
|
partitionExponent = Integer.parseInt(args[2]);
|
|
|
|
|
dhtl = partition.verticalDHTPosition(UTF8.getBytes(wordHash), urlHash);
|
|
|
|
|
} else {
|
|
|
|
|
// only a horizontal position calculation
|
|
|
|
|
dhtl = Distribution.horizontalDHTPosition(UTF8.getBytes(wordHash));
|
|
|
|
|
}
|
|
|
|
|
//System.out.println("DHT Double = " + dhtd);
|
|
|
|
|
int partitionExponent = 4;
|
|
|
|
|
Distribution partition = new Distribution(partitionExponent);
|
|
|
|
|
dhtl = Distribution.horizontalDHTPosition(wordHash);
|
|
|
|
|
System.out.println("DHT Long = " + dhtl);
|
|
|
|
|
System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE));
|
|
|
|
|
//System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
|
|
|
|
|
//System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
|
|
|
|
|
System.out.println("DHT as b64 from Long = " + ASCII.String(Distribution.positionToHash(dhtl)));
|
|
|
|
|
|
|
|
|
|
System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: ");
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
|
|
double[] d = dhtPositionsDouble(wordHash, partitionExponent);
|
|
|
|
|
for (int i = 0; i < d.length; i++) {
|
|
|
|
|
if (i > 0) System.out.print(", ");
|
|
|
|
|
System.out.print(positionToHash(d[i]));
|
|
|
|
|
}
|
|
|
|
|
System.out.println();
|
|
|
|
|
*/
|
|
|
|
|
System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : ");
|
|
|
|
|
long[] l = partition.verticalDHTPositions(UTF8.getBytes(wordHash));
|
|
|
|
|
for (int i = 0; i < l.length; i++) {
|
|
|
|
|
if (i > 0) System.out.print(", ");
|
|
|
|
|
System.out.print(ASCII.String(Distribution.positionToHash(l[i])));
|
|
|
|
|
System.out.println("all " + partition.verticalPartitions() + " DHT positions from long : ");
|
|
|
|
|
for (int i = 0; i < partition.verticalPartitions(); i++) {
|
|
|
|
|
long l = partition.verticalDHTPosition(wordHash, i);
|
|
|
|
|
System.out.println(ASCII.String(Distribution.positionToHash(l)));
|
|
|
|
|
}
|
|
|
|
|
System.out.println();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
long c1 = Base64Order.enhancedCoder.cardinal("AAAAAAAAAAAA".getBytes());
|
|
|
|
|
System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c1)));
|
|
|
|
|
long c2 = Base64Order.enhancedCoder.cardinal("____________".getBytes());
|
|
|
|
|
System.out.println(ASCII.String(Base64Order.enhancedCoder.uncardinal(c2)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|