Moved the static method object2score from ClusteredScoreMap to MapDataMining because it was not used in the ClusteredScoreMap class itself, only in MapDataMining.
pull/1/head
orbiter 12 years ago
parent 8e1248ffe3
commit 354f0d9acd

@ -24,23 +24,19 @@
package net.yacy.cora.sorting; package net.yacy.cora.sorting;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.storage.OutOfLimitsException; import net.yacy.cora.storage.OutOfLimitsException;
public final class ClusteredScoreMap<E> extends AbstractScoreMap<E> implements ReversibleScoreMap<E> { public final class ClusteredScoreMap<E> extends AbstractScoreMap<E> implements ReversibleScoreMap<E> {
protected final Map<E, Long> map; // a mapping from a reference to the cluster key private final Map<E, Long> map; // a mapping from a reference to the cluster key
protected final TreeMap<Long, E> pam; // a mapping from the cluster key to the reference private final TreeMap<Long, E> pam; // a mapping from the cluster key to the reference
private long gcount; private long gcount;
private int encnt; private int encnt;
@ -109,86 +105,11 @@ public final class ClusteredScoreMap<E> extends AbstractScoreMap<E> implements R
} }
} }
/** Length of one minute in milliseconds. */
public static final long minutemillis = 60L * 1000L;
/** Pattern of the compact 14-character timestamp form. */
public static final String shortDateFormatString = "yyyyMMddHHmmss";
/** Formatter for {@link #shortDateFormatString}, created with {@link Locale#US}. */
public static final SimpleDateFormat shortFormatter = new SimpleDateFormat(shortDateFormatString, Locale.US);
/** Time value of the timestamp "20000101000000" as parsed by {@link #shortFormatter}; 0 if parsing failed. */
public static long date2000 = 0;
static {
    long parsedMillis = 0;
    try {
        parsedMillis = shortFormatter.parse("20000101000000").getTime();
    } catch (final ParseException ignored) {
        // deliberately ignored: date2000 then keeps its default of 0
    }
    date2000 = parsedMillis;
}
/**
 * Derives an int score from a value of unknown type.
 * <ul>
 * <li>Integer: returned unchanged.</li>
 * <li>Long: cast to int when below Integer.MAX_VALUE, otherwise reduced to its low 31 bits.</li>
 * <li>Float / Double: multiplied by 1000 and rounded.</li>
 * <li>String / byte[] (decoded with UTF8.String): null, empty or starting with '-' gives 0;
 *     a string as long as shortDateFormatString is parsed as a timestamp and scored as the
 *     minutes elapsed since date2000 (never below 0); any other string is parsed as a long.
 *     If parsing fails, a lexical score is built from the first five characters, six bits each.</li>
 * <li>Any other type: 0.</li>
 * </ul>
 *
 * @param o the value to score, may be null
 * @return the computed score
 */
public static int object2score(Object o) {
    if (o instanceof Integer) return ((Integer) o).intValue();
    if (o instanceof Long) {
        final long l = ((Long) o).longValue();
        // note: a value below Integer.MIN_VALUE also takes this branch and is truncated by the cast
        if (l < Integer.MAX_VALUE) return (int) l;
        return (int) (l & Integer.MAX_VALUE);
    }
    if (o instanceof Float) {
        final double d = 1000f * ((Float) o).floatValue();
        return (int) Math.round(d);
    }
    if (o instanceof Double) {
        final double d = 1000d * ((Double) o).doubleValue();
        return (int) Math.round(d);
    }
    String s = null;
    if (o instanceof String) s = (String) o;
    if (o instanceof byte[]) s = UTF8.String((byte[]) o);

    // this can be used to calculate a score from a string
    if (s == null || s.isEmpty() || s.charAt(0) == '-') return 0;
    try {
        long l = 0;
        if (s.length() == shortDateFormatString.length()) {
            // try a date
            l = ((shortFormatter.parse(s).getTime() - date2000) / minutemillis);
            if (l < 0) l = 0;
        } else {
            // try a number
            l = Long.parseLong(s);
        }
        // fix out-of-ranges
        if (l > Integer.MAX_VALUE) return (int) (l & Integer.MAX_VALUE);
        if (l < 0) {
            System.out.println("string2score: negative score for input " + s);
            return 0;
        }
        return (int) l;
    } catch (final Throwable e) {
        // neither a date nor a number: try it lex
        int len = s.length();
        if (len > 5) len = 5;
        int c = 0;
        for (int i = 0; i < len; i++) {
            c <<= 6;
            // Mask to 0..255 instead of casting to byte: the cast produced a negative
            // index (ArrayIndexOutOfBoundsException) for characters whose low byte is >= 0x80.
            c += plainByteArray[s.charAt(i) & 0xFF];
        }
        // pad short strings so that every score spans five 6-bit positions
        for (int i = len; i < 5; i++) c <<= 6;
        if (c < 0) {
            System.out.println("string2score: negative score for input " + s);
            return 0;
        }
        return c;
    }
}
/** Lookup table, indexed by a code in 0..255, giving the 6-bit value used for lexical scores. */
private static final byte[] plainByteArray = new byte[256];
static {
    for (int code = 0; code < plainByteArray.length; code++) {
        final int mapped;
        if (code < 32) {
            mapped = code;          // 0..31 map to themselves
        } else if (code < 96) {
            mapped = code - 32;     // 32..95 map to 0..63
        } else if (code < 128) {
            mapped = code - 64;     // 96..127 map to 32..63
        } else {
            mapped = code & 0X20;   // 128..255 map to 0 or 32
        }
        plainByteArray[code] = (byte) mapped;
    }
}
private long scoreKey(final int elementNr, final int elementCount) { private long scoreKey(final int elementNr, final int elementCount) {
return (((elementCount & 0xFFFFFFFFL)) << 32) | ((elementNr & 0xFFFFFFFFL)); return (((elementCount & 0xFFFFFFFFL)) << 32) | ((elementNr & 0xFFFFFFFFL));
} }
public synchronized long totalCount() { private synchronized long totalCount() {
return this.gcount; return this.gcount;
} }
@ -428,13 +349,6 @@ public final class ClusteredScoreMap<E> extends AbstractScoreMap<E> implements R
public static void main(final String[] args) { public static void main(final String[] args) {
final String t = "ZZZZZZZZZZ";
System.out.println("score of " + t + ": " + object2score(t));
if (args.length > 0) {
System.out.println("score of " + args[0] + ": " + object2score(args[0]));
System.exit(0);
}
System.out.println("Test for Score: start"); System.out.println("Test for Score: start");
final ClusteredScoreMap<String> s = new ClusteredScoreMap<String>(); final ClusteredScoreMap<String> s = new ClusteredScoreMap<String>();
long c = 0; long c = 0;

@ -29,9 +29,12 @@ package net.yacy.kelondro.blob;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
@ -39,7 +42,6 @@ import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.ByteOrder; import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ConcurrentScoreMap; import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
@ -127,7 +129,7 @@ public class MapDataMining extends MapHeap {
if (sortfields != null && cluster != null) { if (sortfields != null && cluster != null) {
for (int i = 0; i < sortfields.length; i++) { for (int i = 0; i < sortfields.length; i++) {
cell = map.get(sortfields[i]); cell = map.get(sortfields[i]);
if (cell != null) cluster[i].set(UTF8.String(mapnameb), ClusteredScoreMap.object2score(cell)); if (cell != null) cluster[i].set(UTF8.String(mapnameb), object2score(cell));
} }
} }
@ -272,7 +274,7 @@ public class MapDataMining extends MapHeap {
cell = map.get(sortfield); cell = map.get(sortfield);
if (cell != null) { if (cell != null) {
cluster = this.sortClusterMap.get(sortfield); cluster = this.sortClusterMap.get(sortfield);
cluster.set(key, ClusteredScoreMap.object2score(cell)); cluster.set(key, object2score(cell));
this.sortClusterMap.put(sortfield, cluster); this.sortClusterMap.put(sortfield, cluster);
} }
} }
@ -408,6 +410,81 @@ public class MapDataMining extends MapHeap {
super.close(); super.close();
} }
/** Length of one minute in milliseconds. */
private static final long minutemillis = 60L * 1000L;
/** Pattern of the compact 14-character timestamp form. */
private static final String shortDateFormatString = "yyyyMMddHHmmss";
/** Formatter for shortDateFormatString, created with Locale.US. */
private static final SimpleDateFormat shortFormatter = new SimpleDateFormat(shortDateFormatString, Locale.US);
/** Time value of the timestamp "20000101000000" as parsed by shortFormatter; 0 if parsing failed. */
private static long date2000 = 0;
static {
    long parsedMillis = 0;
    try {
        parsedMillis = shortFormatter.parse("20000101000000").getTime();
    } catch (final ParseException ignored) {
        // deliberately ignored: date2000 then keeps its default of 0
    }
    date2000 = parsedMillis;
}
/** Lookup table, indexed by a code in 0..255, giving the 6-bit value used for lexical scores. */
private static final byte[] plainByteArray = new byte[256];
static {
    for (int code = 0; code < plainByteArray.length; code++) {
        final int mapped;
        if (code < 32) {
            mapped = code;          // 0..31 map to themselves
        } else if (code < 96) {
            mapped = code - 32;     // 32..95 map to 0..63
        } else if (code < 128) {
            mapped = code - 64;     // 96..127 map to 32..63
        } else {
            mapped = code & 0X20;   // 128..255 map to 0 or 32
        }
        plainByteArray[code] = (byte) mapped;
    }
}
/**
 * Derives an int score from a value of unknown type.
 * <ul>
 * <li>Integer: returned unchanged.</li>
 * <li>Long: cast to int when below Integer.MAX_VALUE, otherwise reduced to its low 31 bits.</li>
 * <li>Float / Double: multiplied by 1000 and rounded.</li>
 * <li>String / byte[] (decoded with UTF8.String): null, empty or starting with '-' gives 0;
 *     a string as long as shortDateFormatString is parsed as a timestamp and scored as the
 *     minutes elapsed since date2000 (never below 0); any other string is parsed as a long.
 *     If parsing fails, a lexical score is built from the first five characters, six bits each.</li>
 * <li>Any other type: 0.</li>
 * </ul>
 *
 * @param o the value to score, may be null
 * @return the computed score
 */
private static int object2score(Object o) {
    if (o instanceof Integer) return ((Integer) o).intValue();
    if (o instanceof Long) {
        final long l = ((Long) o).longValue();
        // note: a value below Integer.MIN_VALUE also takes this branch and is truncated by the cast
        if (l < Integer.MAX_VALUE) return (int) l;
        return (int) (l & Integer.MAX_VALUE);
    }
    if (o instanceof Float) {
        final double d = 1000f * ((Float) o).floatValue();
        return (int) Math.round(d);
    }
    if (o instanceof Double) {
        final double d = 1000d * ((Double) o).doubleValue();
        return (int) Math.round(d);
    }
    String s = null;
    if (o instanceof String) s = (String) o;
    if (o instanceof byte[]) s = UTF8.String((byte[]) o);

    // this can be used to calculate a score from a string
    if (s == null || s.isEmpty() || s.charAt(0) == '-') return 0;
    try {
        long l = 0;
        if (s.length() == shortDateFormatString.length()) {
            // try a date
            l = ((shortFormatter.parse(s).getTime() - date2000) / minutemillis);
            if (l < 0) l = 0;
        } else {
            // try a number
            l = Long.parseLong(s);
        }
        // fix out-of-ranges
        if (l > Integer.MAX_VALUE) return (int) (l & Integer.MAX_VALUE);
        if (l < 0) {
            System.out.println("string2score: negative score for input " + s);
            return 0;
        }
        return (int) l;
    } catch (final Throwable e) {
        // neither a date nor a number: try it lex
        int len = s.length();
        if (len > 5) len = 5;
        int c = 0;
        for (int i = 0; i < len; i++) {
            c <<= 6;
            // Mask to 0..255 instead of casting to byte: the cast produced a negative
            // index (ArrayIndexOutOfBoundsException) for characters whose low byte is >= 0x80.
            c += plainByteArray[s.charAt(i) & 0xFF];
        }
        // pad short strings so that every score spans five 6-bit positions
        for (int i = len; i < 5; i++) c <<= 6;
        if (c < 0) {
            System.out.println("string2score: negative score for input " + s);
            return 0;
        }
        return c;
    }
}
/* /*
public byte[] lookupBy( public byte[] lookupBy(
final String whereKey, final String whereKey,

Loading…
Cancel
Save