- consolidated different orderings to kelondroNaturalOrder

- added another iteration method to rwihash-enumeration


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1309 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 9544c47684
commit 2028403670

@ -59,6 +59,8 @@ public abstract class kelondroAbstractOrder implements kelondroOrder {
return compare((byte[]) a, (byte[]) b);
} else if ((a instanceof Node) && (b instanceof Node)) {
return compare(((Node) a).getKey(), ((Node) b).getKey());
} else if ((a instanceof String) && (b instanceof String)) {
return compare(((String) a).getBytes(), ((String) b).getBytes());
} else
throw new IllegalArgumentException("Object type or Object type combination not supported");
}

@ -66,18 +66,19 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
}
}
public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true);
public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(false);
final boolean rfc1113compliant;
public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true, true);
public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(true, false);
private boolean rfc1113compliant;
private boolean asc;
private final char[] alpha;
private final byte[] ahpla;
public kelondroBase64Order(boolean rfc1113compliant) {
public kelondroBase64Order(boolean up, boolean rfc1113compliant) {
// if we choose not to be rfc1113compliant,
// then we get shorter base64 results which are also filename-compatible
this.rfc1113compliant = rfc1113compliant;
this.asc = up;
alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced;
ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced;
}
@ -221,10 +222,8 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
final int bl = b.length;
final int len = (al > bl) ? bl : al;
while (i < len) {
if (ahpla[a[i]] > ahpla[b[i]])
return 1;
if (ahpla[a[i]] < ahpla[b[i]])
return -1;
if (ahpla[a[i]] > ahpla[b[i]]) return (asc) ? 1 : -1;
if (ahpla[a[i]] < ahpla[b[i]]) return (asc) ? -1 : 1;
// else the bytes are equal and it may go on yet undecided
i++;
}
@ -232,14 +231,14 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
if ((i == al) && (i < bl) && (b[i] == 0)) return 0;
if ((i == bl) && (i < al) && (a[i] == 0)) return 0;
// no, decide by length
if (al > bl) return 1;
if (al < bl) return -1;
if (al > bl) return (asc) ? 1 : -1;
if (al < bl) return (asc) ? -1 : 1;
// no, they are equal
return 0;
}
public static void main(String[] s) {
kelondroBase64Order b64 = new kelondroBase64Order(true);
kelondroBase64Order b64 = new kelondroBase64Order(true, true);
if (s.length == 0) {
System.out.println("usage: -[ec|dc|es|ds|s2m] <arg>");
System.exit(0);

@ -52,7 +52,7 @@ public class kelondroBinSearch {
private int chunksize;
private byte[] buffer;
private int count;
private kelondroOrder objectOrder = new kelondroNaturalOrder();
private kelondroOrder objectOrder = new kelondroNaturalOrder(true);
public kelondroBinSearch(byte[] chunks, int chunksize) {
this.chunks = chunks;

@ -53,7 +53,7 @@ import java.util.TreeSet;
public class kelondroMSetTools {
public static Comparator fastStringComparator = fastStringComparator(true);
//public static Comparator fastStringComparator = fastStringComparator(true);
// ------------------------------------------------------------------------------------------------
// helper methods
@ -324,38 +324,6 @@ public class kelondroMSetTools {
// ------------------------------------------------------------------------------------------------
// Factory for the fast string comparator defined below.
// A new stringComparator instance is created on every call;
// 'ascending' is handed unchanged to its constructor.
public static Comparator fastStringComparator(boolean ascending) {
return new stringComparator(ascending);
}
// Comparator for String objects that ranks by length first and compares
// character by character only when both strings have the same length.
// This is therefore not a lexicographic order: in ascending mode any shorter
// string sorts before any longer one, whatever its content.
private static class stringComparator implements Comparator {
// fast ordering
// direction flag: true yields ascending results, false swaps the sign
// of every non-zero result
boolean asc = true;
public stringComparator(boolean ascending) {
asc = ascending;
}
// Both arguments are cast to String without a type check, so any other
// argument type fails at the cast.
public int compare(Object o1, Object o2) {
// returns o1<o2:-1 , o1=o2:0 , o1>o2:1 (signs are swapped when asc is false)
int l1 = ((String) o1).length();
int l2 = ((String) o2).length();
if (l1 == l2) {
// same length: the first differing position decides
for (int i = 0; i < l1; i++) {
// each char is narrowed to a byte before the comparison, so only its low
// 8 bits take part and the comparison is signed
if (((byte) ((String) o1).charAt(i)) < ((byte) ((String) o2).charAt(i))) return (asc) ? -1 : 1;
if (((byte) ((String) o1).charAt(i)) > ((byte) ((String) o2).charAt(i))) return (asc) ? 1 : -1;
}
// no position differed after narrowing
return 0;
//return ((String) o1).compareTo((String) o2);
} else {
// different lengths: the length alone decides, content is never inspected
return l1 < l2 ? ((asc) ? -1 : 1) : ((asc) ? 1 : -1);
}
}
// NOTE(review): always returns false, even when obj is this very instance,
// so no two comparators are ever reported as equal - confirm this is intended
public boolean equals(Object obj) {
return false;
}
}
// ------------------------------------------------------------------------------------------------
public static TreeMap loadMap(String filename, String sep) {
TreeMap map = new TreeMap();
BufferedReader br = null;
@ -375,8 +343,8 @@ public class kelondroMSetTools {
return map;
}
public static TreeSet loadList(File file) {
TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator);
public static TreeSet loadList(File file, Comparator c) {
TreeSet list = new TreeSet(c);
if (!(file.exists())) return list;
BufferedReader br = null;

@ -53,12 +53,12 @@ public class kelondroMergeIterator implements Iterator {
String na, nb;
boolean up;
public kelondroMergeIterator(Iterator a, Iterator b, boolean up) {
public kelondroMergeIterator(Iterator a, Iterator b, Comparator c, boolean up) {
// this works currently only for String-type key iterations
this.a = a;
this.b = b;
this.up = up;
this.comp = kelondroMSetTools.fastStringComparator(up);
this.comp = c;
nexta();
nextb();
}
@ -125,19 +125,19 @@ public class kelondroMergeIterator implements Iterator {
throw new java.lang.UnsupportedOperationException("merge does not support remove");
}
public static Iterator cascade(Set /*of*/ iterators, boolean up) {
public static Iterator cascade(Set /*of*/ iterators, Comparator c,boolean up) {
// this extends the ability to combine two iterators
// to the ability of combining a set of iterators
if (iterators == null) return null;
if (iterators.size() == 0) return null;
return cascade(iterators.iterator(), up);
return cascade(iterators.iterator(), c, up);
}
private static Iterator cascade(Iterator /*of*/ iiterators, boolean up) {
private static Iterator cascade(Iterator /*of*/ iiterators, Comparator c, boolean up) {
if (iiterators == null) return null;
if (!(iiterators.hasNext())) return null;
Iterator one = (Iterator) iiterators.next();
if (!(iiterators.hasNext())) return one;
return new kelondroMergeIterator(one, cascade(iiterators, up), up);
return new kelondroMergeIterator(one, cascade(iiterators, c, up), c, up);
}
}

@ -49,7 +49,12 @@ import java.util.Comparator;
public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelondroOrder, Comparator {
public kelondroNaturalOrder() {
boolean asc;
public static final Comparator naturalOrder = new kelondroNaturalOrder(true);
public kelondroNaturalOrder(boolean ascending) {
this.asc = ascending;
}
public long cardinal(byte[] key) {
@ -68,7 +73,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon
// two arrays are also equal if one array is a subset of the other's array
// with filled-up char(0)-values
public int compare(byte[] a, byte[] b) {
return compares(a, b);
return (asc) ? compares(a, b) : compares(b, a);
}
public static final int compares(byte[] a, byte[] b) {
@ -98,7 +103,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon
byte[] t = new byte[12];
for (int i = 0; i < 12; i++) t[i] = (byte) 255;
t[0] = (byte) 127;
kelondroOrder o = new kelondroNaturalOrder();
kelondroOrder o = new kelondroNaturalOrder(true);
System.out.println(o.partition(t, 16));
}

@ -75,7 +75,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
private static int root = 0; // pointer for FHandles-array: pointer to root node
private Search writeSearchObj = new Search();
private kelondroOrder objectOrder = new kelondroNaturalOrder();
private kelondroOrder objectOrder = new kelondroNaturalOrder(true);
public kelondroTree(File file, long buffersize, int key, int value, boolean exitOnFail) {
this(file, buffersize, new int[] { key, value }, 1, 8, exitOnFail);

@ -64,6 +64,7 @@ import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
public final class plasmaCondenser {
@ -142,7 +143,7 @@ public final class plasmaCondenser {
private void createCondensement(InputStream is) {
words = new TreeMap(kelondroMSetTools.fastStringComparator);
words = new TreeMap(kelondroNaturalOrder.naturalOrder);
sentences = new HashMap();
HashSet currsentwords = new HashSet();
StringBuffer sentence = new StringBuffer(100);
@ -405,7 +406,7 @@ public final class plasmaCondenser {
// we reconstruct the word hashtable
// and sort the entries by the number of occurrences
// this structure is needed to print out a sorted list of words
TreeMap sortedWords = new TreeMap(kelondroMSetTools.fastStringComparator);
TreeMap sortedWords = new TreeMap(kelondroNaturalOrder.naturalOrder);
it = words.entrySet().iterator(); // enumerates the keys in ascending order
while (it.hasNext()) {
entry = (Map.Entry) it.next();

@ -48,6 +48,7 @@ import java.util.Iterator;
import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.serverByteBuffer;
public final class plasmaSearchQuery {
@ -132,7 +133,7 @@ public final class plasmaSearchQuery {
}
// the string is clean now, but we must generate a set out of it
final TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator);
final TreeSet query = new TreeSet(kelondroNaturalOrder.naturalOrder);
if (words.length() == 0) return query; // split returns always one element
final String[] a = words.split(" ");
for (int i = 0; i < a.length; i++) { query.add(a[i]); }

@ -130,6 +130,7 @@ import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroTables;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings;
@ -260,7 +261,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// read only once upon first instantiation of this class
String f = getConfig("plasmaBlueList", null);
File plasmaBlueListFile = new File(f);
if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile); else blueList= new TreeSet();
if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile, kelondroNaturalOrder.naturalOrder); else blueList= new TreeSet();
this.log.logConfig("loaded blue-list from file " + plasmaBlueListFile.getName() + ", " +
blueList.size() + " entries, " +
ppRamString(plasmaBlueListFile.length()/1024));
@ -280,7 +281,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load stopwords
if (stopwords == null) {
File stopwordsFile = new File(rootPath, "yacy.stopwords");
stopwords = kelondroMSetTools.loadList(stopwordsFile);
stopwords = kelondroMSetTools.loadList(stopwordsFile, kelondroNaturalOrder.naturalOrder);
this.log.logConfig("loaded stopwords from file " + stopwordsFile.getName() + ", " +
stopwords.size() + " entries, " +
ppRamString(stopwordsFile.length()/1024));

@ -59,6 +59,7 @@ import java.net.URL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -279,7 +280,8 @@ public final class plasmaWordIndex {
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList();
this.comp = kelondroMSetTools.fastStringComparator(up);
this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty;
// then we initially fill the hierarchy with the content of the root folder

@ -50,6 +50,7 @@ import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.server.logging.serverLog;
@ -216,7 +217,7 @@ public final class plasmaWordIndexAssortmentCluster {
public Iterator hashConjunction(String startWordHash, boolean up) {
HashSet iterators = new HashSet();
for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true));
return kelondroMergeIterator.cascade(iterators, up);
return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up);
}
public int sizeTotal() {

@ -53,6 +53,7 @@ import de.anomic.kelondro.kelondroArray;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -267,8 +268,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
new kelondroMergeIterator(
cache.tailMap(startWordHash).keySet().iterator(),
assortmentCluster.hashConjunction(startWordHash, true),
kelondroNaturalOrder.naturalOrder,
true),
backend.wordHashes(startWordHash, true),
kelondroNaturalOrder.naturalOrder,
true);
}

@ -50,6 +50,7 @@ import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -83,7 +84,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
public iterateFiles(String startHash, boolean up) {
this.hierarchy = new ArrayList();
this.comp = kelondroMSetTools.fastStringComparator(up);
this.comp = new kelondroNaturalOrder(up);
// then we initially fill the hierarchy with the content of the root folder
String path = "WORDS";

@ -77,6 +77,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaURLPool;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.plasma.plasmaWordIndexAssortmentCluster;
import de.anomic.plasma.plasmaWordIndexCache;
import de.anomic.plasma.plasmaWordIndexClassicDB;
@ -1282,6 +1283,11 @@ public final class yacy {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log);
WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true);
}
if (resource.startsWith("assortment")) {
int a = Integer.parseInt(resource.substring(10));
plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null);
WordHashIterator = assortment.hashes(wordChunkStartHash, true, false);
}
if (resource.equals("words")) {
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(homeDBroot, log);
WordHashIterator = fileDB.wordHashes(wordChunkStartHash, true);

Loading…
Cancel
Save