- consolidated different orderings to kelondroNaturalOrder

- added another iteration method to rwihash-enumeration


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1309 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 9544c47684
commit 2028403670

@ -59,6 +59,8 @@ public abstract class kelondroAbstractOrder implements kelondroOrder {
return compare((byte[]) a, (byte[]) b); return compare((byte[]) a, (byte[]) b);
} else if ((a instanceof Node) && (b instanceof Node)) { } else if ((a instanceof Node) && (b instanceof Node)) {
return compare(((Node) a).getKey(), ((Node) b).getKey()); return compare(((Node) a).getKey(), ((Node) b).getKey());
} else if ((a instanceof String) && (b instanceof String)) {
return compare(((String) a).getBytes(), ((String) b).getBytes());
} else } else
throw new IllegalArgumentException("Object type or Object type combination not supported"); throw new IllegalArgumentException("Object type or Object type combination not supported");
} }

@ -66,18 +66,19 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
} }
} }
public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true); public static final kelondroBase64Order standardCoder = new kelondroBase64Order(true, true);
public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(false); public static final kelondroBase64Order enhancedCoder = new kelondroBase64Order(true, false);
final boolean rfc1113compliant;
private boolean rfc1113compliant;
private boolean asc;
private final char[] alpha; private final char[] alpha;
private final byte[] ahpla; private final byte[] ahpla;
public kelondroBase64Order(boolean rfc1113compliant) { public kelondroBase64Order(boolean up, boolean rfc1113compliant) {
// if we choose not to be rfc1113compliant, // if we choose not to be rfc1113compliant,
// then we get shorter base64 results which are also filename-compatible // then we get shorter base64 results which are also filename-compatible
this.rfc1113compliant = rfc1113compliant; this.rfc1113compliant = rfc1113compliant;
this.asc = up;
alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced; alpha = (rfc1113compliant) ? alpha_standard : alpha_enhanced;
ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced; ahpla = (rfc1113compliant) ? ahpla_standard : ahpla_enhanced;
} }
@ -221,10 +222,8 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
final int bl = b.length; final int bl = b.length;
final int len = (al > bl) ? bl : al; final int len = (al > bl) ? bl : al;
while (i < len) { while (i < len) {
if (ahpla[a[i]] > ahpla[b[i]]) if (ahpla[a[i]] > ahpla[b[i]]) return (asc) ? 1 : -1;
return 1; if (ahpla[a[i]] < ahpla[b[i]]) return (asc) ? -1 : 1;
if (ahpla[a[i]] < ahpla[b[i]])
return -1;
// else the bytes are equal and it may go on yet undecided // else the bytes are equal and it may go on yet undecided
i++; i++;
} }
@ -232,14 +231,14 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
if ((i == al) && (i < bl) && (b[i] == 0)) return 0; if ((i == al) && (i < bl) && (b[i] == 0)) return 0;
if ((i == bl) && (i < al) && (a[i] == 0)) return 0; if ((i == bl) && (i < al) && (a[i] == 0)) return 0;
// no, decide by length // no, decide by length
if (al > bl) return 1; if (al > bl) return (asc) ? 1 : -1;
if (al < bl) return -1; if (al < bl) return (asc) ? -1 : 1;
// no, they are equal // no, they are equal
return 0; return 0;
} }
public static void main(String[] s) { public static void main(String[] s) {
kelondroBase64Order b64 = new kelondroBase64Order(true); kelondroBase64Order b64 = new kelondroBase64Order(true, true);
if (s.length == 0) { if (s.length == 0) {
System.out.println("usage: -[ec|dc|es|ds|s2m] <arg>"); System.out.println("usage: -[ec|dc|es|ds|s2m] <arg>");
System.exit(0); System.exit(0);

@ -52,7 +52,7 @@ public class kelondroBinSearch {
private int chunksize; private int chunksize;
private byte[] buffer; private byte[] buffer;
private int count; private int count;
private kelondroOrder objectOrder = new kelondroNaturalOrder(); private kelondroOrder objectOrder = new kelondroNaturalOrder(true);
public kelondroBinSearch(byte[] chunks, int chunksize) { public kelondroBinSearch(byte[] chunks, int chunksize) {
this.chunks = chunks; this.chunks = chunks;

@ -53,7 +53,7 @@ import java.util.TreeSet;
public class kelondroMSetTools { public class kelondroMSetTools {
public static Comparator fastStringComparator = fastStringComparator(true); //public static Comparator fastStringComparator = fastStringComparator(true);
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// helper methods // helper methods
@ -324,38 +324,6 @@ public class kelondroMSetTools {
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
/**
 * Creates a comparator for String keys that orders by length first and, for
 * strings of equal length, by their characters truncated to signed bytes.
 *
 * @param ascending true for ascending order; false inverts every non-zero result
 * @return a new comparator instance for the requested direction
 */
public static Comparator fastStringComparator(boolean ascending) {
    return new stringComparator(ascending);
}

/**
 * Fast ordering for Strings: shorter strings sort before longer ones, and
 * strings of the same length are compared position by position on the
 * signed-byte value of each character. This is not lexicographic order.
 */
private static class stringComparator implements Comparator {
    // direction flag; when false every non-zero comparison result is negated
    boolean asc = true;

    public stringComparator(boolean ascending) {
        asc = ascending;
    }

    /**
     * Compares two Strings.
     *
     * @param o1 first String
     * @param o2 second String
     * @return -1 if o1 sorts before o2, 0 if they compare equal, 1 if o1 sorts
     *         after o2 (signs swapped when the comparator is descending)
     * @throws ClassCastException if an argument is not a String
     */
    public int compare(Object o1, Object o2) {
        // cast and measure each argument in turn so failures surface in the
        // same order as a straightforward o1-then-o2 evaluation
        final String s1 = (String) o1;
        final int l1 = s1.length();
        final String s2 = (String) o2;
        final int l2 = s2.length();

        // different lengths decide immediately: the shorter string is "smaller"
        if (l1 != l2) {
            return ((l1 < l2) == asc) ? -1 : 1;
        }

        for (int i = 0; i < l1; i++) {
            // deliberate narrowing to byte: only the low 8 bits take part,
            // compared as signed values
            final byte c1 = (byte) s1.charAt(i);
            final byte c2 = (byte) s2.charAt(i);
            if (c1 < c2) return (asc) ? -1 : 1;
            if (c1 > c2) return (asc) ? 1 : -1;
        }
        return 0;
    }

    /**
     * Two instances are equal when they sort in the same direction, because
     * they then impose the same ordering. Also makes an instance equal to itself.
     */
    public boolean equals(Object obj) {
        return (obj instanceof stringComparator) && (((stringComparator) obj).asc == asc);
    }

    // kept consistent with equals: equal comparators share a hash code
    public int hashCode() {
        return (asc) ? 1 : 0;
    }
}
// ------------------------------------------------------------------------------------------------
public static TreeMap loadMap(String filename, String sep) { public static TreeMap loadMap(String filename, String sep) {
TreeMap map = new TreeMap(); TreeMap map = new TreeMap();
BufferedReader br = null; BufferedReader br = null;
@ -375,8 +343,8 @@ public class kelondroMSetTools {
return map; return map;
} }
public static TreeSet loadList(File file) { public static TreeSet loadList(File file, Comparator c) {
TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator); TreeSet list = new TreeSet(c);
if (!(file.exists())) return list; if (!(file.exists())) return list;
BufferedReader br = null; BufferedReader br = null;

@ -53,12 +53,12 @@ public class kelondroMergeIterator implements Iterator {
String na, nb; String na, nb;
boolean up; boolean up;
public kelondroMergeIterator(Iterator a, Iterator b, boolean up) { public kelondroMergeIterator(Iterator a, Iterator b, Comparator c, boolean up) {
// this works currently only for String-type key iterations // this works currently only for String-type key iterations
this.a = a; this.a = a;
this.b = b; this.b = b;
this.up = up; this.up = up;
this.comp = kelondroMSetTools.fastStringComparator(up); this.comp = c;
nexta(); nexta();
nextb(); nextb();
} }
@ -125,19 +125,19 @@ public class kelondroMergeIterator implements Iterator {
throw new java.lang.UnsupportedOperationException("merge does not support remove"); throw new java.lang.UnsupportedOperationException("merge does not support remove");
} }
public static Iterator cascade(Set /*of*/ iterators, boolean up) { public static Iterator cascade(Set /*of*/ iterators, Comparator c,boolean up) {
// this extends the ability to combine two iterators // this extends the ability to combine two iterators
// to the ability of combining a set of iterators // to the ability of combining a set of iterators
if (iterators == null) return null; if (iterators == null) return null;
if (iterators.size() == 0) return null; if (iterators.size() == 0) return null;
return cascade(iterators.iterator(), up); return cascade(iterators.iterator(), c, up);
} }
private static Iterator cascade(Iterator /*of*/ iiterators, boolean up) { private static Iterator cascade(Iterator /*of*/ iiterators, Comparator c, boolean up) {
if (iiterators == null) return null; if (iiterators == null) return null;
if (!(iiterators.hasNext())) return null; if (!(iiterators.hasNext())) return null;
Iterator one = (Iterator) iiterators.next(); Iterator one = (Iterator) iiterators.next();
if (!(iiterators.hasNext())) return one; if (!(iiterators.hasNext())) return one;
return new kelondroMergeIterator(one, cascade(iiterators, up), up); return new kelondroMergeIterator(one, cascade(iiterators, c, up), c, up);
} }
} }

@ -49,7 +49,12 @@ import java.util.Comparator;
public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelondroOrder, Comparator { public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelondroOrder, Comparator {
public kelondroNaturalOrder() { boolean asc;
public static final Comparator naturalOrder = new kelondroNaturalOrder(true);
public kelondroNaturalOrder(boolean ascending) {
this.asc = ascending;
} }
public long cardinal(byte[] key) { public long cardinal(byte[] key) {
@ -68,7 +73,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon
// two arrays are also equal if one array is a subset of the other's array // two arrays are also equal if one array is a subset of the other's array
// with filled-up char(0)-values // with filled-up char(0)-values
public int compare(byte[] a, byte[] b) { public int compare(byte[] a, byte[] b) {
return compares(a, b); return (asc) ? compares(a, b) : compares(b, a);
} }
public static final int compares(byte[] a, byte[] b) { public static final int compares(byte[] a, byte[] b) {
@ -98,7 +103,7 @@ public class kelondroNaturalOrder extends kelondroAbstractOrder implements kelon
byte[] t = new byte[12]; byte[] t = new byte[12];
for (int i = 0; i < 12; i++) t[i] = (byte) 255; for (int i = 0; i < 12; i++) t[i] = (byte) 255;
t[0] = (byte) 127; t[0] = (byte) 127;
kelondroOrder o = new kelondroNaturalOrder(); kelondroOrder o = new kelondroNaturalOrder(true);
System.out.println(o.partition(t, 16)); System.out.println(o.partition(t, 16));
} }

@ -75,7 +75,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
private static int root = 0; // pointer for FHandles-array: pointer to root node private static int root = 0; // pointer for FHandles-array: pointer to root node
private Search writeSearchObj = new Search(); private Search writeSearchObj = new Search();
private kelondroOrder objectOrder = new kelondroNaturalOrder(); private kelondroOrder objectOrder = new kelondroNaturalOrder(true);
public kelondroTree(File file, long buffersize, int key, int value, boolean exitOnFail) { public kelondroTree(File file, long buffersize, int key, int value, boolean exitOnFail) {
this(file, buffersize, new int[] { key, value }, 1, 8, exitOnFail); this(file, buffersize, new int[] { key, value }, 1, 8, exitOnFail);

@ -64,6 +64,7 @@ import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream; import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
public final class plasmaCondenser { public final class plasmaCondenser {
@ -142,7 +143,7 @@ public final class plasmaCondenser {
private void createCondensement(InputStream is) { private void createCondensement(InputStream is) {
words = new TreeMap(kelondroMSetTools.fastStringComparator); words = new TreeMap(kelondroNaturalOrder.naturalOrder);
sentences = new HashMap(); sentences = new HashMap();
HashSet currsentwords = new HashSet(); HashSet currsentwords = new HashSet();
StringBuffer sentence = new StringBuffer(100); StringBuffer sentence = new StringBuffer(100);
@ -405,7 +406,7 @@ public final class plasmaCondenser {
// we reconstruct the word hashtable // we reconstruct the word hashtable
// and sort the entries by the number of occurrences // and sort the entries by the number of occurrences
// this structure is needed to print out a sorted list of words // this structure is needed to print out a sorted list of words
TreeMap sortedWords = new TreeMap(kelondroMSetTools.fastStringComparator); TreeMap sortedWords = new TreeMap(kelondroNaturalOrder.naturalOrder);
it = words.entrySet().iterator(); // enumerates the keys in ascending order it = words.entrySet().iterator(); // enumerates the keys in ascending order
while (it.hasNext()) { while (it.hasNext()) {
entry = (Map.Entry) it.next(); entry = (Map.Entry) it.next();

@ -48,6 +48,7 @@ import java.util.Iterator;
import de.anomic.htmlFilter.htmlFilterAbstractScraper; import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.serverByteBuffer; import de.anomic.server.serverByteBuffer;
public final class plasmaSearchQuery { public final class plasmaSearchQuery {
@ -132,7 +133,7 @@ public final class plasmaSearchQuery {
} }
// the string is clean now, but we must generate a set out of it // the string is clean now, but we must generate a set out of it
final TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator); final TreeSet query = new TreeSet(kelondroNaturalOrder.naturalOrder);
if (words.length() == 0) return query; // split returns always one element if (words.length() == 0) return query; // split returns always one element
final String[] a = words.split(" "); final String[] a = words.split(" ");
for (int i = 0; i < a.length; i++) { query.add(a[i]); } for (int i = 0; i < a.length; i++) { query.add(a[i]); }

@ -130,6 +130,7 @@ import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroTables; import de.anomic.kelondro.kelondroTables;
import de.anomic.server.serverAbstractSwitch; import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings; import de.anomic.server.serverCodings;
@ -260,7 +261,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// read only once upon first instantiation of this class // read only once upon first instantiation of this class
String f = getConfig("plasmaBlueList", null); String f = getConfig("plasmaBlueList", null);
File plasmaBlueListFile = new File(f); File plasmaBlueListFile = new File(f);
if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile); else blueList= new TreeSet(); if (f != null) blueList = kelondroMSetTools.loadList(plasmaBlueListFile, kelondroNaturalOrder.naturalOrder); else blueList= new TreeSet();
this.log.logConfig("loaded blue-list from file " + plasmaBlueListFile.getName() + ", " + this.log.logConfig("loaded blue-list from file " + plasmaBlueListFile.getName() + ", " +
blueList.size() + " entries, " + blueList.size() + " entries, " +
ppRamString(plasmaBlueListFile.length()/1024)); ppRamString(plasmaBlueListFile.length()/1024));
@ -280,7 +281,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load stopwords // load stopwords
if (stopwords == null) { if (stopwords == null) {
File stopwordsFile = new File(rootPath, "yacy.stopwords"); File stopwordsFile = new File(rootPath, "yacy.stopwords");
stopwords = kelondroMSetTools.loadList(stopwordsFile); stopwords = kelondroMSetTools.loadList(stopwordsFile, kelondroNaturalOrder.naturalOrder);
this.log.logConfig("loaded stopwords from file " + stopwordsFile.getName() + ", " + this.log.logConfig("loaded stopwords from file " + stopwordsFile.getName() + ", " +
stopwords.size() + " entries, " + stopwords.size() + " entries, " +
ppRamString(stopwordsFile.length()/1024)); ppRamString(stopwordsFile.length()/1024));

@ -59,6 +59,7 @@ import java.net.URL;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
@ -279,7 +280,8 @@ public final class plasmaWordIndex {
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) { public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList(); this.hierarchy = new ArrayList();
this.comp = kelondroMSetTools.fastStringComparator(up); this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty; this.delete = deleteEmpty;
// then we initially fill the hierarchy with the content of the root folder // then we initially fill the hierarchy with the content of the root folder

@ -50,6 +50,7 @@ import java.io.File;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
@ -216,7 +217,7 @@ public final class plasmaWordIndexAssortmentCluster {
public Iterator hashConjunction(String startWordHash, boolean up) { public Iterator hashConjunction(String startWordHash, boolean up) {
HashSet iterators = new HashSet(); HashSet iterators = new HashSet();
for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true)); for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true));
return kelondroMergeIterator.cascade(iterators, up); return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up);
} }
public int sizeTotal() { public int sizeTotal() {

@ -53,6 +53,7 @@ import de.anomic.kelondro.kelondroArray;
import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
@ -267,8 +268,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
new kelondroMergeIterator( new kelondroMergeIterator(
cache.tailMap(startWordHash).keySet().iterator(), cache.tailMap(startWordHash).keySet().iterator(),
assortmentCluster.hashConjunction(startWordHash, true), assortmentCluster.hashConjunction(startWordHash, true),
kelondroNaturalOrder.naturalOrder,
true), true),
backend.wordHashes(startWordHash, true), backend.wordHashes(startWordHash, true),
kelondroNaturalOrder.naturalOrder,
true); true);
} }

@ -50,6 +50,7 @@ import java.util.Iterator;
import java.util.TreeSet; import java.util.TreeSet;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
@ -83,7 +84,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
public iterateFiles(String startHash, boolean up) { public iterateFiles(String startHash, boolean up) {
this.hierarchy = new ArrayList(); this.hierarchy = new ArrayList();
this.comp = kelondroMSetTools.fastStringComparator(up); this.comp = new kelondroNaturalOrder(up);
// then we initially fill the hierarchy with the content of the root folder // then we initially fill the hierarchy with the content of the root folder
String path = "WORDS"; String path = "WORDS";

@ -77,6 +77,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaURLPool; import de.anomic.plasma.plasmaURLPool;
import de.anomic.plasma.plasmaWordIndex; import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.plasma.plasmaWordIndexAssortmentCluster; import de.anomic.plasma.plasmaWordIndexAssortmentCluster;
import de.anomic.plasma.plasmaWordIndexCache; import de.anomic.plasma.plasmaWordIndexCache;
import de.anomic.plasma.plasmaWordIndexClassicDB; import de.anomic.plasma.plasmaWordIndexClassicDB;
@ -1282,6 +1283,11 @@ public final class yacy {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log); plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log);
WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true); WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true);
} }
if (resource.startsWith("assortment")) {
int a = Integer.parseInt(resource.substring(10));
plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null);
WordHashIterator = assortment.hashes(wordChunkStartHash, true, false);
}
if (resource.equals("words")) { if (resource.equals("words")) {
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(homeDBroot, log); plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(homeDBroot, log);
WordHashIterator = fileDB.wordHashes(wordChunkStartHash, true); WordHashIterator = fileDB.wordHashes(wordChunkStartHash, true);

Loading…
Cancel
Save