orbiter 17 years ago
parent 483e9a2066
commit d0678f7ab9

@ -77,17 +77,13 @@ public class kelondroBytesIntMap {
}
public synchronized ArrayList<Integer[]> removeDoubles() throws IOException {
ArrayList<kelondroRowSet> indexreport = index.removeDoubles();
ArrayList<Integer[]> report = new ArrayList<Integer[]>();
Iterator<kelondroRowSet> i = indexreport.iterator();
kelondroRowSet rowset;
Integer[] is;
Iterator<kelondroRow.Entry> ei;
int c;
while (i.hasNext()) {
rowset = i.next();
is = new Integer[rowset.size()];
ei = rowset.rows();
for (kelondroRowCollection delset: index.removeDoubles()) {
is = new Integer[delset.size()];
ei = delset.rows();
c = 0;
while (ei.hasNext()) {
is[c++] = new Integer((int) ei.next().getColLong(1));

@ -77,12 +77,12 @@ public class kelondroBytesLongMap {
}
public synchronized ArrayList<Long[]> removeDoubles() throws IOException {
ArrayList<kelondroRowSet> indexreport = index.removeDoubles();
ArrayList<kelondroRowCollection> indexreport = index.removeDoubles();
ArrayList<Long[]> report = new ArrayList<Long[]>();
Long[] is;
Iterator<kelondroRow.Entry> ei;
int c;
for (kelondroRowSet rowset: indexreport) {
for (kelondroRowCollection rowset: indexreport) {
is = new Long[rowset.size()];
ei = rowset.rows();
c = 0;

@ -359,7 +359,7 @@ public class kelondroCache implements kelondroIndex {
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
public synchronized ArrayList<kelondroRowCollection> removeDoubles() throws IOException {
return index.removeDoubles();
// todo: remove reported entries from the cache!!!
}

@ -32,7 +32,6 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
@ -231,17 +230,13 @@ public class kelondroCollectionIndex {
}
}
// care for double entries
ArrayList<kelondroRowSet> del = index.removeDoubles();
Iterator<kelondroRowSet> j = del.iterator();
kelondroRowSet rowset;
Iterator<kelondroRow.Entry> rowiter;
int partition, maxpartition;
kelondroRow.Entry entry, maxentry;
int doublecount = 0;
while (j.hasNext()) {
rowset = j.next();
// for each entry in row set choose one which we want to keep
rowiter = rowset.rows();
for (kelondroRowCollection doubleset: index.removeDoubles()) {
// for each entry in doubleset choose one which we want to keep
rowiter = doubleset.rows();
maxentry = null;
maxpartition = -1;
while (rowiter.hasNext()) {

@ -284,25 +284,21 @@ public class kelondroEcoTable implements kelondroIndex {
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
ArrayList<Integer[]> indexreport = index.removeDoubles();
ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
Iterator<Integer[]> i = indexreport.iterator();
Integer[] is;
public synchronized ArrayList<kelondroRowCollection> removeDoubles() throws IOException {
ArrayList<kelondroRowCollection> report = new ArrayList<kelondroRowCollection>();
kelondroRowSet rows;
TreeSet<Integer> d = new TreeSet<Integer>();
byte[] b = new byte[rowdef.objectsize];
while (i.hasNext()) {
is = i.next();
for (Integer[] is: index.removeDoubles()) {
rows = new kelondroRowSet(this.rowdef, is.length);
for (int j = 0; j < is.length; j++) {
d.add(is[j]);
file.get(is[j].intValue(), b, 0);
file.get(is[j].intValue(), b, 0); // TODO: fix IndexOutOfBoundsException here
rows.addUnique(rowdef.newEntry(b));
}
report.add(rows);
}
// finally delete the affected rows, but start with largest id first, othervise we overwrite wrong entries
// finally delete the affected rows, but start with largest id first, otherwise we overwrite wrong entries
Integer s;
while (d.size() > 0) {
s = d.last();

@ -321,15 +321,11 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return indexed_result.size();
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
ArrayList<Integer[]> indexreport = index.removeDoubles();
ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
Iterator<Integer[]> i = indexreport.iterator();
Integer[] is;
public synchronized ArrayList<kelondroRowCollection> removeDoubles() throws IOException {
ArrayList<kelondroRowCollection> report = new ArrayList<kelondroRowCollection>();
kelondroRowSet rows;
TreeSet<Integer> d = new TreeSet<Integer>();
while (i.hasNext()) {
is = i.next();
for (Integer[] is: index.removeDoubles()) {
rows = new kelondroRowSet(this.rowdef, is.length);
for (int j = 0; j < is.length; j++) {
d.add(is[j]);
@ -337,7 +333,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
report.add(rows);
}
// finally delete the affected rows, but start with largest id first, othervise we overwrite wrong entries
// finally delete the affected rows, but start with largest id first, otherwise we overwrite wrong entries
Integer s;
while (d.size() > 0) {
s = d.last();

@ -68,7 +68,7 @@ public interface kelondroIndex {
public void putMultiple(List<kelondroRow.Entry> rows) throws IOException; // for R/W head path optimization
public boolean addUnique(kelondroRow.Entry row) throws IOException; // no double-check
public int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException; // no double-check
public ArrayList<kelondroRowSet> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
public ArrayList<kelondroRowCollection> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException;
public kelondroRow.Entry removeOne() throws IOException;
public kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException; // iterates only the key

@ -125,7 +125,7 @@ public class kelondroRAMIndex implements kelondroIndex {
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() {
public synchronized ArrayList<kelondroRowCollection> removeDoubles() {
// finish initialization phase explicitly
if (index1 == null) index1 = new kelondroRowSet(rowdef, 0);
return index0.removeDoubles();

@ -737,18 +737,18 @@ public class kelondroRowCollection {
}
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() {
public synchronized ArrayList<kelondroRowCollection> removeDoubles() {
assert (this.rowdef.objectOrder != null);
// removes double-occurrences of chunks
// in contrast to uniq() this removes also the remaining, non-double entry that had a double-occurrance to the others
// in contrast to uniq(), this also removes the remaining, non-double entry that had a double-occurrence among the others
// all removed chunks are returned in an array
this.sort();
ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
ArrayList<kelondroRowCollection> report = new ArrayList<kelondroRowCollection>();
if (chunkcount < 2) return report;
int i = chunkcount - 2;
int d = 0;
boolean u = true;
kelondroRowSet collection = new kelondroRowSet(this.rowdef, 2);
kelondroRowCollection collection = new kelondroRowCollection(this.rowdef, 2);
try {
while (i >= 0) {
if (compare(i, i + 1) == 0) {
@ -893,7 +893,7 @@ public class kelondroRowCollection {
a.add("BBBBBBBBBBBB".getBytes());
a.add("BBBBBBBBBBBB".getBytes());
a.add("CCCCCCCCCCCC".getBytes());
ArrayList<kelondroRowSet> del = a.removeDoubles();
ArrayList<kelondroRowCollection> del = a.removeDoubles();
System.out.println(del + "rows double");
Iterator<kelondroRow.Entry> j = a.rows();
while (j.hasNext()) System.out.println(new String(j.next().bytes()));

@ -141,8 +141,8 @@ public class kelondroSQLTable implements kelondroIndex {
return (get(key) != null);
}
public ArrayList<kelondroRowSet> removeDoubles() {
return new ArrayList<kelondroRowSet>();
public ArrayList<kelondroRowCollection> removeDoubles() {
return new ArrayList<kelondroRowCollection>();
}
public kelondroRow.Entry get(byte[] key) throws IOException {

@ -346,9 +346,9 @@ public class kelondroSplitTable implements kelondroIndex {
while (i.hasNext()) addUnique(i.next(), entryDate);
}
public ArrayList<kelondroRowSet> removeDoubles() throws IOException {
public ArrayList<kelondroRowCollection> removeDoubles() throws IOException {
Iterator<kelondroIndex> i = tables.values().iterator();
ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
ArrayList<kelondroRowCollection> report = new ArrayList<kelondroRowCollection>();
while (i.hasNext()) {
report.addAll(i.next().removeDoubles());
}

@ -188,9 +188,9 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
return result;
}
public ArrayList<kelondroRowSet> removeDoubles() {
public ArrayList<kelondroRowCollection> removeDoubles() {
// this data structure cannot have doubles; return empty array
return new ArrayList<kelondroRowSet>();
return new ArrayList<kelondroRowCollection>();
}
public class Search {

@ -345,7 +345,7 @@ public final class plasmaParser {
public static String patchCharsetEncoding(String encoding) {
// return a default encoding
if ((encoding == null) || (encoding.length() == 0)) return "ISO-8859-1";
if ((encoding == null) || (encoding.length() < 3)) return "ISO-8859-1";
// trim encoding string
encoding = encoding.trim();

@ -56,7 +56,7 @@ import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRotateIterator;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
import de.anomic.xml.RSSFeed;
@ -419,8 +419,8 @@ public final class plasmaWordIndex implements indexRI {
// check doubles
int beforeDouble = container.size();
ArrayList<kelondroRowSet> d = container.removeDoubles();
kelondroRowSet set;
ArrayList<kelondroRowCollection> d = container.removeDoubles();
kelondroRowCollection set;
for (int i = 0; i < d.size(); i++) {
// for each element in the double-set, take that one that is the most recent one
set = d.get(i);

@ -24,7 +24,7 @@
package de.anomic.yacy;
// this class exsist to provide a system-wide normal form representation of urls,
// this class exists to provide a system-wide normal form representation of urls,
// and to prevent java.net.URL usage from causing the DNS queries which are used in java.net.
import java.io.File;

Loading…
Cancel
Save