* fixed problem with indexContainer iteration from RAM:

indexContainers from RAM must be cloned explicitly to prevent
  side-effects on stored indexContainer objects in Cache
* changed behaviour of urlReference deletion from indexContainers:
  deletion does not use retrieval of all Elements from the assortments
* added textual configuration of kelondroRow and kelondroColumn definition
* update of kelondroRow usage in yacyNews
* modified kelondroAttrSeq to use modified kelondroColumn parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2339 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 57fe5cc671
commit e357599f92

@ -53,6 +53,7 @@ import java.util.Enumeration;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.htmlFilter.htmlFilterContentScraper;
@ -185,7 +186,9 @@ public class IndexControl_p {
switchboard.urlPool.loadedURL.remove(urlx[i]);
}
}
switchboard.wordIndex.removeEntries(keyhash, urlx, true);
Set urlHashes = new HashSet();
for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
switchboard.wordIndex.removeEntries(keyhash, urlHashes, true);
// this shall lead to a presentation of the list; so handle that the remaining program
// thinks that it was called for a list presentation
post.remove("keyhashdelete");

@ -487,7 +487,7 @@ public class dir {
Map.Entry entry;
while (words.hasNext()) {
entry = (Map.Entry) words.next();
switchboard.wordIndex.removeEntries(indexEntryAttribute.word2hash((String) entry.getKey()), new String[] {urlhash}, true);
switchboard.wordIndex.removeEntry(indexEntryAttribute.word2hash((String) entry.getKey()), urlhash, true);
}
switchboard.urlPool.loadedURL.remove(urlhash);
} catch (Exception e) {

@ -29,6 +29,7 @@ package de.anomic.index;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.kelondroCollectionIndex;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -106,7 +107,12 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
return idx;
}
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) {
// Removes a single url reference from the container of the given word hash.
// NOTE(review): this is still an unimplemented stub; it ignores all arguments
// and always reports that nothing was removed.
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
// TODO Auto-generated method stub
return false;
}
// Removes a set of url references from the container of the given word hash.
// NOTE(review): this is still an unimplemented stub; it ignores all arguments
// and always reports a removal count of zero.
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
// TODO Auto-generated method stub
return 0;
}
@ -121,6 +127,4 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}

@ -29,11 +29,14 @@
package de.anomic.index;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.kelondroOrder;
public interface indexContainer {
public indexContainer topLevelClone();
public void clear();
public int size();
public long updated();
@ -50,17 +53,17 @@ public interface indexContainer {
public int add(indexEntry[] entries, long updateTime);
public int add(indexContainer c, long maxTime);
public Set urlHashes();
public boolean contains(String urlHash) ;
public indexEntry get(String urlHash);
public indexEntry[] getEntryArray() ;
public indexEntry remove(String urlHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
public Iterator entries();
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
public Iterator entries(); // returns an iterator of indexEntry objects
public String toString();
public int hashCode();
//public void joinConstructive(indexContainer c, long time, int maxDistance);

@ -30,6 +30,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroFixedWidthArray;
@ -274,8 +275,53 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
public Iterator wordContainers(String startWordHash, boolean rot) {
if (rot) throw new UnsupportedOperationException("plasmaWordIndexCache cannot rotate wordContainers");
return wCache.tailMap(startWordHash).values().iterator(); // The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
// we return an iterator object that creates top-level-clones of the indexContainers
// in the cache, so that manipulations of the iterated objects do not change
// objects in the cache.
return new wordContainerIterator(startWordHash, rot);
}
public class wordContainerIterator implements Iterator {

    // This class exists because the wCache cannot be iterated with rotation
    // and because every indexContainer object that is iterated must be returned
    // as a top-level clone, so that manipulations of the iterated objects do not
    // change the objects stored in the cache.
    // It simulates wCache.tailMap(startWordHash).values().iterator()
    // plus the mentioned features.

    private boolean rot;       // if true, restart at startHash when the end of the cache is reached
    private Iterator iterator; // iterator over the cached containers, in key order
    private String startHash;  // word hash where the iteration (and every rotation) starts

    public wordContainerIterator(String startWordHash, boolean rot) {
        this.rot = rot;
        this.startHash = startWordHash;
        // The collection's iterator will return the values in the order that
        // their corresponding keys appear in the tree.
        this.iterator = wCache.tailMap(startWordHash).values().iterator();
    }

    // A rotating iteration never reports its end.
    public boolean hasNext() {
        if (rot) return true;
        return iterator.hasNext();
    }

    // Returns a top-level clone of the next cached container, or null if a
    // non-rotating iteration is exhausted.
    public Object next() {
        if (!iterator.hasNext()) {
            if (!rot) return null;
            // rotation iteration: start again at the start hash
            iterator = wCache.tailMap(startHash).values().iterator();
        }
        // never hand out the cached object itself, only a clone of it
        return ((indexContainer) iterator.next()).topLevelClone();
    }

    // Removes the container that was returned last from the underlying cache map.
    public void remove() {
        iterator.remove();
    }

}
public void shiftK2W() {
@ -355,8 +401,19 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
if (urlHashes.length == 0) return 0;
// Removes a single url reference from the cached container of the given word hash.
// Returns true if the reference was found and removed, false otherwise.
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
    synchronized (wCache) {
        // the container is taken out of the cache, modified and then put back
        indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash);
        if (c == null) return false;
        boolean removed = c.removeEntry(wordHash, urlHash, deleteComplete);
        // Put the container back in both cases; otherwise a successful removal
        // would silently drop all remaining references of this word from the cache.
        // Only a container that became empty through the removal is not restored.
        if (!removed || c.size() > 0) {
            this.addEntries(c, System.currentTimeMillis(), false);
        }
        return removed;
    }
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (urlHashes.size() == 0) return 0;
int count = 0;
synchronized (wCache) {
indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash);

@ -43,6 +43,7 @@
package de.anomic.index;
import java.util.Iterator;
import java.util.Set;
public interface indexRI {
@ -55,7 +56,8 @@ public interface indexRI {
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime);
public indexContainer deleteContainer(String wordHash);
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete);
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);

@ -28,6 +28,7 @@ package de.anomic.index;
import java.lang.reflect.Method;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -40,6 +41,14 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
super(rowdef);
}
// Creates a new container with the same row definition, word hash and ordering
// as this one and adds the entries of this container to it.
public indexContainer topLevelClone() {
    indexContainer copy = new indexRowSetContainer(rowdef);
    copy.setWordHash(wordHash);
    copy.setOrdering(sortOrder, sortColumn);
    // -1 is passed as maxTime; presumably this means "no time limit" - TODO confirm
    copy.add(this, -1);
    return copy;
}
public void setWordHash(String newWordHash) {
this.wordHash = newWordHash;
}
@ -93,7 +102,12 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
return null;
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
// Removes a single url reference from this container.
// NOTE(review): this is still an unimplemented stub; it ignores all arguments
// and always reports that nothing was removed.
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
// TODO Auto-generated method stub
return false;
}
// Removes a set of url references from this container.
// NOTE(review): this is still an unimplemented stub; it ignores all arguments
// and always reports a removal count of zero.
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
// TODO Auto-generated method stub
return 0;
}
@ -126,4 +140,9 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain
return c;
}
// Should return the set of url hashes stored in this container.
// NOTE(review): this is still an unimplemented stub that returns null, not an
// empty set; callers must be prepared for that until it is implemented.
public Set urlHashes() {
// TODO Auto-generated method stub
return null;
}
}

@ -65,6 +65,12 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}
// Creates a new container with the same word hash, ordering and order column
// as this one and adds the entries of this container to it.
public indexContainer topLevelClone() {
    indexContainer copy = new indexTreeMapContainer(wordHash, ordering, order_column);
    // -1 is passed as maxTime; presumably this means "no time limit" - TODO confirm
    copy.add(this, -1);
    return copy;
}
public void setWordHash(String newWordHash) {
// this is used to replicate a container for different word indexes during global search
this.wordHash = newWordHash;
@ -158,15 +164,21 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen
return (indexURLEntry) container.remove(urlHash);
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
// Removes the entry with the given url hash from this container.
// Returns true only if the word hash matches this container and an entry was removed.
// The deleteComplete flag is not evaluated here.
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
    if (wordHash.equals(this.wordHash)) {
        return remove(urlHash) != null;
    }
    // a foreign word hash never matches anything in this container
    return false;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (!wordHash.equals(this.wordHash)) return 0;
int count = 0;
for (int i = 0; i < urlHashes.length; i++) count += (remove(urlHashes[i]) == null) ? 0 : 1;
Iterator i = urlHashes.iterator();
while (i.hasNext()) count += (remove((String) i.next()) == null) ? 0 : 1;
return count;
}
public Iterator entries() {
// returns an iterator of plasmaWordIndexEntry objects
// returns an iterator of indexEntry objects
return container.values().iterator();
}
@ -303,4 +315,8 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen
return conj;
}
// Returns the url hashes of all entries in this container.
// The result is the key set of the backing map itself, not a copy.
public Set urlHashes() {
    final Set hashes = container.keySet();
    return hashes;
}
}

@ -35,13 +35,13 @@ import de.anomic.plasma.plasmaWordIndex;
public class indexURLEntryNew implements Cloneable, indexEntry {
public static kelondroRow urlEntryRow = new kelondroRow(new kelondroColumn[]{
new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_none, indexURL.urlHashLength, "urlhash"),
new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, indexURL.urlHashLength, "urlhash"),
new kelondroColumn("q", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, indexURL.urlQualityLength, "quality"),
new kelondroColumn("a", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 3, "lastModified"),
new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "hitcount"),
new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_none, indexURL.urlLanguageLength, "language"),
new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_none, 1, "doctype"),
new kelondroColumn("f", kelondroColumn.celltype_binary, kelondroColumn.encoder_none, 1, "localflag"),
new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, indexURL.urlLanguageLength, "language"),
new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "doctype"),
new kelondroColumn("f", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "localflag"),
new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posintext"),
new kelondroColumn("r", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posinphrase"),
new kelondroColumn("o", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posofphrase"),
@ -115,14 +115,8 @@ public class indexURLEntryNew implements Cloneable, indexEntry {
}
public indexURLEntryNew(String external) {
}
/*
public indexURLEntryNew(kelondroRow.Entry entry) {
this.entry = entry;
this.entry = urlEntryRow.newEntry(external);
}
*/
public indexURLEntryNew(byte[] row) {
this.entry = urlEntryRow.newEntry(row);
@ -141,7 +135,7 @@ public class indexURLEntryNew implements Cloneable, indexEntry {
}
public String toPropertyForm() {
return entry.toPropertyForm();
return entry.toPropertyForm(true);
}
public Entry toKelondroEntry() {

@ -249,10 +249,9 @@ public class kelondroAttrSeq {
if (p < 0) return;
String pivot = structure.substring(0, p);
structure = structure.substring(p + 5);
Object[] a = atom(pivot);
if (a == null) return;
pivot_name = (String) a[0];
pivot_len = ((Integer) a[1]).intValue();
kelondroColumn a = new kelondroColumn(pivot);
pivot_name = a.nickname();
pivot_len = a.cellwidth();
// parse property part definition:
p = structure.indexOf(",'|'");
@ -262,7 +261,7 @@ public class kelondroAttrSeq {
String seqs = structure.substring(p + 5);
StringTokenizer st = new StringTokenizer(attr, ",");
while (st.hasMoreTokens()) {
a = atom(st.nextToken());
a = new kelondroColumn(st.nextToken());
if (a == null) break;
l.add(a);
}
@ -271,9 +270,9 @@ public class kelondroAttrSeq {
prop_pos = new int[l.size()];
p = 0;
for (int i = 0; i < l.size(); i++) {
a = (Object[]) l.get(i);
prop_names[i] = (String) a[0];
prop_len[i] = ((Integer) a[1]).intValue();
a = (kelondroColumn) l.get(i);
prop_names[i] = a.nickname();
prop_len[i] = a.cellwidth();
prop_pos[i] = p;
p += prop_len[i];
}
@ -283,7 +282,7 @@ public class kelondroAttrSeq {
l = new ArrayList();
st = new StringTokenizer(seqs, ",");
while (st.hasMoreTokens()) {
a = atom(st.nextToken());
a = new kelondroColumn(st.nextToken());
if (a == null) break;
l.add(a);
}
@ -292,32 +291,14 @@ public class kelondroAttrSeq {
seq_pos = new int[l.size()];
p = 0;
for (int i = 0; i < l.size(); i++) {
a = (Object[]) l.get(i);
seq_names[i] = (String) a[0];
seq_len[i] = ((Integer) a[1]).intValue();
a = (kelondroColumn) l.get(i);
seq_names[i] = a.nickname();
seq_len[i] = a.cellwidth();
seq_pos[i] = p;
p += seq_len[i];
}
}
private Object[] atom(String a) {
if (a.startsWith("<")) {
a = a.substring(1);
} else return null;
if (a.endsWith(">")) {
a = a.substring(0, a.length() - 1);
} else return null;
int p = a.indexOf('-');
if (p < 0) return null;
String atomname = a.substring(0, p);
try {
int x = Integer.parseInt(a.substring(p + 1));
return new Object[]{atomname, new Integer(x)};
} catch (NumberFormatException e) {
return null;
}
}
public String toString() {
StringBuffer sb = new StringBuffer(100);
sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',");

@ -34,14 +34,11 @@ public class kelondroColumn {
public static final int celltype_binary = 2;
public static final int celltype_string = 3;
public static final int celltype_cardinal = 4;
public static final int celltype_real = 5;
public static final int encoder_none = 0;
public static final int encoder_b64e = 1;
public static final int encoder_b256 = 2;
public static final int encoder_string = 3;
public static final int encoder_bytes = 4;
public static final int encoder_char = 5;
public static final int encoder_bytes = 3;
private int celltype, cellwidth, encoder;
private String nickname, description;
@ -54,6 +51,135 @@ public class kelondroColumn {
this.description = description;
}
// Defines a column with the textual column syntax:
//   ['<'] [type ' '] nickname ['-' width] [' {' encoder '}'] [' "' description '"'] ['>']
// examples: <UDate-3>
//           Cardinal hitcount-2 {b64e} "number of hits"
// Known types are boolean, byte, short, int, long, byte[], char, String and Cardinal;
// known encoders are b64e, b256 and bytes.
// Throws a kelondroException if the definition is malformed or inconsistent.
public kelondroColumn(String celldef) {
    // cut the enclosing angle brackets, if any
    if (celldef.startsWith("<")) celldef = celldef.substring(1);
    if (celldef.endsWith(">")) celldef = celldef.substring(0, celldef.length() - 1);

    // parse type definition
    int p = celldef.indexOf(' ');
    String typename = "";
    if (p < 0) {
        // no typedef
        this.celltype = celltype_undefined;
        this.cellwidth = -1;
    } else {
        typename = celldef.substring(0, p);
        celldef = celldef.substring(p + 1).trim();

        if (typename.equals("boolean")) {
            this.celltype = celltype_boolean;
            this.cellwidth = 1;
        } else if (typename.equals("byte")) {
            this.celltype = celltype_cardinal;
            this.cellwidth = 1;
        } else if (typename.equals("short")) {
            this.celltype = celltype_cardinal;
            this.cellwidth = 2;
        } else if (typename.equals("int")) {
            this.celltype = celltype_cardinal;
            this.cellwidth = 4;
        } else if (typename.equals("long")) {
            this.celltype = celltype_cardinal;
            this.cellwidth = 8;
        } else if (typename.equals("byte[]")) {
            this.celltype = celltype_binary;
            this.cellwidth = -1; // yet undefined
        } else if (typename.equals("char")) {
            this.celltype = celltype_string;
            this.cellwidth = 1;
        } else if (typename.equals("String")) {
            this.celltype = celltype_string;
            this.cellwidth = -1; // yet undefined
        } else if (typename.equals("Cardinal")) {
            this.celltype = celltype_cardinal;
            this.cellwidth = -1; // yet undefined
        } else {
            throw new kelondroException("kelondroColumn - undefined type def '" + typename + "'");
        }
    }

    // split off the "nickname[-width]" token; only this token is searched for the
    // width separator, so a '-' inside the description cannot be mistaken for it
    int q = celldef.indexOf(' ');
    String namedef = (q < 0) ? celldef : celldef.substring(0, q);
    celldef = (q < 0) ? "" : celldef.substring(q + 1).trim();

    // parse length
    p = namedef.indexOf('-');
    if (p < 0) {
        // if the cell was defined with a type, we don't need an explicit width definition
        if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width definition given");
        this.nickname = namedef;
    } else {
        // the nickname never includes the width suffix
        this.nickname = namedef.substring(0, p);
        String widthdef = namedef.substring(p + 1);
        try {
            this.cellwidth = Integer.parseInt(widthdef);
        } catch (NumberFormatException e) {
            throw new kelondroException("kelondroColumn - cellwidth description wrong:" + widthdef);
        }
    }

    // check length constraints
    if (this.cellwidth <= 0) throw new kelondroException("kelondroColumn - no cell width given for " + this.nickname);
    if (((typename.equals("boolean")) && (this.cellwidth > 1)) ||
        ((typename.equals("byte")) && (this.cellwidth > 1)) ||
        ((typename.equals("short")) && (this.cellwidth > 2)) ||
        ((typename.equals("int")) && (this.cellwidth > 4)) ||
        ((typename.equals("long")) && (this.cellwidth > 8)) ||
        ((typename.equals("char")) && (this.cellwidth > 1))
       ) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " too wide for type " + typename);
    if (((typename.equals("short")) && (this.cellwidth <= 1)) ||
        ((typename.equals("int")) && (this.cellwidth <= 2)) ||
        ((typename.equals("long")) && (this.cellwidth <= 4))
       ) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " not appropriate for type " + typename);

    // parse/check encoder type
    if ((celldef.length() > 0) && (celldef.charAt(0) == '{')) {
        p = celldef.indexOf('}');
        if (p < 0) throw new kelondroException("kelondroColumn - encoder definition not terminated for cell " + this.nickname);
        String expf = celldef.substring(1, p);
        celldef = celldef.substring(p + 1).trim();
        if (expf.equals("b64e")) this.encoder = encoder_b64e;
        else if (expf.equals("b256")) this.encoder = encoder_b256;
        else if (expf.equals("bytes")) this.encoder = encoder_bytes;
        else {
            // unknown encoder name: fall back to bytes where that is possible
            if (this.celltype == celltype_undefined) this.encoder = encoder_bytes;
            else if (this.celltype == celltype_boolean) this.encoder = encoder_bytes;
            else if (this.celltype == celltype_binary) this.encoder = encoder_bytes;
            else if (this.celltype == celltype_string) this.encoder = encoder_bytes;
            else if (this.celltype == celltype_cardinal) throw new kelondroException("kelondroColumn - encoder missing for cell " + this.nickname);
        }
    } else {
        // cardinal cells cannot be used without an explicit encoder
        if (this.celltype == celltype_cardinal) throw new kelondroException("kelondroColumn - encoder missing for cell " + this.nickname);
        this.encoder = encoder_bytes;
    }

    // parse/check description; without one the nickname is used
    if ((celldef.length() > 0) && (celldef.charAt(0) == '"')) {
        p = celldef.indexOf('"', 1);
        if (p < 0) throw new kelondroException("kelondroColumn - description not terminated for cell " + this.nickname);
        this.description = celldef.substring(1, p);
    } else {
        this.description = this.nickname;
    }
}
public int celltype() {
return this.celltype;
}

@ -50,6 +50,10 @@ public class kelondroException extends java.lang.RuntimeException {
super("unspecific-error");
}
// Creates an exception that carries the given message unchanged.
public kelondroException(String message) {
super(message);
}
public kelondroException(String database, String message) {
super(message + " in db '" + database + "'");
}

@ -28,12 +28,17 @@
package de.anomic.kelondro;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
public class kelondroRow {
private kelondroColumn[] row;
protected int[] colstart;
private int objectsize;
private Map nickref = null;
public kelondroRow(kelondroColumn[] row) {
this.row = row;
@ -57,6 +62,56 @@ public class kelondroRow {
}
}
// Defines a row with the textual row syntax, for example:
//# Structure=<pivot-12>,'=',<UDate-3>,<VDate-3>,<LCount-2>,<GCount-2>,<ICount-2>,<DCount-2>,<TLength-3>,<WACount-3>,<WUCount-3>,<Flags-1>
// An optional pivot column precedes ,'=', and becomes column 0; the comma-separated
// columns that follow, up to an optional ,'|', marker, become the remaining columns.
// Anything behind the ,'|', marker is not evaluated here.
public kelondroRow(String structure) {
    ArrayList columns = new ArrayList();

    // pivot part: everything in front of ,'=',
    int cut = structure.indexOf(",'='");
    if (cut >= 0) {
        String pivotDef = structure.substring(0, cut);
        structure = structure.substring(cut + 5);
        columns.add(new kelondroColumn(pivotDef));
    }

    // property part: everything up to ,'|', or to the end of the definition
    cut = structure.indexOf(",'|'");
    String propertyDefs = (cut < 0) ? structure : structure.substring(0, cut);
    StringTokenizer tokens = new StringTokenizer(propertyDefs, ",");
    while (tokens.hasMoreTokens()) {
        columns.add(new kelondroColumn(tokens.nextToken()));
    }

    // define columns: each one starts where the previous one ended
    this.row = new kelondroColumn[columns.size()];
    this.colstart = new int[row.length];
    this.objectsize = 0;
    for (int col = 0; col < row.length; col++) {
        this.row[col] = (kelondroColumn) columns.get(col);
        this.colstart[col] = this.objectsize;
        this.objectsize += this.row[col].cellwidth();
    }
}
// Lazily builds the lookup table nickname -> {column, start position of the column}.
// Does nothing if the table already exists.
private void genNickRef() {
    if (nickref == null) {
        nickref = new HashMap(row.length);
        int col = 0;
        while (col < row.length) {
            Object[] ref = new Object[]{row[col], new Integer(colstart[col])};
            nickref.put(row[col].nickname(), ref);
            col++;
        }
    }
}
public int columns() {
return this.row.length;
}
@ -94,6 +149,11 @@ public class kelondroRow {
return new Entry(cells);
}
// Creates an entry from its external (textual) form; a null input yields null.
public Entry newEntry(String external) {
    return (external == null) ? null : new Entry(external);
}
public class Entry {
private byte[] rowinstance;
@ -128,6 +188,25 @@ public class kelondroRow {
}
}
// Creates an entry from its external form: a comma-separated list of
// nickname=value pairs, optionally enclosed in braces, e.g. {h=abc,q=1}.
// Elements without a '=' (or with an empty nickname) are skipped.
// Values are copied as bytes into their column; a value longer than the column
// is cut to the column width, a shorter one leaves the rest of the cell zeroed.
// Throws a kelondroException if a nickname is not a column of this row.
// NOTE(review): values that contain ',' cannot be represented in this form,
// because the input is split at every comma.
public Entry(String external) {
    // strip the enclosing braces, if present
    if (external.startsWith("{")) external = external.substring(1);
    if (external.endsWith("}")) external = external.substring(0, external.length() - 1);
    String[] elts = external.split(",");
    if (nickref == null) genNickRef();
    String nick;
    int p;
    Object[] f;
    byte[] value;
    int width;
    rowinstance = new byte[objectsize];
    for (int i = 0; i < elts.length; i++) {
        p = elts[i].indexOf('=');
        if (p <= 0) continue;
        nick = elts[i].substring(0, p).trim();
        f = (Object[]) nickref.get(nick);
        if (f == null) throw new kelondroException("ROW", "unknown column nickname '" + nick + "' in external form");
        value = elts[i].substring(p + 1).trim().getBytes();
        width = ((kelondroColumn) f[0]).cellwidth();
        // never copy more bytes than the value has or the cell can hold
        System.arraycopy(value, 0, rowinstance, ((Integer) f[1]).intValue(), Math.min(value.length, width));
    }
}
public byte[] bytes() {
return rowinstance;
}
@ -183,13 +262,8 @@ public class kelondroRow {
case kelondroColumn.encoder_b256:
setColLongB256(column, cell);
break;
case kelondroColumn.encoder_string:
setCol(column, Long.toString(cell).getBytes());
break;
case kelondroColumn.encoder_bytes:
throw new kelondroException("ROW", "setColLong of celltype bytes not applicable");
case kelondroColumn.encoder_char:
throw new kelondroException("ROW", "setColLong of celltype char not applicable");
}
}
@ -229,12 +303,8 @@ public class kelondroRow {
return getColLongB64E(column);
case kelondroColumn.encoder_b256:
return getColLongB256(column);
case kelondroColumn.encoder_string:
return Long.parseLong(getColString(column, null));
case kelondroColumn.encoder_bytes:
throw new kelondroException("ROW", "getColLong of celltype bytes not applicable");
case kelondroColumn.encoder_char:
throw new kelondroException("ROW", "getColLong of celltype char not applicable");
}
throw new kelondroException("ROW", "getColLong did not find appropriate encoding");
}
@ -259,6 +329,7 @@ public class kelondroRow {
return c;
}
/*
public byte[] toEncodedBytesForm() {
byte[] b = new byte[objectsize];
int encoder, cellwidth;
@ -287,16 +358,15 @@ public class kelondroRow {
continue;
}
throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")");
case kelondroColumn.celltype_real:
throw new kelondroException("ROW", "toEncodedForm of celltype real not yet implemented");
}
}
return b;
}
*/
public String toPropertyForm() {
public String toPropertyForm(boolean includeBraces) {
StringBuffer sb = new StringBuffer();
sb.append("{");
if (includeBraces) sb.append("{");
int encoder, cellwidth;
for (int i = 0; i < row.length; i++) {
encoder = row[i].encoder();
@ -328,12 +398,10 @@ public class kelondroRow {
continue;
}
throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")");
case kelondroColumn.celltype_real:
throw new kelondroException("ROW", "toEncodedForm of celltype real not yet implemented");
}
}
if (sb.charAt(sb.length() - 1) == ',') sb.deleteCharAt(sb.length() - 1); // remove ',' at end
sb.append("}");
if (includeBraces) sb.append("}");
return sb.toString();
}

@ -44,6 +44,7 @@ package de.anomic.plasma;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.index.indexContainer;
@ -211,17 +212,19 @@ public class plasmaDHTChunk {
try {
lurl = lurls.getEntry(indexEntry.urlHash(), indexEntry);
if ((lurl == null) || (lurl.url() == null)) {
yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + indexEntry.urlHash() + "' for word hash " + container.getWordHash());
notBoundCounter++;
urlIter.remove();
wordIndex.removeEntries(container.getWordHash(), new String[] { indexEntry.urlHash() }, true);
wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true);
} else {
urlCache.put(indexEntry.urlHash(), lurl);
yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + indexEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash());
refcount++;
}
} catch (IOException e) {
notBoundCounter++;
urlIter.remove();
wordIndex.removeEntries(container.getWordHash(), new String[] { indexEntry.urlHash() }, true);
wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true);
}
}
@ -270,16 +273,17 @@ public class plasmaDHTChunk {
public int deleteTransferIndexes() {
Iterator urlIter;
indexURLEntry indexEntry;
String[] urlHashes;
HashSet urlHashes;
int count = 0;
for (int i = 0; i < this.indexContainers.length; i++) {
// delete entries separately
int c = 0;
urlHashes = new String[this.indexContainers[i].size()];
int c = this.indexContainers[i].size();
urlHashes = new HashSet(this.indexContainers[i].size());
urlIter = this.indexContainers[i].entries();
while (urlIter.hasNext()) {
indexEntry = (indexURLEntry) urlIter.next();
urlHashes[c++] = indexEntry.urlHash();
urlHashes.add(indexEntry.urlHash());
}
count += wordIndex.removeEntries(this.indexContainers[i].getWordHash(), urlHashes, true);
log.logFine("Deleted partial index (" + c + " URLs) for word " + this.indexContainers[i].getWordHash() + "; " + this.wordIndex.indexSize(indexContainers[i].getWordHash()) + " entries left");

@ -169,7 +169,7 @@ public class plasmaDHTFlush extends Thread {
// selecting 500 words to transfer
this.status = "Running: Selecting chunk " + iteration;
newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.urlPool.loadedURL, this.chunkSize/3, this.chunkSize, this.startPointHash);
newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.urlPool.loadedURL, this.chunkSize/3*2, this.chunkSize, this.startPointHash);
/* If we havn't selected a word chunk this could be because of
* a) no words are left in the index

@ -2007,12 +2007,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// returns number of deletions
Iterator iter = words.iterator();
String word;
final String[] urlEntries = new String[] {urlhash};
int count = 0;
while (iter.hasNext()) {
word = (String) iter.next();
// delete the URL reference in this word index
count += wordIndex.removeEntries(indexEntryAttribute.word2hash(word), urlEntries, true);
if (wordIndex.removeEntry(indexEntryAttribute.word2hash(word), urlhash, true)) count++;
}
return count;
}
@ -2022,13 +2021,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// returns number of deletions
Map.Entry entry;
String word;
final String[] urlEntries = new String[] {urlhash};
int count = 0;
while (wordStatPropIterator.hasNext()) {
entry = (Map.Entry) wordStatPropIterator.next();
word = (String) entry.getKey();
// delete the URL reference in this word index
count += wordIndex.removeEntries(indexEntryAttribute.word2hash(word), urlEntries, true);
if (wordIndex.removeEntry(indexEntryAttribute.word2hash(word), urlhash, true)) count++;
}
return count;
}

@ -290,25 +290,28 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
// get from cache
indexContainer container = ramCache.getContainer(wordHash, true, -1);
// get from assortments
if (container == null) {
container = assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime);
} else {
// We must not use the container from cache to store everything we find,
// as that container remains linked to in the cache and might be changed later
// while the returned container is still in use.
// e.g. indexTransfer might keep this container for minutes while
// several new pages could be added to the index, possibly with the same words that have
// been selected for transfer
container.add(ramCache.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
// get from assortments
container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
// create a clone from the container
container = container.topLevelClone();
// add containers from assortment cluster
container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1);
}
// get from backend
if (maxTime > 0) {
maxTime = maxTime - (System.currentTimeMillis() - start);
if (maxTime < 0) maxTime = 100;
}
container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1);
return container;
}
@ -374,25 +377,23 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return c;
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
int removed;
boolean addedEntryToRamCache = false;
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (this) {
removed = ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.length) return removed;
indexContainer container = assortmentCluster.deleteContainer(wordHash, -1);
if (container != null) {
removed += container.removeEntries(wordHash, urlHashes, deleteComplete);
if (container.size() != 0) {
ramCache.addEntries(container, System.currentTimeMillis(), false);
addedEntryToRamCache = true;
if (ramCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true;
return backend.removeEntry(wordHash, urlHash, deleteComplete);
}
}
if (removed != urlHashes.length) {
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
int removed = 0;;
synchronized (this) {
removed += ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
}
if (addedEntryToRamCache) flushControl();
return removed;
}
@ -604,9 +605,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
}
if (urlHashs.size() > 0) {
String[] urlArray;
urlArray = (String[]) urlHashs.toArray(new String[0]);
int removed = removeEntries(container.getWordHash(), urlArray, true);
int removed = removeEntries(container.getWordHash(), urlHashs, true);
serverLog.logFine("INDEXCLEANER", container.getWordHash() + ": " + removed + " of " + container.size() + " URL-entries deleted");
lastWordHash = container.getWordHash();
lastDeletionCounter = urlHashs.size();

@ -50,9 +50,11 @@ import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexRowSetContainer;
@ -243,6 +245,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
return record;
}
/*
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) {
indexContainer c = deleteContainer(wordHash, -1);
int b = c.size();
@ -252,6 +255,47 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
return b - c.size();
}
*/
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
    // Removes the reference to urlHash from the container stored for wordHash.
    // The assortments are searched in order; every container taken out of an
    // assortment on the way is collected in 'record' and written back with
    // addEntries() afterwards, minus the removed reference.
    // Returns true if the reference was found and removed, false otherwise.
    // The deleteComplete flag is not evaluated by this method.
    indexContainer buffer, record = new indexTreeMapContainer(wordHash);
    boolean found = false;
    for (int i = 0; i < clusterCount; i++) {
        buffer = assortments[i].remove(wordHash);
        // an assortment that holds nothing for this word contributes nothing;
        // skip it instead of handing a null container to record.add()
        if (buffer == null) continue;
        if (buffer.remove(urlHash) != null) found = true;
        record.add(buffer, -1);
        if (found) break;
    }
    // put back remaining
    if (record.size() != 0) {
        addEntries(record, record.updated(), false);
    }
    return found;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
    // Removes all references listed in urlHashes from the container stored for
    // wordHash and returns the number of references that were removed.
    // Containers taken out of the assortments are collected in 'record' and
    // written back with addEntries() once the unwanted references are dropped.
    // The deleteComplete flag is not evaluated by this method.
    //
    // The caller's set is NOT modified: callers compare the returned count
    // against urlHashes.size() and pass the same set on to further index
    // layers, so shrinking it here would corrupt those checks.
    if (urlHashes.isEmpty()) return 0; // nothing to delete, leave the assortments untouched
    indexContainer buffer, record = new indexTreeMapContainer(wordHash);
    Set remaining = new HashSet(urlHashes); // private working copy
    int initialSize = remaining.size();
    for (int i = 0; i < clusterCount; i++) {
        buffer = assortments[i].remove(wordHash);
        if (buffer != null) {
            // sort out url hashes that shall be deleted
            Iterator bi = buffer.entries();
            indexEntry entry;
            while (bi.hasNext()) {
                entry = (indexEntry) bi.next();
                if (remaining.remove(entry.urlHash())) bi.remove();
            }
            record.add(buffer, -1);
        }
        // every requested hash has been found; no need to visit more assortments
        if (remaining.size() == 0) break;
    }
    // put back remaining
    if (record.size() != 0) {
        addEntries(record, record.updated(), false);
    }
    return initialSize - remaining.size();
}
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
// collect all records from all the assortments and return them

@ -47,6 +47,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
@ -256,14 +257,34 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
return new indexTreeMapContainer(wordHash);
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
    // Removes one url hash from the index file of a single word.
    // Returns the result reported by the index file's removeEntry(), or false
    // if an IOException occurred. If deleteComplete is set and the index file
    // is empty afterwards, the whole container is deleted.
    plasmaWordIndexFile entity = null;
    try {
        entity = getEntity(wordHash, true, -1);
        boolean removed = false;
        if (entity.removeEntry(urlHash, deleteComplete)) {
            removed = true;
        }
        final int remaining = entity.size();
        entity.close();
        entity = null; // closed regularly; nothing left for the finally clause
        // check if we can remove the index completely
        if (deleteComplete && remaining == 0) {
            deleteContainer(wordHash);
        }
        return removed;
    } catch (IOException e) {
        log.logSevere("plasmaWordIndexClassic.removeEntries: " + e.getMessage());
        return false;
    } finally {
        // only reached with an open entity if the try block was left early
        if (entity != null) {
            try {
                entity.close();
            } catch (Exception e) {
            }
        }
    }
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
// removes all given url hashes from a single word index. Returns number of deletions.
plasmaWordIndexFile pi = null;
int count = 0;
try {
pi = getEntity(wordHash, true, -1);
for (int i = 0; i < urlHashes.length; i++)
if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
Iterator i = urlHashes.iterator();
while (i.hasNext()) if (pi.removeEntry((String) i.next(), deleteComplete)) count++;
int size = pi.size();
pi.close(); pi = null;
// check if we can remove the index completely

@ -50,7 +50,6 @@ import java.util.Iterator;
import de.anomic.yacy.yacyCore;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow;
@ -64,12 +63,6 @@ public class yacyNewsDB {
private long preloadTime;
private kelondroTree news;
public static final int attributesMaxLength = yacyNewsRecord.maxNewsRecordLength
- yacyNewsRecord.idLength()
- yacyNewsRecord.categoryStringLength
- yacyCore.universalDateShortPattern.length()
- 2;
public yacyNewsDB(File path, int bufferkb, long preloadTime) {
this.path = path;
this.bufferkb = bufferkb;
@ -84,16 +77,8 @@ public class yacyNewsDB {
}
}
public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{
new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.idLength(), "id = created + originator"),
new kelondroColumn("category", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.categoryStringLength, ""),
new kelondroColumn("received", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyCore.universalDateShortPattern.length(), ""),
new kelondroColumn("", kelondroColumn.celltype_string, kelondroColumn.encoder_string, 2, ""),
new kelondroColumn("", kelondroColumn.celltype_string, kelondroColumn.encoder_string, attributesMaxLength, ""),
});
private static kelondroTree createDB(File path, int bufferkb, long preloadTime) {
return new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
return new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, yacyNewsRecord.rowdef, true);
}
private void resetDB() {
@ -189,7 +174,7 @@ public class yacyNewsDB {
private kelondroRow.Entry r2b(yacyNewsRecord r) {
if (r == null) return null;
String attributes = r.attributes().toString();
if (attributes.length() > attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + attributesMaxLength);
if (attributes.length() > yacyNewsRecord.attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + yacyNewsRecord.attributesMaxLength);
kelondroRow.Entry entry = news.row().newEntry();
entry.setCol(0, r.id().getBytes());
entry.setCol(1, r.category().getBytes());

@ -142,7 +142,7 @@ public class yacyNewsPool {
// check consistency
if (record.id() == null) return;
if (record.id().length() != yacyNewsRecord.idLength()) return;
if (record.id().length() != yacyNewsRecord.idLength) return;
if (record.category() == null) return;
if (!(categories.contains(record.category()))) return;
if (record.created().getTime() == 0) return;

@ -77,8 +77,8 @@ public class yacyNewsQueue {
}
public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{
new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.idLength(), "id = created + originator"),
new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyCore.universalDateShortPattern.length(), "")
new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyNewsRecord.idLength, "id = created + originator"),
new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyCore.universalDateShortPattern.length(), "")
});
private static kelondroStack createStack(File path) {

@ -44,6 +44,7 @@
package de.anomic.yacy;
import de.anomic.kelondro.kelondroRow;
import de.anomic.server.serverCodings;
import de.anomic.server.serverDate;
@ -54,6 +55,7 @@ public class yacyNewsRecord {
// total length budget of a news record; attributesMaxLength is derived from it
public static final int maxNewsRecordLength = 512;
// width of the category column in rowdef
public static final int categoryStringLength = 8;
// length of a record id: short universal date string followed by a peer hash (see id())
public static final int idLength = yacyCore.universalDateShortPattern.length() + yacySeedDB.commonHashLength;
private String originator; // hash of originating peer
private Date created; // Date when news was created by originator
@ -62,6 +64,20 @@ public class yacyNewsRecord {
private int distributed; // counter that counts number of distributions of this news record
private Map attributes; // elements of the news for a special category
// maximum length of the attribute string: what remains of maxNewsRecordLength
// after the id, the category, the date string and 2 further characters are
// subtracted (the 2 presumably corresponds to the dis-2 column of rowdef)
public static final int attributesMaxLength = maxNewsRecordLength
- idLength
- categoryStringLength
- yacyCore.universalDateShortPattern.length()
- 2;
// row definition for stored news records, written in the textual column syntax of kelondroRow.
// columns in order: idx (idLength), cat (categoryStringLength), rec (date pattern length),
// dis (2), att (attributesMaxLength)
// NOTE(review): each column looks like '<type> <name>-<width> [{encoder}] ["description"]';
// confirm the exact grammar against the kelondroColumn parser
public static final kelondroRow rowdef = new kelondroRow(
"String idx-" + idLength + " \"id = created + originator\"," +
"String cat-" + categoryStringLength + "," +
"String rec-" + yacyCore.universalDateShortPattern.length() + "," +
"short dis-2 {b64e}," +
"String att-" + attributesMaxLength
);
public yacyNewsRecord(String newsString) {
this.attributes = serverCodings.string2map(newsString);
this.received = (attributes.containsKey("rec")) ? yacyCore.parseUniversalDate((String) attributes.get("rec"), serverDate.UTCDiffString()) : new Date();
@ -118,10 +134,6 @@ public class yacyNewsRecord {
return yacyCore.universalDateShortString(created) + originator;
}
public static int idLength() {
return yacyCore.universalDateShortPattern.length() + yacySeedDB.commonHashLength;
}
public String originator() {
    // hash of the peer that originated this news record
    final String originatorHash = this.originator;
    return originatorHash;
}

Loading…
Cancel
Save