diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 257f535d3..714166efb 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -53,6 +53,7 @@ import java.util.Enumeration; import java.util.HashSet; import java.util.HashMap; import java.util.Iterator; +import java.util.Set; import java.util.TreeMap; import de.anomic.htmlFilter.htmlFilterContentScraper; @@ -185,7 +186,9 @@ public class IndexControl_p { switchboard.urlPool.loadedURL.remove(urlx[i]); } } - switchboard.wordIndex.removeEntries(keyhash, urlx, true); + Set urlHashes = new HashSet(); + for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]); + switchboard.wordIndex.removeEntries(keyhash, urlHashes, true); // this shall lead to a presentation of the list; so handle that the remaining program // thinks that it was called for a list presentation post.remove("keyhashdelete"); diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index f06fbf9e6..0bfb7944c 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -487,7 +487,7 @@ public class dir { Map.Entry entry; while (words.hasNext()) { entry = (Map.Entry) words.next(); - switchboard.wordIndex.removeEntries(indexEntryAttribute.word2hash((String) entry.getKey()), new String[] {urlhash}, true); + switchboard.wordIndex.removeEntry(indexEntryAttribute.word2hash((String) entry.getKey()), urlhash, true); } switchboard.urlPool.loadedURL.remove(urlhash); } catch (Exception e) { diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index 7470e8b35..2bc1e671a 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -29,6 +29,7 @@ package de.anomic.index; import java.io.File; import java.io.IOException; import java.util.Iterator; +import java.util.Set; import de.anomic.kelondro.kelondroCollectionIndex; import de.anomic.kelondro.kelondroNaturalOrder; @@ -106,7 +107,12 
@@ public class indexCollectionRI extends indexAbstractRI implements indexRI { return idx; } - public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) { + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + // TODO Auto-generated method stub + return false; + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { // TODO Auto-generated method stub return 0; } @@ -120,7 +126,5 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI { // TODO Auto-generated method stub } - - - + } diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index a605c041e..7ff4dd53d 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -29,11 +29,14 @@ package de.anomic.index; import java.util.Iterator; +import java.util.Set; import de.anomic.kelondro.kelondroOrder; public interface indexContainer { + public indexContainer topLevelClone(); + public void clear(); public int size(); public long updated(); @@ -50,17 +53,17 @@ public interface indexContainer { public int add(indexEntry[] entries, long updateTime); public int add(indexContainer c, long maxTime); + public Set urlHashes(); public boolean contains(String urlHash) ; public indexEntry get(String urlHash); public indexEntry[] getEntryArray() ; public indexEntry remove(String urlHash); - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete); - - public Iterator entries(); + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete); + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete); + public Iterator entries(); // returns an iterator of indexEntry objects public String toString(); - public int hashCode(); //public void joinConstructive(indexContainer c, long time, int maxDistance); diff --git 
a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index 0635f7af1..04540aa5a 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -30,6 +30,7 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import de.anomic.kelondro.kelondroFixedWidthArray; @@ -274,10 +275,55 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { } public Iterator wordContainers(String startWordHash, boolean rot) { - if (rot) throw new UnsupportedOperationException("plasmaWordIndexCache cannot rotate wordContainers"); - return wCache.tailMap(startWordHash).values().iterator(); // The collection's iterator will return the values in the order that their corresponding keys appear in the tree. + // we return an iterator object that creates top-level-clones of the indexContainers + // in the cache, so that manipulations of the iterated objects do not change + // objects in the cache. + return new wordContainerIterator(startWordHash, rot); } + public class wordContainerIterator implements Iterator { + + // this class exists, because the wCache cannot be iterated with rotation + // and because every indexContainer Object that is iterated must be returned as top-level-clone + // so this class simulates wCache.tailMap(startWordHash).values().iterator() + // plus the mentioned features + + private boolean rot; + private Iterator iterator; + private String startHash; + + public wordContainerIterator(String startWordHash, boolean rot) { + this.rot = rot; + this.startHash = startWordHash; + this.iterator = wCache.tailMap(startWordHash).values().iterator(); + // The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
+ } + + public boolean hasNext() { + if (rot) return wCache.size() > 0; // a rotating iteration never ends, unless the cache holds nothing to iterate + return iterator.hasNext(); + } + + public Object next() { + if (iterator.hasNext()) { + return ((indexContainer) iterator.next()).topLevelClone(); + } else { + // rotation iteration: wrap around to the first word hash of the cache (not to startHash again) + if (rot) { + iterator = wCache.values().iterator(); + return ((indexContainer) iterator.next()).topLevelClone(); + } else { + return null; + } + } + } + + public void remove() { + iterator.remove(); + } + + } + public void shiftK2W() { // find entries in kCache that are too old for that place and shift them to the wCache long time; @@ -355,8 +401,19 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { } } - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { - if (urlHashes.length == 0) return 0; + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + synchronized (wCache) { + indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash); + if (c == null) return false; + boolean removed = c.removeEntry(wordHash, urlHash, deleteComplete); + // deleteContainer() took the whole container out of the cache: put the remaining references back + if (c.size() != 0) this.addEntries(c, System.currentTimeMillis(), false); + return removed; + } + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { + if (urlHashes.size() == 0) return 0; int count = 0; synchronized (wCache) { indexTreeMapContainer c = (indexTreeMapContainer) deleteContainer(wordHash); diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java index 45af36302..3738fd11d 100644 --- a/source/de/anomic/index/indexRI.java +++ b/source/de/anomic/index/indexRI.java @@ -43,6 +43,7 @@ package de.anomic.index; import java.util.Iterator; +import java.util.Set; public interface indexRI { @@ -55,7 +56,8 @@ public interface indexRI { public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime); public indexContainer deleteContainer(String wordHash); - public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete); + public boolean
removeEntry(String wordHash, String urlHash, boolean deleteComplete); + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete); public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase); public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase); diff --git a/source/de/anomic/index/indexRowSetContainer.java b/source/de/anomic/index/indexRowSetContainer.java index 70e96f1e9..af180822e 100644 --- a/source/de/anomic/index/indexRowSetContainer.java +++ b/source/de/anomic/index/indexRowSetContainer.java @@ -28,6 +28,7 @@ package de.anomic.index; import java.lang.reflect.Method; import java.util.Iterator; +import java.util.Set; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; @@ -40,6 +41,14 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain super(rowdef); } + public indexContainer topLevelClone() { + indexContainer newContainer = new indexRowSetContainer(this.rowdef); + newContainer.setWordHash(this.wordHash); + newContainer.setOrdering(this.sortOrder, this.sortColumn); + newContainer.add(this, -1); + return newContainer; + } + public void setWordHash(String newWordHash) { this.wordHash = newWordHash; } @@ -93,7 +102,12 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain return null; } - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + // TODO Auto-generated method stub + return false; + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { // TODO Auto-generated method stub return 0; } @@ -125,5 +139,10 @@ public class indexRowSetContainer extends kelondroRowSet implements indexContain c.add((indexContainer) b, -1); return c; } + + public Set urlHashes() { + // TODO Auto-generated method stub + return null; + 
} } diff --git a/source/de/anomic/index/indexTreeMapContainer.java b/source/de/anomic/index/indexTreeMapContainer.java index deb0f8355..d52adb046 100644 --- a/source/de/anomic/index/indexTreeMapContainer.java +++ b/source/de/anomic/index/indexTreeMapContainer.java @@ -65,6 +65,12 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation } + public indexContainer topLevelClone() { + indexContainer newContainer = new indexTreeMapContainer(this.wordHash, this.ordering, this.order_column); + newContainer.add(this, -1); + return newContainer; + } + public void setWordHash(String newWordHash) { // this is used to replicate a container for different word indexes during global search this.wordHash = newWordHash; @@ -158,15 +164,21 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen return (indexURLEntry) container.remove(urlHash); } - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + if (!wordHash.equals(this.wordHash)) return false; + return remove(urlHash) != null; + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { if (!wordHash.equals(this.wordHash)) return 0; int count = 0; - for (int i = 0; i < urlHashes.length; i++) count += (remove(urlHashes[i]) == null) ? 0 : 1; + Iterator i = urlHashes.iterator(); + while (i.hasNext()) count += (remove((String) i.next()) == null) ? 
0 : 1; return count; } public Iterator entries() { - // returns an iterator of plasmaWordIndexEntry objects + // returns an iterator of indexEntry objects return container.values().iterator(); } @@ -303,4 +315,8 @@ public final class indexTreeMapContainer extends indexAbstractContainer implemen return conj; } + public Set urlHashes() { + return container.keySet(); + } + } diff --git a/source/de/anomic/index/indexURLEntryNew.java b/source/de/anomic/index/indexURLEntryNew.java index 30a4a0d0a..1e43e5dbf 100644 --- a/source/de/anomic/index/indexURLEntryNew.java +++ b/source/de/anomic/index/indexURLEntryNew.java @@ -35,13 +35,13 @@ import de.anomic.plasma.plasmaWordIndex; public class indexURLEntryNew implements Cloneable, indexEntry { public static kelondroRow urlEntryRow = new kelondroRow(new kelondroColumn[]{ - new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_none, indexURL.urlHashLength, "urlhash"), + new kelondroColumn("h", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, indexURL.urlHashLength, "urlhash"), new kelondroColumn("q", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, indexURL.urlQualityLength, "quality"), new kelondroColumn("a", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 3, "lastModified"), new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "hitcount"), - new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_none, indexURL.urlLanguageLength, "language"), - new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_none, 1, "doctype"), - new kelondroColumn("f", kelondroColumn.celltype_binary, kelondroColumn.encoder_none, 1, "localflag"), + new kelondroColumn("l", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, indexURL.urlLanguageLength, "language"), + new kelondroColumn("d", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "doctype"), + new kelondroColumn("f", 
kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "localflag"), new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posintext"), new kelondroColumn("r", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posinphrase"), new kelondroColumn("o", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b64e, 2, "posofphrase"), @@ -115,15 +115,9 @@ public class indexURLEntryNew implements Cloneable, indexEntry { } public indexURLEntryNew(String external) { - + this.entry = urlEntryRow.newEntry(external); } - /* - public indexURLEntryNew(kelondroRow.Entry entry) { - this.entry = entry; - } - */ - public indexURLEntryNew(byte[] row) { this.entry = urlEntryRow.newEntry(row); } @@ -141,7 +135,7 @@ public class indexURLEntryNew implements Cloneable, indexEntry { } public String toPropertyForm() { - return entry.toPropertyForm(); + return entry.toPropertyForm(true); } public Entry toKelondroEntry() { diff --git a/source/de/anomic/kelondro/kelondroAttrSeq.java b/source/de/anomic/kelondro/kelondroAttrSeq.java index 3225ba172..8f8aba241 100644 --- a/source/de/anomic/kelondro/kelondroAttrSeq.java +++ b/source/de/anomic/kelondro/kelondroAttrSeq.java @@ -249,10 +249,9 @@ public class kelondroAttrSeq { if (p < 0) return; String pivot = structure.substring(0, p); structure = structure.substring(p + 5); - Object[] a = atom(pivot); - if (a == null) return; - pivot_name = (String) a[0]; - pivot_len = ((Integer) a[1]).intValue(); + kelondroColumn a = new kelondroColumn(pivot); + pivot_name = a.nickname(); + pivot_len = a.cellwidth(); // parse property part definition: p = structure.indexOf(",'|'"); @@ -262,7 +261,7 @@ public class kelondroAttrSeq { String seqs = structure.substring(p + 5); StringTokenizer st = new StringTokenizer(attr, ","); while (st.hasMoreTokens()) { - a = atom(st.nextToken()); + a = new kelondroColumn(st.nextToken()); if (a == null) break; l.add(a); } @@ -271,9 +270,9 @@ public class 
kelondroAttrSeq { prop_pos = new int[l.size()]; p = 0; for (int i = 0; i < l.size(); i++) { - a = (Object[]) l.get(i); - prop_names[i] = (String) a[0]; - prop_len[i] = ((Integer) a[1]).intValue(); + a = (kelondroColumn) l.get(i); + prop_names[i] = a.nickname(); + prop_len[i] = a.cellwidth(); prop_pos[i] = p; p += prop_len[i]; } @@ -283,7 +282,7 @@ public class kelondroAttrSeq { l = new ArrayList(); st = new StringTokenizer(seqs, ","); while (st.hasMoreTokens()) { - a = atom(st.nextToken()); + a = new kelondroColumn(st.nextToken()); if (a == null) break; l.add(a); } @@ -292,32 +291,14 @@ public class kelondroAttrSeq { seq_pos = new int[l.size()]; p = 0; for (int i = 0; i < l.size(); i++) { - a = (Object[]) l.get(i); - seq_names[i] = (String) a[0]; - seq_len[i] = ((Integer) a[1]).intValue(); + a = (kelondroColumn) l.get(i); + seq_names[i] = a.nickname(); + seq_len[i] = a.cellwidth(); seq_pos[i] = p; p += seq_len[i]; } } - - private Object[] atom(String a) { - if (a.startsWith("<")) { - a = a.substring(1); - } else return null; - if (a.endsWith(">")) { - a = a.substring(0, a.length() - 1); - } else return null; - int p = a.indexOf('-'); - if (p < 0) return null; - String atomname = a.substring(0, p); - try { - int x = Integer.parseInt(a.substring(p + 1)); - return new Object[]{atomname, new Integer(x)}; - } catch (NumberFormatException e) { - return null; - } - } - + public String toString() { StringBuffer sb = new StringBuffer(100); sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',"); diff --git a/source/de/anomic/kelondro/kelondroColumn.java b/source/de/anomic/kelondro/kelondroColumn.java index a89067767..a0a18f0bf 100644 --- a/source/de/anomic/kelondro/kelondroColumn.java +++ b/source/de/anomic/kelondro/kelondroColumn.java @@ -34,14 +34,11 @@ public class kelondroColumn { public static final int celltype_binary = 2; public static final int celltype_string = 3; public static final int celltype_cardinal = 
4; - public static final int celltype_real = 5; public static final int encoder_none = 0; public static final int encoder_b64e = 1; public static final int encoder_b256 = 2; - public static final int encoder_string = 3; - public static final int encoder_bytes = 4; - public static final int encoder_char = 5; + public static final int encoder_bytes = 3; private int celltype, cellwidth, encoder; private String nickname, description; @@ -54,6 +51,135 @@ public class kelondroColumn { this.description = description; } + public kelondroColumn(String celldef) { + // define column with column syntax + // example: + + // cut quotes etc. + if (celldef.startsWith("<")) celldef = celldef.substring(1); + if (celldef.endsWith(">")) celldef = celldef.substring(0, celldef.length() - 1); + + // parse type definition + int p = celldef.indexOf(' '); + String typename = ""; + if (p < 0) { + // no typedef + this.celltype = celltype_undefined; + this.cellwidth = -1; + } else { + typename = celldef.substring(0, p); + celldef = celldef.substring(p + 1).trim(); + + if (typename.equals("boolean")) { + this.celltype = celltype_boolean; + this.cellwidth = 1; + } else if (typename.equals("byte")) { + this.celltype = celltype_cardinal; + this.cellwidth = 1; + } else if (typename.equals("short")) { + this.celltype = celltype_cardinal; + this.cellwidth = 2; + } else if (typename.equals("int")) { + this.celltype = celltype_cardinal; + this.cellwidth = 4; + } else if (typename.equals("long")) { + this.celltype = celltype_cardinal; + this.cellwidth = 8; + } else if (typename.equals("byte[]")) { + this.celltype = celltype_binary; + this.cellwidth = -1; // yet undefined + } else if (typename.equals("char")) { + this.celltype = celltype_string; + this.cellwidth = 1; + } else if (typename.equals("String")) { + this.celltype = celltype_string; + this.cellwidth = -1; // yet undefined + } else if (typename.equals("Cardinal")) { + this.celltype = celltype_cardinal; + this.cellwidth = -1; // yet undefined + } 
else { + throw new kelondroException("kelondroColumn - undefined type def '" + typename + "'"); + } + } + + // parse length + p = celldef.indexOf('-'); + if (p < 0) { + // if the cell was defined with a type, we dont need to give an explicit with definition + if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width definition given"); + p = celldef.indexOf(' '); + if (p < 0) { + this.nickname = celldef; + celldef = ""; + } else { + this.nickname = celldef.substring(0, p); + celldef = celldef.substring(p + 1); + } + } else { + int q = celldef.indexOf(' '); + if (q < 0) { + this.nickname = celldef.substring(0, p); + try { + this.cellwidth = Integer.parseInt(celldef.substring(p + 1)); + } catch (NumberFormatException e) { + throw new kelondroException("kelondroColumn - cellwidth description wrong:" + celldef.substring(p + 1)); + } + celldef = ""; + } else { + this.nickname = celldef.substring(0, q); + try { + this.cellwidth = Integer.parseInt(celldef.substring(p + 1, q)); + } catch (NumberFormatException e) { + throw new kelondroException("kelondroColumn - cellwidth description wrong:" + celldef.substring(p + 1, q)); + } + celldef = celldef.substring(q + 1); + } + } + + // check length constraints + if (this.cellwidth <= 0) throw new kelondroException("kelondroColumn - no cell width given for " + this.nickname); + if (((typename.equals("boolean")) && (this.cellwidth > 1)) || + ((typename.equals("byte")) && (this.cellwidth > 1)) || + ((typename.equals("short")) && (this.cellwidth > 2)) || + ((typename.equals("int")) && (this.cellwidth > 4)) || + ((typename.equals("long")) && (this.cellwidth > 8)) || + ((typename.equals("char")) && (this.cellwidth > 1)) + ) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " too wide for type " + typename); + if (((typename.equals("short")) && (this.cellwidth <= 1)) || + ((typename.equals("int")) && (this.cellwidth <= 2)) || + ((typename.equals("long")) && (this.cellwidth <= 4)) + ) 
throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " not appropriate for type " + typename); + + // parse/check encoder type: a leading {b64e}, {b256} or {bytes} tag selects the matching encoder_* constant + if ((celldef.length() > 0) && (celldef.charAt(0) == '{')) { + p = celldef.indexOf('}'); + String expf = celldef.substring(1, p); + celldef = celldef.substring(p + 1).trim(); + if (expf.equals("b64e")) this.encoder = encoder_b64e; + else if (expf.equals("b256")) this.encoder = encoder_b256; + else if (expf.equals("bytes")) this.encoder = encoder_bytes; + else { + if (this.celltype == celltype_undefined) this.encoder = encoder_bytes; + else if (this.celltype == celltype_boolean) this.encoder = encoder_bytes; + else if (this.celltype == celltype_binary) this.encoder = encoder_bytes; + else if (this.celltype == celltype_string) this.encoder = encoder_bytes; + else if (this.celltype == celltype_cardinal) throw new kelondroException("kelondroColumn - encoder missing for cell " + this.nickname); + } + } else { + if (this.celltype == celltype_cardinal) throw new kelondroException("kelondroColumn - encoder missing for cell " + this.nickname); + this.encoder = encoder_bytes; + } + + // parse/check description + if ((celldef.length() > 0) && (celldef.charAt(0) == '"')) { + p = celldef.indexOf('"', 1); + this.description = celldef.substring(1, p); + celldef = celldef.substring(p + 1).trim(); + } else { + this.description = this.nickname; + } + } + public int celltype() { return this.celltype; } diff --git a/source/de/anomic/kelondro/kelondroException.java b/source/de/anomic/kelondro/kelondroException.java index 48f2cab2b..4715d28f7 100644 --- a/source/de/anomic/kelondro/kelondroException.java +++ b/source/de/anomic/kelondro/kelondroException.java @@ -50,8 +50,12 @@ public class kelondroException extends java.lang.RuntimeException { super("unspecific-error"); } + public kelondroException(String message) { + super(message); + } + public kelondroException(String database, String message) { super(message + " in db '" + database +
"'"); } - + } diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index 2a0e7214a..9109e8bee 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -28,12 +28,17 @@ package de.anomic.kelondro; import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.StringTokenizer; public class kelondroRow { private kelondroColumn[] row; protected int[] colstart; private int objectsize; + private Map nickref = null; public kelondroRow(kelondroColumn[] row) { this.row = row; @@ -57,6 +62,56 @@ public class kelondroRow { } } + public kelondroRow(String structure) { + // define row with row syntax + // example: + //# Structure=,'=',,,,,,,,,, + + // parse a structure string + kelondroColumn pivot_col = null; + + // parse pivot definition: + int p = structure.indexOf(",'='"); + if (p >= 0) { + String pivot = structure.substring(0, p); + structure = structure.substring(p + 5); + pivot_col = new kelondroColumn(pivot); + } + + // parse property part definition: + p = structure.indexOf(",'|'"); + if (p < 0) p = structure.length(); + ArrayList l = new ArrayList(); + String attr = structure.substring(0, p); + StringTokenizer st = new StringTokenizer(attr, ","); + while (st.hasMoreTokens()) { + l.add(new kelondroColumn(st.nextToken())); + } + + // define columns + int piv_offset = (pivot_col == null) ? 
0 : 1; + this.row = new kelondroColumn[l.size() + piv_offset]; + this.colstart = new int[row.length]; + this.objectsize = 0; + if (pivot_col != null) { + this.colstart[0] = 0; + this.row[0] = pivot_col; + this.objectsize += this.row[0].cellwidth(); + } + for (int i = 0; i < l.size(); i++) { + this.colstart[i + piv_offset] = this.objectsize; + this.row[i + piv_offset] = (kelondroColumn) l.get(i); + this.objectsize += this.row[i + piv_offset].cellwidth(); + } + + } + + private void genNickRef() { + if (nickref != null) return; + nickref = new HashMap(row.length); + for (int i = 0; i < row.length; i++) nickref.put(row[i].nickname(), new Object[]{row[i], new Integer(colstart[i])}); + } + public int columns() { return this.row.length; } @@ -94,6 +149,11 @@ public class kelondroRow { return new Entry(cells); } + public Entry newEntry(String external) { + if (external == null) return null; + return new Entry(external); + } + public class Entry { private byte[] rowinstance; @@ -128,6 +188,25 @@ public class kelondroRow { } } + public Entry(String external) { + // parse external form + if (external.charAt(0) == '{') external = external.substring(1, external.length() - 1); + String[] elts = external.split(","); + if (nickref == null) genNickRef(); + String nick; + int p; + Object[] f; + rowinstance = new byte[objectsize]; + for (int i = 0; i < elts.length; i++) { + p = elts[i].indexOf('='); + if (p > 0) { + nick = elts[i].substring(0, p).trim(); + f = (Object[]) nickref.get(nick); + System.arraycopy(elts[i].substring(p + 1).trim().getBytes(), 0, rowinstance, ((Integer) f[1]).intValue(), ((kelondroColumn) f[0]).cellwidth()); + } + } + } + public byte[] bytes() { return rowinstance; } @@ -183,13 +262,8 @@ public class kelondroRow { case kelondroColumn.encoder_b256: setColLongB256(column, cell); break; - case kelondroColumn.encoder_string: - setCol(column, Long.toString(cell).getBytes()); - break; case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "setColLong 
of celltype bytes not applicable"); - case kelondroColumn.encoder_char: - throw new kelondroException("ROW", "setColLong of celltype char not applicable"); } } @@ -229,12 +303,8 @@ public class kelondroRow { return getColLongB64E(column); case kelondroColumn.encoder_b256: return getColLongB256(column); - case kelondroColumn.encoder_string: - return Long.parseLong(getColString(column, null)); case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "getColLong of celltype bytes not applicable"); - case kelondroColumn.encoder_char: - throw new kelondroException("ROW", "getColLong of celltype char not applicable"); } throw new kelondroException("ROW", "getColLong did not find appropriate encoding"); } @@ -258,7 +328,8 @@ public class kelondroRow { System.arraycopy(rowinstance, colstart[column], c, 0, row[column].cellwidth()); return c; } - + + /* public byte[] toEncodedBytesForm() { byte[] b = new byte[objectsize]; int encoder, cellwidth; @@ -287,16 +358,15 @@ public class kelondroRow { continue; } throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")"); - case kelondroColumn.celltype_real: - throw new kelondroException("ROW", "toEncodedForm of celltype real not yet implemented"); } } return b; } + */ - public String toPropertyForm() { + public String toPropertyForm(boolean includeBraces) { StringBuffer sb = new StringBuffer(); - sb.append("{"); + if (includeBraces) sb.append("{"); int encoder, cellwidth; for (int i = 0; i < row.length; i++) { encoder = row[i].encoder(); @@ -328,12 +398,10 @@ public class kelondroRow { continue; } throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")"); - case kelondroColumn.celltype_real: - throw new kelondroException("ROW", "toEncodedForm of celltype real not yet implemented"); } } if (sb.charAt(sb.length() - 1) == ',') sb.deleteCharAt(sb.length() - 1); // remove ',' at end - sb.append("}"); + if (includeBraces) 
sb.append("}"); return sb.toString(); } diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java index f072966cc..783387568 100644 --- a/source/de/anomic/plasma/plasmaDHTChunk.java +++ b/source/de/anomic/plasma/plasmaDHTChunk.java @@ -44,6 +44,7 @@ package de.anomic.plasma; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import de.anomic.index.indexContainer; @@ -211,17 +212,19 @@ public class plasmaDHTChunk { try { lurl = lurls.getEntry(indexEntry.urlHash(), indexEntry); if ((lurl == null) || (lurl.url() == null)) { + yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + indexEntry.urlHash() + "' for word hash " + container.getWordHash()); notBoundCounter++; urlIter.remove(); - wordIndex.removeEntries(container.getWordHash(), new String[] { indexEntry.urlHash() }, true); + wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true); } else { urlCache.put(indexEntry.urlHash(), lurl); + yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + indexEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash()); refcount++; } } catch (IOException e) { notBoundCounter++; urlIter.remove(); - wordIndex.removeEntries(container.getWordHash(), new String[] { indexEntry.urlHash() }, true); + wordIndex.removeEntry(container.getWordHash(), indexEntry.urlHash(), true); } } @@ -270,16 +273,17 @@ public class plasmaDHTChunk { public int deleteTransferIndexes() { Iterator urlIter; indexURLEntry indexEntry; - String[] urlHashes; + HashSet urlHashes; int count = 0; + for (int i = 0; i < this.indexContainers.length; i++) { // delete entries separately - int c = 0; - urlHashes = new String[this.indexContainers[i].size()]; + int c = this.indexContainers[i].size(); + urlHashes = new HashSet(this.indexContainers[i].size()); urlIter = this.indexContainers[i].entries(); 
while (urlIter.hasNext()) { indexEntry = (indexURLEntry) urlIter.next(); - urlHashes[c++] = indexEntry.urlHash(); + urlHashes.add(indexEntry.urlHash()); } count += wordIndex.removeEntries(this.indexContainers[i].getWordHash(), urlHashes, true); log.logFine("Deleted partial index (" + c + " URLs) for word " + this.indexContainers[i].getWordHash() + "; " + this.wordIndex.indexSize(indexContainers[i].getWordHash()) + " entries left"); diff --git a/source/de/anomic/plasma/plasmaDHTFlush.java b/source/de/anomic/plasma/plasmaDHTFlush.java index b462e3997..db4a2ab42 100644 --- a/source/de/anomic/plasma/plasmaDHTFlush.java +++ b/source/de/anomic/plasma/plasmaDHTFlush.java @@ -169,7 +169,7 @@ public class plasmaDHTFlush extends Thread { // selecting 500 words to transfer this.status = "Running: Selecting chunk " + iteration; - newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.urlPool.loadedURL, this.chunkSize/3, this.chunkSize, this.startPointHash); + newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.urlPool.loadedURL, this.chunkSize/3*2, this.chunkSize, this.startPointHash); /* If we havn't selected a word chunk this could be because of * a) no words are left in the index diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java index 468b44089..d0c409d6c 100644 --- a/source/de/anomic/plasma/plasmaRankingCRProcess.java +++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java @@ -221,7 +221,7 @@ public class plasmaRankingCRProcess { int count = 0; int size = cr.size(); long start = System.currentTimeMillis(); - long l; + long l; final Iterator i = cr.keys(); String referee, anchor, anchorDom; kelondroAttrSeq.Entry cr_entry, rci_entry; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index bae16e95d..48a58f2fb 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java 
@@ -2007,12 +2007,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // returns number of deletions Iterator iter = words.iterator(); String word; - final String[] urlEntries = new String[] {urlhash}; int count = 0; while (iter.hasNext()) { word = (String) iter.next(); // delete the URL reference in this word index - count += wordIndex.removeEntries(indexEntryAttribute.word2hash(word), urlEntries, true); + if (wordIndex.removeEntry(indexEntryAttribute.word2hash(word), urlhash, true)) count++; } return count; } @@ -2022,13 +2021,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // returns number of deletions Map.Entry entry; String word; - final String[] urlEntries = new String[] {urlhash}; int count = 0; while (wordStatPropIterator.hasNext()) { entry = (Map.Entry) wordStatPropIterator.next(); word = (String) entry.getKey(); // delete the URL reference in this word index - count += wordIndex.removeEntries(indexEntryAttribute.word2hash(word), urlEntries, true); + if (wordIndex.removeEntry(indexEntryAttribute.word2hash(word), urlhash, true)) count++; } return count; } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index edeb2383e..637021d31 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -290,25 +290,28 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { long start = System.currentTimeMillis(); - indexTreeMapContainer container = new indexTreeMapContainer(wordHash); // get from cache - // We must not use the container from cache to store everything we find, - // as that container remains linked to in the cache and might be changed later - // while the returned container is still in use. - // e.g. 
indexTransfer might keep this container for minutes while - // several new pages could be added to the index, possibly with the same words that have - // been selected for transfer - container.add(ramCache.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); + indexContainer container = ramCache.getContainer(wordHash, true, -1); // get from assortments - container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); - + if (container == null) { + container = assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime); + } else { + // We must not use the container from cache to store everything we find, + // as that container remains linked to in the cache and might be changed later + // while the returned container is still in use. + // create a clone from the container + container = container.topLevelClone(); + // add containers from assortment cluster + container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1); + } + // get from backend if (maxTime > 0) { maxTime = maxTime - (System.currentTimeMillis() - start); if (maxTime < 0) maxTime = 100; } - container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); + container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? 
-1 : maxTime), -1); return container; } @@ -374,25 +377,23 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { return c; } - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { - int removed; - boolean addedEntryToRamCache = false; + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { synchronized (this) { - removed = ramCache.removeEntries(wordHash, urlHashes, deleteComplete); - if (removed == urlHashes.length) return removed; - indexContainer container = assortmentCluster.deleteContainer(wordHash, -1); - if (container != null) { - removed += container.removeEntries(wordHash, urlHashes, deleteComplete); - if (container.size() != 0) { - ramCache.addEntries(container, System.currentTimeMillis(), false); - addedEntryToRamCache = true; - } - } - if (removed != urlHashes.length) { - removed += backend.removeEntries(wordHash, urlHashes, deleteComplete); - } + if (ramCache.removeEntry(wordHash, urlHash, deleteComplete)) return true; + if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true; + return backend.removeEntry(wordHash, urlHash, deleteComplete); + } + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { + int removed = 0;; + synchronized (this) { + removed += ramCache.removeEntries(wordHash, urlHashes, deleteComplete); + if (removed == urlHashes.size()) return removed; + removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete); + if (removed == urlHashes.size()) return removed; + removed += backend.removeEntries(wordHash, urlHashes, deleteComplete); } - if (addedEntryToRamCache) flushControl(); return removed; } @@ -604,9 +605,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } } if (urlHashs.size() > 0) { - String[] urlArray; - urlArray = (String[]) urlHashs.toArray(new String[0]); - int removed = removeEntries(container.getWordHash(), 
urlArray, true); + int removed = removeEntries(container.getWordHash(), urlHashs, true); serverLog.logFine("INDEXCLEANER", container.getWordHash() + ": " + removed + " of " + container.size() + " URL-entries deleted"); lastWordHash = container.getWordHash(); lastDeletionCounter = urlHashs.size(); diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 4e48c1415..974b023eb 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -50,9 +50,11 @@ import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Iterator; +import java.util.Set; import de.anomic.index.indexContainer; import de.anomic.index.indexContainerOrder; +import de.anomic.index.indexEntry; import de.anomic.index.indexRI; import de.anomic.index.indexAbstractRI; import de.anomic.index.indexRowSetContainer; @@ -243,6 +245,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl return record; } + /* public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) { indexContainer c = deleteContainer(wordHash, -1); int b = c.size(); @@ -252,7 +255,48 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl } return b - c.size(); } - + */ + + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + indexContainer buffer, record = new indexTreeMapContainer(wordHash); + boolean found = false; + for (int i = 0; i < clusterCount; i++) { + buffer = assortments[i].remove(wordHash); + if ((buffer != null) && (buffer.remove(urlHash) != null)) found = true; + record.add(buffer, -1); + if (found) break; + } + // put back remaining + if (record.size() != 0) { + addEntries(record, record.updated(), false); + } + return found; + } + + public int removeEntries(String wordHash, Set urlHashes, boolean 
deleteComplete) { + indexContainer buffer, record = new indexTreeMapContainer(wordHash); + int initialSize = urlHashes.size(); + for (int i = 0; i < clusterCount; i++) { + buffer = assortments[i].remove(wordHash); + if (buffer != null) { + // sort out url hashes that shall be deleted + Iterator bi = buffer.entries(); + indexEntry entry; + while (bi.hasNext()) { + entry = (indexEntry) bi.next(); + if (urlHashes.remove(entry.urlHash())) bi.remove(); + } + record.add(buffer, -1); + } + if (urlHashes.size() == 0) break; + } + // put back remaining + if (record.size() != 0) { + addEntries(record, record.updated(), false); + } + return initialSize - urlHashes.size(); + } + public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { // collect all records from all the assortments and return them indexContainer buffer, record = new indexTreeMapContainer(wordHash); diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index 81a095fdf..19b5bc791 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -47,6 +47,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; +import java.util.Set; import java.util.TreeSet; import de.anomic.index.indexContainer; @@ -256,14 +257,34 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index return new indexTreeMapContainer(wordHash); } - public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) { + public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { + // removes all given url hashes from a single word index. Returns number of deletions. 
+ plasmaWordIndexFile pi = null; + boolean removed = false; + try { + pi = getEntity(wordHash, true, -1); + if (pi.removeEntry(urlHash, deleteComplete)) removed = true; + int size = pi.size(); + pi.close(); pi = null; + // check if we can remove the index completely + if ((deleteComplete) && (size == 0)) deleteContainer(wordHash); + return removed; + } catch (IOException e) { + log.logSevere("plasmaWordIndexClassic.removeEntries: " + e.getMessage()); + return false; + } finally { + if (pi != null) try{pi.close();}catch(Exception e){} + } + } + + public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { // removes all given url hashes from a single word index. Returns number of deletions. plasmaWordIndexFile pi = null; int count = 0; try { pi = getEntity(wordHash, true, -1); - for (int i = 0; i < urlHashes.length; i++) - if (pi.removeEntry(urlHashes[i], deleteComplete)) count++; + Iterator i = urlHashes.iterator(); + while (i.hasNext()) if (pi.removeEntry((String) i.next(), deleteComplete)) count++; int size = pi.size(); pi.close(); pi = null; // check if we can remove the index completely diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index f02810752..7853719e2 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -50,7 +50,6 @@ import java.util.Iterator; import de.anomic.yacy.yacyCore; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; @@ -64,12 +63,6 @@ public class yacyNewsDB { private long preloadTime; private kelondroTree news; - public static final int attributesMaxLength = yacyNewsRecord.maxNewsRecordLength - - yacyNewsRecord.idLength() - - yacyNewsRecord.categoryStringLength - - yacyCore.universalDateShortPattern.length() - - 2; - public yacyNewsDB(File path, int bufferkb, long 
preloadTime) { this.path = path; this.bufferkb = bufferkb; @@ -83,17 +76,9 @@ public class yacyNewsDB { news = createDB(path, bufferkb, preloadTime); } } - - public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{ - new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.idLength(), "id = created + originator"), - new kelondroColumn("category", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.categoryStringLength, ""), - new kelondroColumn("received", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyCore.universalDateShortPattern.length(), ""), - new kelondroColumn("", kelondroColumn.celltype_string, kelondroColumn.encoder_string, 2, ""), - new kelondroColumn("", kelondroColumn.celltype_string, kelondroColumn.encoder_string, attributesMaxLength, ""), - }); private static kelondroTree createDB(File path, int bufferkb, long preloadTime) { - return new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); + return new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, yacyNewsRecord.rowdef, true); } private void resetDB() { @@ -189,7 +174,7 @@ public class yacyNewsDB { private kelondroRow.Entry r2b(yacyNewsRecord r) { if (r == null) return null; String attributes = r.attributes().toString(); - if (attributes.length() > attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + attributesMaxLength); + if (attributes.length() > yacyNewsRecord.attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + yacyNewsRecord.attributesMaxLength); kelondroRow.Entry entry = news.row().newEntry(); entry.setCol(0, r.id().getBytes()); entry.setCol(1, r.category().getBytes()); diff --git a/source/de/anomic/yacy/yacyNewsPool.java 
b/source/de/anomic/yacy/yacyNewsPool.java index 58df4aef1..99ef0b83b 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -142,7 +142,7 @@ public class yacyNewsPool { // check consistency if (record.id() == null) return; - if (record.id().length() != yacyNewsRecord.idLength()) return; + if (record.id().length() != yacyNewsRecord.idLength) return; if (record.category() == null) return; if (!(categories.contains(record.category()))) return; if (record.created().getTime() == 0) return; diff --git a/source/de/anomic/yacy/yacyNewsQueue.java b/source/de/anomic/yacy/yacyNewsQueue.java index b3a8ed264..31c7d58d6 100644 --- a/source/de/anomic/yacy/yacyNewsQueue.java +++ b/source/de/anomic/yacy/yacyNewsQueue.java @@ -77,8 +77,8 @@ public class yacyNewsQueue { } public static final kelondroRow rowdef = new kelondroRow(new kelondroColumn[]{ - new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyNewsRecord.idLength(), "id = created + originator"), - new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_string, yacyCore.universalDateShortPattern.length(), "") + new kelondroColumn("newsid", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyNewsRecord.idLength, "id = created + originator"), + new kelondroColumn("last touched", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, yacyCore.universalDateShortPattern.length(), "") }); private static kelondroStack createStack(File path) { diff --git a/source/de/anomic/yacy/yacyNewsRecord.java b/source/de/anomic/yacy/yacyNewsRecord.java index b4af7092d..06644b0e3 100644 --- a/source/de/anomic/yacy/yacyNewsRecord.java +++ b/source/de/anomic/yacy/yacyNewsRecord.java @@ -44,6 +44,7 @@ package de.anomic.yacy; +import de.anomic.kelondro.kelondroRow; import de.anomic.server.serverCodings; import de.anomic.server.serverDate; @@ -54,6 +55,7 @@ public class yacyNewsRecord { public static final 
int maxNewsRecordLength = 512; public static final int categoryStringLength = 8; + public static final int idLength = yacyCore.universalDateShortPattern.length() + yacySeedDB.commonHashLength; private String originator; // hash of originating peer private Date created; // Date when news was created by originator @@ -62,6 +64,20 @@ public class yacyNewsRecord { private int distributed; // counter that counts number of distributions of this news record private Map attributes; // elemets of the news for a special category + public static final int attributesMaxLength = maxNewsRecordLength + - idLength + - categoryStringLength + - yacyCore.universalDateShortPattern.length() + - 2; + + public static final kelondroRow rowdef = new kelondroRow( + "String idx-" + idLength + " \"id = created + originator\"," + + "String cat-" + categoryStringLength + "," + + "String rec-" + yacyCore.universalDateShortPattern.length() + "," + + "short dis-2 {b64e}," + + "String att-" + attributesMaxLength + ); + public yacyNewsRecord(String newsString) { this.attributes = serverCodings.string2map(newsString); this.received = (attributes.containsKey("rec")) ? yacyCore.parseUniversalDate((String) attributes.get("rec"), serverDate.UTCDiffString()) : new Date(); @@ -118,10 +134,6 @@ public class yacyNewsRecord { return yacyCore.universalDateShortString(created) + originator; } - public static int idLength() { - return yacyCore.universalDateShortPattern.length() + yacySeedDB.commonHashLength; - } - public String originator() { return originator; }