From 8fdefd5c683bffc9fa1f6f1f196e1116c29407b2 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 5 Nov 2006 02:10:40 +0000 Subject: [PATCH] generalization of payload definition of index storage this is one step forward to the migration to a new collection data format git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2912 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/search.java | 13 +++++--- source/de/anomic/index/indexCollectionRI.java | 11 ++++--- source/de/anomic/index/indexContainer.java | 30 +++++++++++-------- source/de/anomic/index/indexRAMCacheRI.java | 22 ++++++++------ .../kelondro/kelondroCollectionIndex.java | 26 +++++++++------- source/de/anomic/kelondro/kelondroRow.java | 8 +++++ .../kelondro/kelondroRowCollection.java | 4 +-- source/de/anomic/kelondro/kelondroRowSet.java | 8 ++++- .../plasma/dbImport/AssortmentImporter.java | 3 +- .../anomic/plasma/parser/swf/swfParser.java | 1 - .../de/anomic/plasma/plasmaSearchEvent.java | 24 ++++++++------- .../de/anomic/plasma/plasmaSwitchboard.java | 2 +- source/de/anomic/plasma/plasmaWordIndex.java | 22 +++++++++----- .../plasma/plasmaWordIndexAssortment.java | 30 ++++++++++--------- .../plasmaWordIndexAssortmentCluster.java | 25 +++++++++------- .../plasma/plasmaWordIndexFileCluster.java | 15 ++++++---- source/de/anomic/yacy/yacyClient.java | 2 +- source/yacy.java | 3 +- 18 files changed, 150 insertions(+), 99 deletions(-) diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 07649e5f6..d09819441 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -207,10 +207,15 @@ public final class search { // join and order the result indexContainer localResults = theSearch.localSearchJoin(containers.values()); - joincount = localResults.size(); - prop.put("joincount", Integer.toString(joincount)); - acc = theSearch.orderFinal(localResults); - + if (localResults == null) { + joincount = 0; + prop.put("joincount", 0); + acc = null; + } else { + joincount = localResults.size(); + prop.put("joincount", Integer.toString(joincount)); + acc = theSearch.orderFinal(localResults); + } // generate compressed index for maxcounthash // this is not needed if the search is restricted to specific // urls, because it is a re-search diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index a3029584e..c0e9218ed 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -32,8 +32,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroCollectionIndex; -import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroOutOfLimitsException; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowCollection; @@ -44,18 +44,17 @@ public class indexCollectionRI implements indexRI { kelondroCollectionIndex collectionIndex; - public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime) { - kelondroRow rowdef = indexURLEntry.urlEntryRow; + public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime, kelondroRow payloadrow) { try { collectionIndex = new kelondroCollectionIndex( path, filenameStub, 12 /*keyLength*/, - kelondroNaturalOrder.naturalOrder, + kelondroBase64Order.enhancedCoder, buffersize, preloadTime, 4 /*loadfactor*/, - rowdef); + payloadrow); } catch (IOException e) { serverLog.logSevere("PLASMA", "unable to open collection index at " + path.toString() + ":" + e.getMessage()); } @@ -154,7 +153,7 @@ public class indexCollectionRI implements indexRI { } public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { - indexContainer container = new indexContainer(wordHash); + indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow()); container.add(newEntry); return addEntries(container, updateTime, dhtCase); } diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java index 3a3e10846..eca2dcf99 100644 --- a/source/de/anomic/index/indexContainer.java +++ b/source/de/anomic/index/indexContainer.java @@ -34,7 +34,6 @@ import java.util.Set; import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; @@ -43,8 +42,13 @@ public class indexContainer extends kelondroRowSet { private String wordHash; - public indexContainer(String wordHash) { - this(wordHash, new kelondroNaturalOrder(true), 0); + public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache) { + super(rowdef, objectCount, cache, kelondroBase64Order.enhancedCoder, 0, 0); + this.wordHash = wordHash; + } + + public indexContainer(String wordHash, kelondroRow rowdef) { + this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0); } public indexContainer(String wordHash, kelondroRowSet collection) { @@ -52,15 +56,15 @@ public class indexContainer extends kelondroRowSet { this.wordHash = wordHash; } - public indexContainer(String wordHash, kelondroOrder ordering, int column) { - super(indexURLEntry.urlEntryRow); + public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column) { + super(rowdef); this.wordHash = wordHash; this.lastTimeWrote = 0; this.setOrdering(ordering, column); } public indexContainer topLevelClone() { - indexContainer newContainer = new indexContainer(this.wordHash, this.sortOrder, this.sortColumn); + indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn); newContainer.add(this, -1); return newContainer; } @@ -220,7 +224,7 @@ public class indexContainer extends kelondroRowSet { singleContainer = (indexContainer) i.next(); // check result - if ((singleContainer == null) || (singleContainer.size() == 0)) return new indexContainer(null); // as this is a cunjunction of searches, we have no result if any word is not known + if ((singleContainer == null) || (singleContainer.size() == 0)) return null; // as this is a cunjunction of searches, we have no result if any word is not known // store result in order of result size map.put(new Long(singleContainer.size() * 1000 + count), singleContainer); @@ -228,7 +232,7 @@ public class indexContainer extends kelondroRowSet { } // check if there is any result - if (map.size() == 0) return new indexContainer(null); // no result, nothing found + if (map.size() == 0) return null; // no result, nothing found // the map now holds the search results in order of number of hits per word // we now must pairwise build up a conjunction of these sets @@ -247,7 +251,7 @@ public class indexContainer extends kelondroRowSet { } // in 'searchResult' is now the combined search result - if (searchResult.size() == 0) return new indexContainer(null); + if (searchResult.size() == 0) return null; return searchResult; } @@ -260,7 +264,7 @@ public class indexContainer extends kelondroRowSet { public static indexContainer joinConstructive(indexContainer i1, indexContainer i2, long time, int maxDistance) { if ((i1 == null) || (i2 == null)) return null; - if ((i1.size() == 0) || (i2.size() == 0)) return new indexContainer(null); + if ((i1.size() == 0) || (i2.size() == 0)) return null; // decide which method to use int high = ((i1.size() > i2.size()) ? i1.size() : i2.size()); @@ -281,7 +285,8 @@ public class indexContainer extends kelondroRowSet { private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) { System.out.println("DEBUG: JOIN METHOD BY TEST"); - indexContainer conj = new indexContainer(null); // start with empty search result + assert small.rowdef.equals(large); + indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result Iterator se = small.entries(); indexEntry ie0, ie1; long stamp = System.currentTimeMillis(); @@ -299,7 +304,8 @@ public class indexContainer extends kelondroRowSet { private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) { System.out.println("DEBUG: JOIN METHOD BY ENUMERATION"); - indexContainer conj = new indexContainer(null); // start with empty search result + assert i1.rowdef.equals(i2); + indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result if (!((i1.order().signature().equals(i2.order().signature())) && (i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal Iterator e1 = i1.entries(); diff --git a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index 0b4c1f77f..2758c5174 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -45,11 +45,6 @@ public final class indexRAMCacheRI implements indexRI { // environment constants public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes - public static final kelondroRow bufferStructureBasis = new kelondroRow( - "byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " + - "Cardinal occ-4 {b256}, " + - "Cardinal time-8 {b256}, " + - "byte[] urlprops-" + indexURLEntry.urlEntryRow.objectsize()); // class variables private final File databaseRoot; @@ -61,6 +56,8 @@ public final class indexRAMCacheRI implements indexRI { public int cacheReferenceLimit; private final serverLog log; private String indexArrayFileName; + private kelondroRow payloadrow; + private kelondroRow bufferStructureBasis; // calculated constants private static String maxKey; @@ -69,7 +66,7 @@ public final class indexRAMCacheRI implements indexRI { //minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-'; } - public indexRAMCacheRI(File databaseRoot, int wCacheReferenceLimitInit, String dumpname, serverLog log) { + public indexRAMCacheRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) { // creates a new index cache // the cache has a back-end where indexes that do not fit in the cache are flushed @@ -81,7 +78,13 @@ public final class indexRAMCacheRI implements indexRI { this.cacheMaxCount = 10000; this.cacheReferenceLimit = wCacheReferenceLimitInit; this.log = log; - indexArrayFileName = dumpname; + this.indexArrayFileName = dumpname; + this.payloadrow = payloadrow; + this.bufferStructureBasis = new kelondroRow( + "byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " + + "Cardinal occ-4 {b256}, " + + "Cardinal time-8 {b256}, " + + "byte[] urlprops-" + payloadrow.objectsize()); // read in dump of last session try { @@ -91,6 +94,7 @@ public final class indexRAMCacheRI implements indexRI { } } + public synchronized long getUpdateTime(String wordHash) { indexContainer entries = getContainer(wordHash, null, false, -1); if (entries == null) return 0; @@ -423,7 +427,7 @@ public final class indexRAMCacheRI implements indexRI { // put container into wCache String wordHash = container.getWordHash(); indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null - if (entries == null) entries = new indexContainer(wordHash); + if (entries == null) entries = new indexContainer(wordHash, container.row()); added = entries.add(container, -1); if (added > 0) { cache.put(wordHash, entries); @@ -436,7 +440,7 @@ public final class indexRAMCacheRI implements indexRI { public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { indexContainer container = (indexContainer) cache.get(wordHash); - if (container == null) container = new indexContainer(wordHash); + if (container == null) container = new indexContainer(wordHash, this.payloadrow); indexEntry[] entries = new indexEntry[] { newEntry }; if (container.add(entries, updateTime) > 0) { cache.put(wordHash, container); diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index f5125a63e..946c6cbc2 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -43,7 +43,7 @@ public class kelondroCollectionIndex { private String filenameStub; private int loadfactor; private Map arrays; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects - private kelondroRow playloadrow; // definition of the payload (chunks inside the collections) + private kelondroRow payloadrow; // definition of the payload (chunks inside the collections) // private int partitions; // this is the maxmimum number of array files; yet not used private static final int idx_col_key = 0; // the index @@ -68,6 +68,10 @@ public class kelondroCollectionIndex { ); } + public kelondroRow payloadRow() { + return this.payloadrow; + } + private static String fillZ(String s, int len) { while (s.length() < len) s = "0" + s; return s; @@ -94,7 +98,7 @@ public class kelondroCollectionIndex { this.path = path; this.filenameStub = filenameStub; this.keylength = keyLength; - this.playloadrow = rowdef; + this.payloadrow = rowdef; this.loadfactor = loadfactor; boolean ramIndexGeneration = false; @@ -151,7 +155,7 @@ public class kelondroCollectionIndex { key = aentry.getColBytes(0); assert (key != null); if (key == null) continue; // skip deleted entries - kelondroRowSet indexrows = new kelondroRowSet(this.playloadrow, aentry.getColBytes(1)); + kelondroRowSet indexrows = new kelondroRowSet(this.payloadrow, aentry.getColBytes(1)); ientry = irow.newEntry(); ientry.setCol(idx_col_key, key); ientry.setCol(idx_col_chunksize, chunksize); @@ -199,11 +203,11 @@ public class kelondroCollectionIndex { } private kelondroFixedWidthArray openArrayFile(int partitionNumber, int serialNumber, boolean create) throws IOException { - File f = arrayFile(path, filenameStub, loadfactor, playloadrow.objectsize(), partitionNumber, serialNumber); + File f = arrayFile(path, filenameStub, loadfactor, payloadrow.objectsize(), partitionNumber, serialNumber); int load = arrayCapacity(partitionNumber); kelondroRow rowdef = new kelondroRow( "byte[] key-" + keylength + "," + - "byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.playloadrow.objectsize()) + "byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.payloadrow.objectsize()) ); if ((!(f.exists())) && (!create)) return null; kelondroFixedWidthArray a = new kelondroFixedWidthArray(f, rowdef, 0); @@ -270,7 +274,7 @@ public class kelondroCollectionIndex { // the collection is new int newPartitionNumber = arrayIndex(collection.size()); indexrow = index.row().newEntry(); - kelondroFixedWidthArray array = getArray(newPartitionNumber, 0, this.playloadrow.objectsize()); + kelondroFixedWidthArray array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -282,7 +286,7 @@ public class kelondroCollectionIndex { // store the new row number in the index indexrow.setCol(idx_col_key, key); - indexrow.setCol(idx_col_chunksize, this.playloadrow.objectsize()); + indexrow.setCol(idx_col_chunksize, this.payloadrow.objectsize()); indexrow.setCol(idx_col_chunkcount, collection.size()); indexrow.setCol(idx_col_clusteridx, (byte) newPartitionNumber); indexrow.setCol(idx_col_flags, (byte) 0); @@ -348,7 +352,7 @@ public class kelondroCollectionIndex { // we don't need a new slot, just write into the old one // find array file - kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.playloadrow.objectsize()); + kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.payloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -372,7 +376,7 @@ public class kelondroCollectionIndex { array.remove(oldrownumber); // write a new entry in the other array - array = getArray(newPartitionNumber, 0, this.playloadrow.objectsize()); + array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -438,7 +442,7 @@ public class kelondroCollectionIndex { if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, clusteridx, serialnumber).toString(), "array does not contain expected row"); // read the row and define a collection - kelondroRowSet collection = new kelondroRowSet(this.playloadrow, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize() + kelondroRowSet collection = new kelondroRowSet(this.payloadrow, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize() byte[] key = indexrow.getColBytes(idx_col_key); if (index.order().compare(arrayrow.getColBytes(0), key) != 0) { // check if we got the right row; this row is wrong. Fix it: @@ -446,7 +450,7 @@ public class kelondroCollectionIndex { // store the row number in the index; this may be a double-entry, but better than nothing kelondroRow.Entry indexEntry = index.row().newEntry(); indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0)); - indexEntry.setCol(idx_col_chunksize, this.playloadrow.objectsize()); + indexEntry.setCol(idx_col_chunksize, this.payloadrow.objectsize()); indexEntry.setCol(idx_col_chunkcount, collection.size()); indexEntry.setCol(idx_col_clusteridx, (byte) clusteridx); indexEntry.setCol(idx_col_flags, (byte) 0); diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index 5f04701e1..06a0830e8 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -470,4 +470,12 @@ public class kelondroRow { return true; } + public boolean equals(kelondroRow otherRow) { + if (this.objectsize != otherRow.objectsize) return false; + for (int i = 0; i < otherRow.row.length; i++) { + if (!(this.row[i].equals(otherRow.row[i]))) return false; + } + return true; + } + } diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 26bc7c62c..c8704f586 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -417,13 +417,13 @@ public class kelondroRowCollection { if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) { // there is space in the chunkcache that we can use as buffer System.arraycopy(chunkcache, this.rowdef.objectsize() * i, chunkcache, chunkcache.length - this.rowdef.objectsize(), this.rowdef.objectsize()); - System.arraycopy(chunkcache, this.rowdef.objectsize() *j , chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize()); + System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize()); System.arraycopy(chunkcache, chunkcache.length - this.rowdef.objectsize(), chunkcache, this.rowdef.objectsize() * j, this.rowdef.objectsize()); } else { // allocate a chunk to use as buffer byte[] a = new byte[this.rowdef.objectsize()]; System.arraycopy(chunkcache, this.rowdef.objectsize() * i, a, 0, this.rowdef.objectsize()); - System.arraycopy(chunkcache, this.rowdef.objectsize() * j , chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize()); + System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize()); System.arraycopy(a, 0, chunkcache, this.rowdef.objectsize() * j, this.rowdef.objectsize()); } if (i == p) return j; else if (j == p) return i; else return p; diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 8da837c98..69f858437 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -37,12 +37,18 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd private kelondroProfile profile; private TreeSet removeMarker; + public kelondroRowSet(kelondroRow rowdef, int objectCount, byte[] cache, kelondroOrder sortOrder, int sortColumn, int sortBound) { + super(rowdef, objectCount, cache, sortOrder, sortColumn, sortBound); + this.removeMarker = new TreeSet(); + this.profile = new kelondroProfile(); + } + public kelondroRowSet(kelondroRowSet rs) { super(rs); this.profile = rs.profile; this.removeMarker = rs.removeMarker; } - + public kelondroRowSet(kelondroRow rowdef) { super(rowdef, 0); this.removeMarker = new TreeSet(); diff --git a/source/de/anomic/plasma/dbImport/AssortmentImporter.java b/source/de/anomic/plasma/dbImport/AssortmentImporter.java index 8ffe978a1..86183dde1 100644 --- a/source/de/anomic/plasma/dbImport/AssortmentImporter.java +++ b/source/de/anomic/plasma/dbImport/AssortmentImporter.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.util.Iterator; import de.anomic.index.indexContainer; +import de.anomic.index.indexURLEntry; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaWordIndexAssortment; @@ -62,7 +63,7 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{ // initializing the import assortment db this.log.logInfo("Initializing source assortment file"); try { - this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr, this.cacheSize/1024, preloadTime, this.log); + this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, indexURLEntry.urlEntryRow, assortmentNr, this.cacheSize/1024, preloadTime, this.log); } catch (IOException e) { e.printStackTrace(); System.exit(-1); diff --git a/source/de/anomic/plasma/parser/swf/swfParser.java b/source/de/anomic/plasma/parser/swf/swfParser.java index 0fca5fa98..6a9642a50 100644 --- a/source/de/anomic/plasma/parser/swf/swfParser.java +++ b/source/de/anomic/plasma/parser/swf/swfParser.java @@ -43,7 +43,6 @@ package de.anomic.plasma.parser.swf; -import java.io.File; import java.io.InputStream; import de.anomic.net.URL; import java.util.Hashtable; diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index d264564d3..f639df0fe 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -91,7 +91,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { this.ranking = ranking; this.urlStore = urlStore; this.snippetCache = snippetCache; - this.rcContainers = new indexContainer(null); + this.rcContainers = new indexContainer(null, wordIndex.payloadrow()); this.rcContainerFlushCount = 0; this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches this.profileLocal = localTiming; @@ -195,12 +195,14 @@ public final class plasmaSearchEvent extends Thread implements Runnable { // combine the result and order plasmaSearchResult result = orderFinal(rcLocal); - result.globalContributions = globalContributions; - result.localContributions = rcLocal.size(); - - // flush results in a separate thread - this.start(); // start to flush results + if (result != null) { + result.globalContributions = globalContributions; + result.localContributions = rcLocal.size(); + // flush results in a separate thread + this.start(); // start to flush results + } + // return search result log.logFine("SEARCHRESULT: " + profileLocal.reportToString()); lastEvent = this; @@ -209,7 +211,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { Map searchContainerMap = localSearchContainers(null); indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values()); plasmaSearchResult result = orderFinal(rcLocal); - result.localContributions = rcLocal.size(); + result.localContributions = (rcLocal == null) ? 0 : rcLocal.size(); // return search result log.logFine("SEARCHRESULT: " + profileLocal.reportToString()); @@ -333,9 +335,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { // join a search result and return the joincount (number of pages after join) // since this is a conjunction we return an empty entity if any word is not known - if (containers == null) { - return new indexContainer(null); - } + if (containers == null) return null; // join the result profileLocal.startTimer(); @@ -352,7 +352,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime - indexContainer searchResult = new indexContainer(null); + if (rcLocal == null) return null; + indexContainer searchResult = new indexContainer(null, rcLocal.row()); long preorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_PRESORT); profileLocal.startTimer(); @@ -416,6 +417,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { private void prefetchLocal(indexContainer rcLocal, long timeout) { // pre-fetch some urls to fill LURL ram cache + if (rcLocal == null) return; plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, rcLocal, timeout - System.currentTimeMillis()); preorder.remove(true, true); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index a5d5dc14a..061d46d93 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1662,7 +1662,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String word = (String) wentry.getKey(); wordStat = (plasmaCondenser.wordStatProp) wentry.getValue(); String wordHash = indexEntryAttribute.word2hash(word); - indexContainer wordIdxContainer = new indexContainer(wordHash); indexEntry wordIdxEntry = new indexURLEntry( urlHash, urlLength, urlComps, @@ -1684,6 +1683,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ioLinks[1].intValue(), true ); + indexContainer wordIdxContainer = new indexContainer(wordHash, wordIndex.payloadrow()); wordIdxContainer.add(wordIdxEntry); tmpContainers.add(wordIdxContainer); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index aa264d9dc..844e1fa7e 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -50,6 +50,7 @@ import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroOrder; +import de.anomic.kelondro.kelondroRow; import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.logging.serverLog; @@ -59,6 +60,7 @@ public final class plasmaWordIndex implements indexRI { private static final String indexAssortmentClusterPath = "ACLUSTER"; private static final int assortmentCount = 64; + private static final kelondroRow payloadrow = indexURLEntry.urlEntryRow; private final File oldDatabaseRoot; private final kelondroOrder indexOrder = new kelondroNaturalOrder(true); @@ -73,9 +75,9 @@ public final class plasmaWordIndex implements indexRI { public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException { this.oldDatabaseRoot = oldDatabaseRoot; - this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log); - this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log); - this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log); + this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrow, log); + this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log); + this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log); // create assortment cluster path File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath); @@ -85,15 +87,15 @@ public final class plasmaWordIndex implements indexRI { File textindexpath = new File(newIndexRoot, "PUBLIC/TEXT"); if (!(textindexpath.exists())) textindexpath.mkdirs(); if (useCollectionIndex) { - this.collections = new indexCollectionRI(textindexpath, "test_generation1", bufferkb * 1024, preloadTime); + this.collections = new indexCollectionRI(textindexpath, "test_generation1", bufferkb * 1024, preloadTime, payloadrow); if (assortmentClusterPath.exists()) - this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, preloadTime, log); + this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log); else this.assortmentCluster = null; } else { this.collections = null; if (!(assortmentClusterPath.exists())) assortmentClusterPath.mkdirs(); - this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, preloadTime, log); + this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log); } busyCacheFlush = false; @@ -102,6 +104,10 @@ public final class plasmaWordIndex implements indexRI { this.idleDivisor = 420; } + public kelondroRow payloadrow() { + return payloadrow; + } + public File getRoot() { return oldDatabaseRoot; } @@ -459,7 +465,7 @@ public final class plasmaWordIndex implements indexRI { } public indexContainer deleteContainer(String wordHash) { - indexContainer c = new indexContainer(wordHash); + indexContainer c = new indexContainer(wordHash, payloadrow); c.add(dhtInCache.deleteContainer(wordHash), -1); c.add(dhtOutCache.deleteContainer(wordHash), -1); if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1); @@ -712,7 +718,7 @@ public final class plasmaWordIndex implements indexRI { try { entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true); int size = entity.size(); - indexContainer container = new indexContainer(wordhash); + indexContainer container = new indexContainer(wordhash, payloadrow); try { Iterator entries = entity.elements(true); diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 756eb14d6..10215e064 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -58,7 +58,7 @@ import java.util.Iterator; import de.anomic.index.indexContainer; import de.anomic.index.indexEntry; -import de.anomic.index.indexRAMCacheRI; +import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroColumn; @@ -79,6 +79,7 @@ public final class plasmaWordIndexAssortment { private kelondroCache assortments; private long bufferSize; private long preloadTime; + private kelondroRow payloadrow; private static String intx(int x) { String s = Integer.toString(x); @@ -86,23 +87,23 @@ public final class plasmaWordIndexAssortment { return s; } - private static kelondroRow bufferStructure(int assortmentCapacity) { + private kelondroRow bufferStructure(int assortmentCapacity) { kelondroColumn[] structure = new kelondroColumn[3 + assortmentCapacity]; - structure[0] = indexRAMCacheRI.bufferStructureBasis.column(0); - structure[1] = indexRAMCacheRI.bufferStructureBasis.column(1); - structure[2] = indexRAMCacheRI.bufferStructureBasis.column(2); - for (int i = 0; i < assortmentCapacity; i++) { - structure[3 + i] = indexRAMCacheRI.bufferStructureBasis.column(3); - } + structure[0] = new kelondroColumn("byte[] wordhash-" + indexEntryAttribute.wordHashLength); + structure[1] = new kelondroColumn("Cardinal occ-4 {b256}"); + structure[2] = new kelondroColumn("Cardinal time-8 {b256}"); + kelondroColumn p = new kelondroColumn("byte[] urlprops-" + payloadrow.objectsize()); + for (int i = 0; i < assortmentCapacity; i++) structure[3 + i] = p; return new kelondroRow(structure); } - private static int assortmentCapacity(int rowsize) { - return (rowsize - indexRAMCacheRI.bufferStructureBasis.width(0) - indexRAMCacheRI.bufferStructureBasis.width(1) - indexRAMCacheRI.bufferStructureBasis.width(2)) / indexRAMCacheRI.bufferStructureBasis.width(3); + private int assortmentCapacity(int rowsize) { + return (rowsize - indexEntryAttribute.wordHashLength - 12) / payloadrow.objectsize(); } - - public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException { + + public plasmaWordIndexAssortment(File storagePath, kelondroRow payloadrow, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException { if (!(storagePath.exists())) storagePath.mkdirs(); + this.payloadrow = payloadrow; this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db"); this.assortmentLength = assortmentLength; //this.bufferStructureLength = 3 + 2 * assortmentLength; @@ -119,6 +120,7 @@ public final class plasmaWordIndexAssortment { preloadTime + " ms preloadTime, " + (stop - start) + " ms effective, " + assortments.cacheNodeStatus()[1] + " preloaded"); + } public void store(indexContainer newContainer) throws IOException { @@ -212,11 +214,11 @@ public final class plasmaWordIndexAssortment { return row2container(row); } - public final static indexContainer row2container(kelondroRow.Entry row) { + public final indexContainer row2container(kelondroRow.Entry row) { if (row == null) return null; String wordHash = row.getColString(0, null); final long updateTime = row.getColLong(2); - indexContainer container = new indexContainer(wordHash); + indexContainer container = new indexContainer(wordHash, payloadrow); int al = assortmentCapacity(row.objectsize()); for (int i = 0; i < al; i++) { container.add(new indexEntry[] { new indexURLEntry(row.getColBytes(3 + i)) }, updateTime); diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 437cbc093..c26ab24e9 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -60,6 +60,7 @@ import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroRecords; +import de.anomic.kelondro.kelondroRow; import de.anomic.server.logging.serverLog; public final class plasmaWordIndexAssortmentCluster implements indexRI { @@ -71,10 +72,12 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { //private serverLog log; private plasmaWordIndexAssortment[] assortments; private long completeBufferKB; + private kelondroRow payloadrow; - public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, int bufferkb, long preloadTime, serverLog log) throws IOException { + public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, kelondroRow payloadrow, int bufferkb, long preloadTime, serverLog log) throws IOException { // set class variables if (!(assortmentsPath.exists())) assortmentsPath.mkdirs(); + this.payloadrow = payloadrow; this.clusterCount = clusterCount; this.clusterCapacity = clusterCount * (clusterCount + 1) / 2; this.completeBufferKB = bufferkb; @@ -86,7 +89,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { int sumSizes = 1; plasmaWordIndexAssortment testAssortment; for (int i = 0; i < clusterCount; i++) { - testAssortment = new plasmaWordIndexAssortment(assortmentsPath, i + 1, 0, 0, null); + testAssortment = new plasmaWordIndexAssortment(assortmentsPath, payloadrow, i + 1, 0, 0, null); sizes[i] = testAssortment.size() + clusterCount - i; sumSizes += sizes[i]; testAssortment.close(); @@ -102,7 +105,9 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { nextTime = Math.max(0, preloadTime * ((long) sizes[i]) / sS); startTime = System.currentTimeMillis(); assortments[i] = new plasmaWordIndexAssortment( - assortmentsPath, i + 1, + assortmentsPath, + payloadrow, + i + 1, (int) (completeBufferKB * (long) sizes[i] / (long) sumSizes), nextTime, log); @@ -160,7 +165,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { indexContainer c; Iterator i = newContainer.entries(); for (int j = clusterStart; j >= 1; j--) { - c = new indexContainer(newContainer.getWordHash()); + c = new indexContainer(newContainer.getWordHash(), payloadrow); for (int k = 0; k < j; k++) { if (i.hasNext()) { c.add((indexEntry) i.next(), newContainer.updated()); @@ -174,7 +179,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { } public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { - indexContainer container = new indexContainer(wordHash); + indexContainer container = new indexContainer(wordHash, payloadrow); container.add(newEntry); return addEntries(container, updateTime, dhtCase); } @@ -215,7 +220,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { Iterator i = newContainer.entries(); for (int j = testsize - 1; j >= 0; j--) { if (spaces[j] == 0) continue; - c = new indexContainer(newContainer.getWordHash()); + c = new indexContainer(newContainer.getWordHash(), payloadrow); for (int k = 0; k <= j; k++) { assert (i.hasNext()); c.add((indexEntry) i.next(), newContainer.updated()); @@ -253,7 +258,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { public indexContainer deleteContainer(String wordHash, long maxTime) { // removes all records from all the assortments and return them - indexContainer buffer, record = new indexContainer(wordHash); + indexContainer buffer, record = new indexContainer(wordHash, payloadrow); long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; long remainingTime; for (int i = 0; i < clusterCount; i++) { @@ -278,7 +283,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { */ public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { - indexContainer buffer, record = new indexContainer(wordHash); + indexContainer buffer, record = new indexContainer(wordHash, payloadrow); boolean found = false; for (int i = 0; i < clusterCount; i++) { buffer = assortments[i].remove(wordHash); @@ -294,7 +299,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { } public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { - indexContainer buffer, record = new indexContainer(wordHash); + indexContainer buffer, record = new indexContainer(wordHash, payloadrow); int initialSize = urlHashes.size(); for (int i = 0; i < clusterCount; i++) { buffer = assortments[i].remove(wordHash); @@ -319,7 +324,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI { public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) { // collect all records from all the assortments and return them - indexContainer buffer, record = new indexContainer(wordHash); + indexContainer buffer, record = new indexContainer(wordHash, payloadrow); long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; for (int i = 0; i < clusterCount; i++) { buffer = assortments[i].get(wordHash); diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index 3e17738e2..3cfd4be0b 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -54,6 +54,7 @@ import de.anomic.index.indexContainer; import de.anomic.index.indexEntry; import de.anomic.index.indexRI; import de.anomic.kelondro.kelondroNaturalOrder; +import de.anomic.kelondro.kelondroRow; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -63,9 +64,11 @@ public class plasmaWordIndexFileCluster implements indexRI { private final File databaseRoot; private final serverLog log; private int size; + private kelondroRow payloadrow; - public plasmaWordIndexFileCluster(File databaseRoot, serverLog log) { - this.databaseRoot = databaseRoot; + public plasmaWordIndexFileCluster(File databaseRoot, kelondroRow payloadrow, serverLog log) { + this.databaseRoot = databaseRoot; + this.payloadrow = payloadrow; this.log = log; this.size = 0; } @@ -231,7 +234,7 @@ public class plasmaWordIndexFileCluster implements indexRI { if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute if (exists(wordHash)) { plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10); - indexContainer container = new indexContainer(wordHash); + indexContainer container = new indexContainer(wordHash, payloadrow); indexEntry entry; Iterator i = entity.elements(true); while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) { @@ -240,7 +243,7 @@ public class plasmaWordIndexFileCluster implements indexRI { } return container; } else { - return new indexContainer(wordHash); + return new indexContainer(wordHash, payloadrow); } } @@ -255,7 +258,7 @@ public class plasmaWordIndexFileCluster implements indexRI { public indexContainer deleteContainer(String wordHash) { plasmaWordIndexFile.removePlasmaIndex(databaseRoot, wordHash); - return new indexContainer(wordHash); + return new indexContainer(wordHash, payloadrow); } public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { @@ -300,7 +303,7 @@ public class plasmaWordIndexFileCluster implements indexRI { } public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { - indexContainer container = new indexContainer(wordHash); + indexContainer container = new indexContainer(wordHash, payloadrow); container.add(newEntry); return addEntries(container, updateTime, dhtCase); } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 3b3194ff2..b23ba3eec 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -494,7 +494,7 @@ public final class yacyClient { final int words = wordhashes.length() / indexEntryAttribute.wordHashLength; indexContainer[] container = new indexContainer[words]; for (int i = 0; i < words; i++) { - container[i] = new indexContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength)); + container[i] = new indexContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength), indexURLEntry.urlEntryRow); } // insert results to containers diff --git a/source/yacy.java b/source/yacy.java index 0e7522d9e..b648e9e2d 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -74,6 +74,7 @@ import de.anomic.index.indexContainer; import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURL; +import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMap; @@ -1235,7 +1236,7 @@ public final class yacy { WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, true, 8*1024*1024, 3000, log, sps.getConfigBool("useCollectionIndex", false)); indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false); } else if (resource.equals("assortments")) { - plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, 3000, log); + plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, indexURLEntry.urlEntryRow, 16*1024*1024, 3000, log); indexContainerIterator = assortmentCluster.wordContainers(wordChunkStartHash, true, false); } /*else if (resource.startsWith("assortment")) { int a = Integer.parseInt(resource.substring(10));