refactoring: integrated indexContainer abstraction layer

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2149 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 9937730560
commit 7b3b12888c

@ -57,13 +57,13 @@ import java.util.TreeMap;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexEntryInstance;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyClient;
@ -147,7 +147,7 @@ public class IndexControl_p {
if (post.containsKey("keyhashdeleteall")) {
if (delurl || delurlref) {
// generate an urlx array
plasmaWordIndexEntryContainer index = null;
indexContainer index = null;
index = switchboard.wordIndex.getContainer(keyhash, true, -1);
Iterator en = index.entries();
int i = 0;
@ -246,7 +246,7 @@ public class IndexControl_p {
}
prop.put("urlstring", "");
prop.put("urlhash", "");
plasmaWordIndexEntryContainer index;
indexContainer index;
String result;
long starttime = System.currentTimeMillis();
index = switchboard.wordIndex.getContainer(keyhash, true, -1);
@ -275,7 +275,7 @@ public class IndexControl_p {
int timeout = (int) switchboard.getConfigLong("indexControl.timeout",60000);
result = yacyClient.transferIndex(
yacyCore.seedDB.getConnected(post.get("hostHash", "")),
new plasmaWordIndexEntryContainer[]{index},
new indexContainer[]{index},
knownURLs,
"true".equalsIgnoreCase(gzipBody),
timeout);
@ -421,7 +421,7 @@ public class IndexControl_p {
public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) {
// search for a word hash and generate a list of url links
plasmaWordIndexEntryContainer index = null;
indexContainer index = null;
try {
index = switchboard.wordIndex.getContainer(keyhash, true, -1);

@ -0,0 +1,64 @@
// indexAbstractConatiner.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 20.05.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
import de.anomic.kelondro.kelondroBase64Order;
public abstract class indexAbstractContainer implements indexContainer {
private String wordHash;
private long updateTime;
public void setWordHash(String newWordHash) {
// this is used to replicate a container for different word indexes during global search
this.wordHash = newWordHash;
}
public long updated() {
return updateTime;
}
public String wordHash() {
return wordHash;
}
public int add(indexEntry entry) {
return add(entry, System.currentTimeMillis());
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
if (!wordHash.equals(this.wordHash)) return 0;
int count = 0;
for (int i = 0; i < urlHashes.length; i++) count += (remove(urlHashes[i]) == null) ? 0 : 1;
return count;
}
public int hashCode() {
return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4));
}
}

@ -26,7 +26,6 @@
package de.anomic.index;
//import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaWordIndex;
public abstract class indexAbstractEntry implements indexEntry {

@ -30,14 +30,14 @@ import de.anomic.plasma.plasmaWordIndexEntryContainer;
public abstract class indexAbstractRI implements indexRI {
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public long getUpdateTime(String wordHash) {
plasmaWordIndexEntryContainer entries = getContainer(wordHash, false, -1);
indexContainer entries = getContainer(wordHash, false, -1);
if (entries == null) return 0;
return entries.updated();
}

@ -0,0 +1,75 @@
// indexContainer.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 20.05.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// an index container is a set of index entries
package de.anomic.index;
import java.util.Iterator;
import de.anomic.kelondro.kelondroOrder;
public interface indexContainer {
public void setWordHash(String newWordHash);
public void clear();
public int size();
public long updated();
public kelondroOrder order();
public String wordHash();
public int add(indexEntry entry);
public int add(indexEntry entry, long updateTime);
public int add(indexEntry[] entries, long updateTime);
public int add(indexContainer c, long maxTime);
public boolean contains(String urlHash) ;
public indexEntry get(String urlHash);
public indexEntry[] getEntryArray() ;
public indexEntry remove(String urlHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
public Iterator entries();
public String toString();
public int hashCode();
//public void joinConstructive(indexContainer c, long time, int maxDistance);
}

@ -35,6 +35,7 @@ public interface indexEntry {
public String getUrlHash();
public void combineDistance(indexEntry oe);
public int worddistance();
public void min(indexEntry other);
public void max(indexEntry other);
public void normalize(indexEntry min, indexEntry max);

@ -44,8 +44,6 @@ package de.anomic.index;
import java.util.Iterator;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
public interface indexRI {
public int size();
@ -53,12 +51,12 @@ public interface indexRI {
public Iterator wordHashes(String startWordHash, boolean rot);
public long getUpdateTime(String wordHash);
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime);
public plasmaWordIndexEntryContainer deleteContainer(String wordHash);
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime);
public indexContainer deleteContainer(String wordHash);
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete);
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);
public void close(int waitingSeconds);

@ -6,12 +6,12 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexEntryInstance;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverDate;
public class plasmaDbImporter extends AbstractImporter implements dbImporter {
@ -119,7 +119,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
while (!isAborted() && importWordHashIterator.hasNext()) {
TreeSet entityUrls = new TreeSet(new kelondroNaturalOrder(true));
plasmaWordIndexEntryContainer newContainer = null;
indexContainer newContainer = null;
try {
this.wordCounter++;
this.wordHash = (String) importWordHashIterator.next();

@ -3,9 +3,9 @@ package de.anomic.plasma.dbImport;
import java.io.File;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
public class plasmaWordIndexAssortmentImporter extends AbstractImporter implements dbImporter{
@ -99,7 +99,7 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
String hash = new String(row[0]);
// creating an word entry container
plasmaWordIndexEntryContainer container;
indexContainer container;
try {
container = this.assortmentFile.row2container(hash, row);
} catch (NullPointerException e) {

@ -46,6 +46,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.serverCodings;
@ -186,7 +187,7 @@ public class plasmaDHTChunk {
String nexthash = "";
try {
Iterator wordHashIterator = wordIndex.wordHashSet(hash, resourceLevel, true, maxcount).iterator();
plasmaWordIndexEntryContainer indexContainer;
indexContainer indexContainer;
Iterator urlIter;
plasmaWordIndexEntryInstance indexEntry;
plasmaCrawlLURL.Entry lurl;

@ -51,6 +51,7 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
import de.anomic.server.serverInstantThread;
import de.anomic.yacy.yacySearch;
import de.anomic.index.indexContainer;
public final class plasmaSearchEvent extends Thread implements Runnable {
@ -64,7 +65,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private plasmaWordIndex wordIndex;
private plasmaCrawlLURL urlStore;
private plasmaSnippetCache snippetCache;
private plasmaWordIndexEntryContainer rcLocal, rcGlobal; // caches for results
private indexContainer rcLocal, rcGlobal; // caches for results
private int rcGlobalCount;
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private yacySearch[] searchThreads;

@ -58,6 +58,7 @@ import java.util.TreeSet;
import java.net.URL;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexRI;
@ -158,8 +159,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
}
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) {
plasmaWordIndexEntryContainer c;
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) {
indexContainer c;
if ((c = ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) == null) {
if (!dhtCase) flushControl();
return null;
@ -167,8 +168,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return c;
}
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) {
plasmaWordIndexEntryContainer added = ramCache.addEntries(entries, updateTime, dhtCase);
public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtCase) {
indexContainer added = ramCache.addEntries(entries, updateTime, dhtCase);
// force flush
if (!dhtCase) flushControl();
@ -192,9 +193,9 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
private synchronized void flushCache(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
indexContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
indexContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
if (feedback != null) {
backend.addEntries(feedback, System.currentTimeMillis(), true);
}
@ -277,7 +278,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return condenser.RESULT_SIMI_WORDS;
}
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
@ -307,7 +308,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
// retrieve entities that belong to the hashes
HashSet containers = new HashSet();
String singleHash;
plasmaWordIndexEntryContainer singleContainer;
indexContainer singleContainer;
Iterator i = wordHashes.iterator();
long start = System.currentTimeMillis();
long remaining;
@ -356,8 +357,9 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
backend.close(10);
}
public synchronized plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
public synchronized indexContainer deleteContainer(String wordHash) {
indexContainer c = ramCache.deleteContainer(wordHash);
if (c == null) c = new plasmaWordIndexEntryContainer(wordHash);
c.add(assortmentCluster.deleteContainer(wordHash, -1), -1);
c.add(backend.deleteContainer(wordHash), -1);
return c;
@ -369,7 +371,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
synchronized (this) {
removed = ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.length) return removed;
plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordHash, -1);
indexContainer container = assortmentCluster.deleteContainer(wordHash, -1);
if (container != null) {
removed += container.removeEntries(wordHash, urlHashes, deleteComplete);
if (container.size() != 0) {
@ -506,7 +508,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} else {
// take out all words from the assortment to see if it fits
// together with the extracted assortment
plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordhash, -1);
indexContainer container = assortmentCluster.deleteContainer(wordhash, -1);
if (size + container.size() > assortmentCluster.clusterCapacity) {
// this will also be too big to integrate, add to entity
entity.addEntries(container);
@ -567,7 +569,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public void run() {
serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
String wordHash = "";
plasmaWordIndexEntryContainer wordContainer = null;
indexContainer wordContainer = null;
plasmaWordIndexEntryInstance entry = null;
URL url = null;
HashSet urlHashs = new HashSet();

@ -56,6 +56,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntryAttribute;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRecords;
@ -125,7 +126,7 @@ public final class plasmaWordIndexAssortment {
if (log != null) log.logConfig("Created new Assortment Database, width " + assortmentLength + ", " + bufferkb + "kb buffer");
}
public void store(plasmaWordIndexEntryContainer newContainer) {
public void store(indexContainer newContainer) {
// stores a word index to assortment database
// this throws an exception if the word hash already existed
//log.logDebug("storeAssortment: wordHash=" + wordHash + ", urlHash=" + entry.getUrlHash() + ", time=" + creationTime);
@ -154,7 +155,7 @@ public final class plasmaWordIndexAssortment {
if (oldrow != null) throw new RuntimeException("Store to assortment ambiguous");
}
public plasmaWordIndexEntryContainer remove(String wordHash) {
public indexContainer remove(String wordHash) {
// deletes a word index from assortment database
// and returns the content record
byte[][] row = null;
@ -191,7 +192,7 @@ public final class plasmaWordIndexAssortment {
}
}
public plasmaWordIndexEntryContainer get(String wordHash) {
public indexContainer get(String wordHash) {
// gets a word index from assortment database
// and returns the content record
byte[][] row = null;
@ -211,7 +212,7 @@ public final class plasmaWordIndexAssortment {
return row2container(wordHash, row);
}
public plasmaWordIndexEntryContainer row2container(String wordHash, byte[][] row) {
public indexContainer row2container(String wordHash, byte[][] row) {
if (row == null) return null;
final long updateTime = kelondroRecords.bytes2long(row[2]);
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);

@ -51,6 +51,7 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -97,13 +98,13 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
}
private plasmaWordIndexEntryContainer storeSingular(plasmaWordIndexEntryContainer newContainer) {
private indexContainer storeSingular(indexContainer newContainer) {
// this tries to store the record. If the record does not fit, or a same hash already
// exists and would not fit together with the new record, then the record is deleted from
// the assortmen(s) and returned together with the newRecord.
// if storage was successful, NULL is returned.
if (newContainer.size() > clusterCount) return newContainer; // it will not fit
plasmaWordIndexEntryContainer buffer;
indexContainer buffer;
while ((buffer = assortments[newContainer.size() - 1].remove(newContainer.wordHash())) != null) {
if (newContainer.add(buffer, -1) == 0) return newContainer; // security check; othervise this loop does not terminate
if (newContainer.size() > clusterCount) return newContainer; // it will not fit
@ -114,14 +115,14 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
return null;
}
private void storeForced(plasmaWordIndexEntryContainer newContainer) {
private void storeForced(indexContainer newContainer) {
// this stores the record and overwrites an existing record.
// this is safe if we can be shure that the record does not exist before.
if ((newContainer == null) || (newContainer.size() == 0) || (newContainer.size() > clusterCount)) return; // it will not fit
assortments[newContainer.size() - 1].store(newContainer);
}
private void storeStretched(plasmaWordIndexEntryContainer newContainer) {
private void storeStretched(indexContainer newContainer) {
// this stores the record and stretches the storage over
// all the assortments that are necessary to fit in the record
// IMPORTANT: it must be ensured that the wordHash does not exist in the cluster before
@ -159,7 +160,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
}
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newContainer, long creationTime, boolean dhtCase) {
public indexContainer addEntries(indexContainer newContainer, long creationTime, boolean dhtCase) {
// this is called by the index ram cache flush process
// it returnes NULL if the storage was successful
// it returnes a new container if the given container cannot be stored
@ -209,13 +210,13 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
return null;
}
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
public indexContainer deleteContainer(String wordHash) {
return deleteContainer(wordHash, -1);
}
public plasmaWordIndexEntryContainer deleteContainer(String wordHash, long maxTime) {
public indexContainer deleteContainer(String wordHash, long maxTime) {
// removes all records from all the assortments and return them
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
indexContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {
@ -228,7 +229,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) {
plasmaWordIndexEntryContainer c = deleteContainer(wordHash, -1);
indexContainer c = deleteContainer(wordHash, -1);
int b = c.size();
c.removeEntries(wordHash, referenceHashes, false);
if (c.size() != 0) {
@ -237,9 +238,9 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
return b - c.size();
}
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
// collect all records from all the assortments and return them
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
indexContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {

@ -50,6 +50,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
@ -360,11 +361,11 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
return (((long) intTime) * (long) 1000) + startTime;
}
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) {
public indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxtime_dummy) {
return (plasmaWordIndexEntryContainer) wCache.get(wordHash);
}
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
public indexContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
synchronized (wCache) {
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.remove(wordHash);
@ -416,7 +417,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
return delCount;
}
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
public indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
@ -443,7 +444,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
return null;
}
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
if (dhtCase) synchronized (kCache) {
// put container into kCache
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);

@ -49,6 +49,7 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRI;
import de.anomic.index.indexAbstractRI;
import de.anomic.kelondro.kelondroNaturalOrder;
@ -187,7 +188,7 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI
}
}
public synchronized plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
public synchronized indexContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
if (plasmaWordIndexEntity.wordHash2path(databaseRoot, wordHash).exists()) {
@ -214,7 +215,7 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI
if (f.exists()) return f.lastModified(); else return -1;
}
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
public indexContainer deleteContainer(String wordHash) {
plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash);
return new plasmaWordIndexEntryContainer(wordHash);
}
@ -240,7 +241,7 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI
}
}
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
public indexContainer addEntries(indexContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
if ((container == null) || (container.size() == 0)) return null;

@ -48,6 +48,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroException;
@ -157,7 +158,7 @@ public final class plasmaWordIndexEntity {
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedStringForm().getBytes()) == null);
}
public int addEntries(plasmaWordIndexEntryContainer container) throws IOException {
public int addEntries(indexContainer container) throws IOException {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
if ((container == null) || (container.size() == 0)) return 0;
@ -178,6 +179,7 @@ public final class plasmaWordIndexEntity {
}
public boolean deleteComplete() {
if (theIndex == null) return false;
try { theIndex.close(); } catch (IOException e) {}
// remove file
boolean success = theLocation.delete();

@ -57,12 +57,14 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import de.anomic.index.indexContainer;
import de.anomic.index.indexAbstractContainer;
import de.anomic.index.indexEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
public final class plasmaWordIndexEntryContainer {
public final class plasmaWordIndexEntryContainer extends indexAbstractContainer implements indexContainer {
private String wordHash;
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
@ -97,6 +99,10 @@ public final class plasmaWordIndexEntryContainer {
return updateTime;
}
public kelondroOrder order() {
return ordering;
}
public String wordHash() {
return wordHash;
}
@ -117,7 +123,7 @@ public final class plasmaWordIndexEntryContainer {
return c;
}
public int add(plasmaWordIndexEntryContainer c, long maxTime) {
public int add(indexContainer c, long maxTime) {
// returns the number of new elements
long startTime = System.currentTimeMillis();
if (c == null) return 0;
@ -128,7 +134,7 @@ public final class plasmaWordIndexEntryContainer {
if (addi((plasmaWordIndexEntryInstance) i.next())) x++;
} catch (ConcurrentModificationException e) {}
}
this.updateTime = java.lang.Math.max(this.updateTime, c.updateTime);
this.updateTime = java.lang.Math.max(this.updateTime, c.updated());
return x;
}
@ -146,15 +152,15 @@ public final class plasmaWordIndexEntryContainer {
return container.containsKey(urlHash);
}
public plasmaWordIndexEntryInstance get(String urlHash) {
public indexEntry get(String urlHash) {
return (plasmaWordIndexEntryInstance) container.get(urlHash);
}
public plasmaWordIndexEntryInstance[] getEntryArray() {
public indexEntry[] getEntryArray() {
return (plasmaWordIndexEntryInstance[]) container.values().toArray();
}
public plasmaWordIndexEntryInstance remove(String urlHash) {
public indexEntry remove(String urlHash) {
return (plasmaWordIndexEntryInstance) container.remove(urlHash);
}
@ -178,7 +184,7 @@ public final class plasmaWordIndexEntryContainer {
return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4));
}
public static plasmaWordIndexEntryContainer joinContainer(Set containers, long time, int maxDistance) {
public static indexContainer joinContainer(Set containers, long time, int maxDistance) {
long stamp = System.currentTimeMillis();
@ -205,13 +211,13 @@ public final class plasmaWordIndexEntryContainer {
// the map now holds the search results in order of number of hits per word
// we now must pairwise build up a conjunction of these sets
Long k = (Long) map.firstKey(); // the smallest, which means, the one with the least entries
plasmaWordIndexEntryContainer searchA, searchB, searchResult = (plasmaWordIndexEntryContainer) map.remove(k);
indexContainer searchA, searchB, searchResult = (indexContainer) map.remove(k);
while ((map.size() > 0) && (searchResult.size() > 0)) {
// take the first element of map which is a result and combine it with result
k = (Long) map.firstKey(); // the next smallest...
time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis();
searchA = searchResult;
searchB = (plasmaWordIndexEntryContainer) map.remove(k);
searchB = (indexContainer) map.remove(k);
searchResult = plasmaWordIndexEntryContainer.joinConstructive(searchA, searchB, 2 * time / (map.size() + 1), maxDistance);
// free resources
searchA = null;
@ -230,7 +236,7 @@ public final class plasmaWordIndexEntryContainer {
return l;
}
public static plasmaWordIndexEntryContainer joinConstructive(plasmaWordIndexEntryContainer i1, plasmaWordIndexEntryContainer i2, long time, int maxDistance) {
public static indexContainer joinConstructive(indexContainer i1, indexContainer i2, long time, int maxDistance) {
if ((i1 == null) || (i2 == null)) return null;
if ((i1.size() == 0) || (i2.size() == 0)) return new plasmaWordIndexEntryContainer(null);
@ -251,14 +257,14 @@ public final class plasmaWordIndexEntryContainer {
}
}
private static plasmaWordIndexEntryContainer joinConstructiveByTest(plasmaWordIndexEntryContainer small, plasmaWordIndexEntryContainer large, long time, int maxDistance) {
private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY TEST");
plasmaWordIndexEntryContainer conj = new plasmaWordIndexEntryContainer(null); // start with empty search result
Iterator se = small.entries();
plasmaWordIndexEntryInstance ie0, ie1;
indexEntry ie0, ie1;
long stamp = System.currentTimeMillis();
while ((se.hasNext()) && ((System.currentTimeMillis() - stamp) < time)) {
ie0 = (plasmaWordIndexEntryInstance) se.next();
ie0 = (indexEntry) se.next();
ie1 = large.get(ie0.getUrlHash());
if (ie1 != null) {
// this is a hit. Calculate word distance:
@ -269,10 +275,10 @@ public final class plasmaWordIndexEntryContainer {
return conj;
}
private static plasmaWordIndexEntryContainer joinConstructiveByEnumeration(plasmaWordIndexEntryContainer i1, plasmaWordIndexEntryContainer i2, long time, int maxDistance) {
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
plasmaWordIndexEntryContainer conj = new plasmaWordIndexEntryContainer(null); // start with empty search result
if (!(i1.ordering.signature().equals(i2.ordering.signature()))) return conj; // ordering must be equal
if (!(i1.order().signature().equals(i2.order().signature()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;
@ -284,7 +290,7 @@ public final class plasmaWordIndexEntryContainer {
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.ordering.compare(ie1.getUrlHash(), ie2.getUrlHash());
c = i1.order().compare(ie1.getUrlHash(), ie2.getUrlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (plasmaWordIndexEntryInstance) e1.next(); else break;

@ -53,6 +53,7 @@ import java.util.Iterator;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntryAttribute;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaCrawlLURL;
@ -363,7 +364,7 @@ public final class yacyClient {
boolean global,
yacySeed targetPeer,
plasmaCrawlLURL urlManager,
plasmaWordIndexEntryContainer containerCache,
indexContainer containerCache,
plasmaURLPattern blacklist,
plasmaSnippetCache snippets,
plasmaSearchTimingProfile timingProfile,
@ -877,7 +878,7 @@ public final class yacyClient {
}
}
public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntryContainer[] indexes, HashMap urlCache, boolean gzipBody, int timeout) {
public static String transferIndex(yacySeed targetSeed, indexContainer[] indexes, HashMap urlCache, boolean gzipBody, int timeout) {
// check if we got all necessary urls in the urlCache (only for debugging)
Iterator eenum;
@ -935,7 +936,7 @@ public final class yacyClient {
return null;
}
private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntryContainer[] indexes, boolean gzipBody, int timeout) {
private static HashMap transferRWI(yacySeed targetSeed, indexContainer[] indexes, boolean gzipBody, int timeout) {
final String address = targetSeed.getAddress();
if (address == null) { return null; }

@ -48,13 +48,13 @@ import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;
import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.logging.serverLog;
public class yacySearch extends Thread {
@ -62,7 +62,7 @@ public class yacySearch extends Thread {
final private Set wordhashes;
final private boolean global;
final private plasmaCrawlLURL urlManager;
final private plasmaWordIndexEntryContainer containerCache;
final private indexContainer containerCache;
final private plasmaURLPattern blacklist;
final private plasmaSnippetCache snippetCache;
final private yacySeed targetPeer;
@ -73,7 +73,7 @@ public class yacySearch extends Thread {
final private String prefer, filter;
public yacySearch(Set wordhashes, String prefer, String filter, int maxDistance, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntryContainer containerCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaCrawlLURL urlManager, indexContainer containerCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
super("yacySearch_" + targetPeer.getName());
this.wordhashes = wordhashes;
@ -181,7 +181,7 @@ public class yacySearch extends Thread {
return result;
}
public static yacySearch[] searchHashes(Set wordhashes, String prefer, String filter, int maxDist, plasmaCrawlLURL urlManager, plasmaWordIndexEntryContainer containerCache,
public static yacySearch[] searchHashes(Set wordhashes, String prefer, String filter, int maxDist, plasmaCrawlLURL urlManager, indexContainer containerCache,
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
// check own peer status

@ -71,6 +71,7 @@ import de.anomic.http.httpc;
import de.anomic.http.httpd;
import de.anomic.http.httpdFileHandler;
import de.anomic.http.httpdProxyHandler;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroDyn;
@ -85,7 +86,6 @@ import de.anomic.plasma.plasmaWordIndexAssortmentCluster;
import de.anomic.plasma.plasmaWordIndexClassicDB;
import de.anomic.plasma.plasmaWordIndexEntity;
import de.anomic.plasma.plasmaWordIndexEntryInstance;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverFileUtils;
@ -754,7 +754,7 @@ public final class yacy {
byte[][] row = (byte[][]) contentIter.next();
String hash = new String(row[0]);
plasmaWordIndexEntryContainer container = assortmentFile.row2container(hash, row);
indexContainer container = assortmentFile.row2container(hash, row);
wordEntryCount += container.size();
// importing entity container to home db
@ -848,7 +848,7 @@ public final class yacy {
// testing if import process was aborted
if (Thread.interrupted()) break;
plasmaWordIndexEntryContainer newContainer;
indexContainer newContainer;
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
@ -961,7 +961,7 @@ public final class yacy {
String wordChunkStartHash = "------------", wordChunkEndHash;
while (wordHashIterator.hasNext()) {
plasmaWordIndexEntryContainer wordIdxContainer = null;
indexContainer wordIdxContainer = null;
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();

Loading…
Cancel
Save