- removed the 'deleteComplete' flag; it was needed almost only by the old file-based WORDS indexes
- moved methods from plasmaSwitchboard to plasmaWordIndex

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3051 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent fbe1ee402b
commit 9a85f5abc3
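
Taken together, the hunks below make one mechanical API change across the reverse word index layer: the deleteIfEmpty/deleteComplete flags disappear from the indexRI read and remove paths, and the removeReferences helpers move from plasmaSwitchboard into plasmaWordIndex with their parameter order flipped from (urlhash, words) to (words, urlhash). A condensed before/after sketch of the affected signatures (copied from the indexRI hunk further down; the two interface names here are illustrative, indexContainer is the real YaCy type):

import java.util.Set;

interface IndexRiBefore {
    indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
    boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
    int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
}

interface IndexRiAfter {
    indexContainer getContainer(String wordHash, Set urlselection, long maxtime);
    boolean removeEntry(String wordHash, String urlHash);
    int removeEntries(String wordHash, Set urlHashes);
}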

@@ -117,7 +117,7 @@ public class DetailedSearch {
 return prop;
 }
 final String delHash = post.get("deleteref", "");
-sb.removeReferences(delHash, query);
+sb.wordIndex.removeReferences(query, delHash);
 }
 // prepare search order

@@ -155,7 +155,7 @@ public class IndexControl_p {
 if (delurl || delurlref) {
 // generate an urlx array
 indexContainer index = null;
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 Iterator en = index.entries();
 int i = 0;
 urlx = new String[index.size()];
@@ -194,7 +194,7 @@ public class IndexControl_p {
 }
 Set urlHashes = new HashSet();
 for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
-switchboard.wordIndex.removeEntries(keyhash, urlHashes, true);
+switchboard.wordIndex.removeEntries(keyhash, urlHashes);
 // this shall lead to a presentation of the list; so handle that the remaining program
 // thinks that it was called for a list presentation
 post.remove("keyhashdelete");
@@ -272,7 +272,7 @@ public class IndexControl_p {
 indexContainer index;
 String result;
 long starttime = System.currentTimeMillis();
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 // built urlCache
 Iterator urlIter = index.entries();
 HashMap knownURLs = new HashMap();
@@ -451,7 +451,7 @@ public class IndexControl_p {
 serverObjects prop = new serverObjects();
 indexContainer index = null;
 try {
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 prop.put("genUrlList_keyHash", keyhash);

@@ -399,7 +399,7 @@ public class dir {
 Map.Entry entry;
 while (words.hasNext()) {
 entry = (Map.Entry) words.next();
-switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash, true);
+switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash);
 }
 switchboard.wordIndex.loadedURL.remove(urlhash);
 } catch (Exception e) {

@@ -53,7 +53,7 @@ public class snippet {
 } else {
 String error = snippet.getError();
 if ((remove) && (error.equals("no matching snippet found"))) {
-switchboard.removeReferences(plasmaURL.urlHash(url), query);
+switchboard.wordIndex.removeReferences(query, plasmaURL.urlHash(url));
 }
 prop.put("text", error);
 }

@@ -184,7 +184,7 @@ public class yacysearch {
 // delete the index entry locally
 final String delHash = post.get("deleteref", ""); // urlhash
-sb.removeReferences(delHash, query);
+sb.wordIndex.removeReferences(query, delHash);
 // make new news message with negative voting
 HashMap map = new HashMap();

@@ -80,7 +80,7 @@ public class indexCachedRI implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -139,25 +139,25 @@ public class indexCachedRI implements indexRI {
 busyCacheFlush = false;
 }
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 // get from cache
-indexContainer container = riExtern.getContainer(wordHash, urlselection, true, maxTime);
+indexContainer container = riExtern.getContainer(wordHash, urlselection, maxTime);
 if (container == null) {
-container = riIntern.getContainer(wordHash, urlselection, true, maxTime);
+container = riIntern.getContainer(wordHash, urlselection, maxTime);
 } else {
-container.add(riIntern.getContainer(wordHash, urlselection, true, maxTime), maxTime);
+container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime);
 }
 // get from collection index
 if (container == null) {
-container = backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
 } else {
-container.add(backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), maxTime);
+container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime);
 }
 return container;
 }
-public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
+public Map getContainers(Set wordHashes, Set urlselection, boolean interruptIfEmpty, long maxTime) {
 // return map of wordhash:indexContainer
 // retrieve entities that belong to the hashes
@@ -177,7 +177,7 @@ public class indexCachedRI implements indexRI {
 singleHash = (String) i.next();
 // retrieve index
-singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 // check result
 if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -213,27 +213,27 @@ public class indexCachedRI implements indexRI {
 return c;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 boolean removed = false;
-removed = removed | (riIntern.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (riExtern.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (backend.removeEntry(wordHash, urlHash, deleteComplete));
+removed = removed | (riIntern.removeEntry(wordHash, urlHash));
+removed = removed | (riExtern.removeEntry(wordHash, urlHash));
+removed = removed | (backend.removeEntry(wordHash, urlHash));
 return removed;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 int removed = 0;
-removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += riIntern.removeEntries(wordHash, urlHashes);
+removed += riExtern.removeEntries(wordHash, urlHashes);
+removed += backend.removeEntries(wordHash, urlHashes);
 return removed;
 }
-public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+public String removeEntriesExpl(String wordHash, Set urlHashes) {
 String removed = "";
-removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += backend.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
+removed += riIntern.removeEntries(wordHash, urlHashes) + ", ";
+removed += riExtern.removeEntries(wordHash, urlHashes) + ", ";
+removed += backend.removeEntries(wordHash, urlHashes) + ", ";
 return removed;
 }

@@ -61,7 +61,7 @@ public class indexCollectionRI implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -113,9 +113,9 @@ public class indexCollectionRI implements indexRI {
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime) {
 try {
-kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
+kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
 if (collection != null) collection.select(urlselection);
 if ((collection == null) || (collection.size() == 0)) return null;
 return new indexContainer(wordHash, collection);
@@ -134,15 +134,15 @@ public class indexCollectionRI implements indexRI {
 }
 }
-public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public synchronized boolean removeEntry(String wordHash, String urlHash) {
 HashSet hs = new HashSet();
 hs.add(urlHash.getBytes());
-return removeEntries(wordHash, hs, deleteComplete) == 1;
+return removeEntries(wordHash, hs) == 1;
 }
-public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public synchronized int removeEntries(String wordHash, Set urlHashes) {
 try {
-return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
+return collectionIndex.remove(wordHash.getBytes(), urlHashes);
 } catch (kelondroOutOfLimitsException e) {
 e.printStackTrace();
 return 0;

@@ -145,12 +145,12 @@ public class indexContainer extends kelondroRowSet {
 return new indexRWIEntryNew(entry);
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 if (!wordHash.equals(this.wordHash)) return false;
 return remove(urlHash) != null;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 if (!wordHash.equals(this.wordHash)) return 0;
 int count = 0;
 Iterator i = urlHashes.iterator();

@@ -98,7 +98,7 @@ public final class indexRAMRI implements indexRI {
 public synchronized long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -334,7 +334,7 @@ public final class indexRAMRI implements indexRI {
 return (((long) intTime) * (long) 1000) + initTime;
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
 // retrieve container
 indexContainer container = (indexContainer) cache.get(wordHash);
@@ -359,11 +359,11 @@ public final class indexRAMRI implements indexRI {
 return container;
 }
-public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public synchronized boolean removeEntry(String wordHash, String urlHash) {
 indexContainer c = (indexContainer) cache.get(wordHash);
-if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
+if ((c != null) && (c.removeEntry(wordHash, urlHash))) {
 // removal successful
-if ((c.size() == 0) && (deleteComplete)) {
+if (c.size() == 0) {
 deleteContainer(wordHash);
 } else {
 cache.put(wordHash, c);
@@ -375,13 +375,13 @@ public final class indexRAMRI implements indexRI {
 return false;
 }
-public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public synchronized int removeEntries(String wordHash, Set urlHashes) {
 if (urlHashes.size() == 0) return 0;
 indexContainer c = (indexContainer) cache.get(wordHash);
 int count;
-if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
+if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes)) > 0)) {
 // removal successful
-if ((c.size() == 0) && (deleteComplete)) {
+if (c.size() == 0) {
 deleteContainer(wordHash);
 } else {
 cache.put(wordHash, c);

@@ -39,11 +39,11 @@ public interface indexRI {
 public long getUpdateTime(String wordHash);
 public int indexSize(String wordHash);
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
+public indexContainer getContainer(String wordHash, Set urlselection, long maxtime);
 public indexContainer deleteContainer(String wordHash);
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
+public boolean removeEntry(String wordHash, String urlHash);
+public int removeEntries(String wordHash, Set urlHashes);
 public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtCase);
 public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);
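
The flag can be dropped from the interface because every implementation now applies the same rule unconditionally: a container that becomes empty is deleted (compare the indexRAMRI hunks above, where "(c.size() == 0) && (deleteComplete)" became "c.size() == 0"). A minimal sketch of that invariant, with a plain HashMap standing in for the kelondro-backed cache:

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

class RamCacheSketch {
    private final Map cache = new HashMap(); // wordHash -> Set of urlHashes

    public boolean removeEntry(String wordHash, String urlHash) {
        Set c = (Set) cache.get(wordHash);
        if ((c == null) || (!c.remove(urlHash))) return false;
        if (c.size() == 0) cache.remove(wordHash); // always drop empty containers now
        return true;
    }
}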

@@ -262,7 +262,7 @@ public class kelondroCollectionIndex {
 putmergeremove(key, collection, true, null);
 }
-public synchronized int remove(byte[] key, Set removekeys, boolean deletecomplete) throws IOException, kelondroOutOfLimitsException {
+public synchronized int remove(byte[] key, Set removekeys) throws IOException, kelondroOutOfLimitsException {
 return putmergeremove(key, null, false, removekeys);
 }
@@ -406,7 +406,7 @@ public class kelondroCollectionIndex {
 return (int) indexrow.getColLong(idx_col_chunkcount);
 }
-public synchronized kelondroRowSet get(byte[] key, boolean deleteIfEmpty) throws IOException {
+public synchronized kelondroRowSet get(byte[] key) throws IOException {
 // find an entry, if one exists
 kelondroRow.Entry indexrow = index.get(key);
 if (indexrow == null) return null;
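
Callers of kelondroCollectionIndex drop their flags accordingly. A hedged sketch of the new call shape (kelondroCollectionIndex, kelondroRowSet and the exceptions are the real types from the tree; the helper method itself is hypothetical):

import java.io.IOException;
import java.util.Set;

class CollectionIndexCalls {
    static int dropUrls(kelondroCollectionIndex ci, String wordHash, Set urlHashes)
            throws IOException, kelondroOutOfLimitsException {
        kelondroRowSet collection = ci.get(wordHash.getBytes()); // was: ci.get(key, deleteIfEmpty)
        if ((collection == null) || (collection.size() == 0)) return 0;
        return ci.remove(wordHash.getBytes(), urlHashes); // was: ci.remove(key, removekeys, deletecomplete)
    }
}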

@@ -71,7 +71,6 @@ public class plasmaDHTChunk {
 private plasmaWordIndex wordIndex;
 private serverLog log;
-private plasmaCrawlLURL lurls;
 private int status = chunkStatus_UNDEFINED;
 private String startPointHash;
@@ -124,11 +123,10 @@ public class plasmaDHTChunk {
 return this.status;
 }
-public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime) {
+public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime) {
 try {
 this.log = log;
 this.wordIndex = wordIndex;
-this.lurls = lurls;
 this.startPointHash = selectTransferStart();
 log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
 selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
@@ -144,11 +142,10 @@ public class plasmaDHTChunk {
 }
 }
-public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime, String startHash) {
+public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime, String startHash) {
 try {
 this.log = log;
 this.wordIndex = wordIndex;
-this.lurls = lurls;
 log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
 selectTransferContainers(startHash, minCount, maxCount, maxtime);
@@ -233,12 +230,12 @@ public class plasmaDHTChunk {
 urlIter.remove();
 continue;
 }
-lurl = lurls.load(iEntry.urlHash(), iEntry);
+lurl = wordIndex.loadedURL.load(iEntry.urlHash(), iEntry);
 if ((lurl == null) || (lurl.comp().url() == null)) {
 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash());
 notBoundCounter++;
 urlIter.remove();
-wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true);
+wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash());
 } else {
 urlCache.put(iEntry.urlHash(), lurl);
 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash());
@@ -302,7 +299,7 @@ public class plasmaDHTChunk {
 urlHashes.add(iEntry.urlHash());
 }
 String wordHash = indexContainers[i].getWordHash();
-count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes, true);
+count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes);
 if (log.isFine())
 log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash + "; " + this.wordIndex.indexSize(wordHash) + " entries left");
 this.indexContainers[i] = null;
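
For code constructing DHT chunks the migration is mechanical: the plasmaCrawlLURL argument disappears because the chunk now reaches loaded URLs through wordIndex.loadedURL (see the selectTransferContainers hunk above). A sketch, with argument values as in the plasmaSwitchboard hunk further down (the wrapper itself is hypothetical):

class ChunkConstruction {
    static plasmaDHTChunk newChunk(serverLog log, plasmaWordIndex wordIndex, int minChunkSize, int maxCount) {
        // was: new plasmaDHTChunk(log, wordIndex, wordIndex.loadedURL, minChunkSize, maxCount, 5000)
        return new plasmaDHTChunk(log, wordIndex, minChunkSize, maxCount, 5000);
    }
}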

@@ -169,7 +169,7 @@ public class plasmaDHTFlush extends Thread {
 // selecting 500 words to transfer
 this.status = "Running: Selecting chunk " + iteration;
-newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.wordIndex.loadedURL, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
+newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
 /* If we havn't selected a word chunk this could be because of
 * a) no words are left in the index

@@ -421,7 +421,7 @@ public class plasmaRankingCRProcess {
 if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6);
 // update domain-specific entry
-rci_entry = rci.get(anchorDom.getBytes(), false);
+rci_entry = rci.get(anchorDom.getBytes());
 if (rci_entry == null) rci_entry = new kelondroRowSet(RCI_coli, 0);
 rci_entry.add(refereeDom.getBytes());

@@ -401,7 +401,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
 log.logFine("filtered out " + page.comp().url().toString());
 // filter out bad results
 Iterator wi = query.queryHashes.iterator();
-while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash(), true);
+while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
 } else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
 if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addResult(page, preranking);
 else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addResult(page, preranking);

@@ -119,7 +119,6 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 import java.util.TreeSet;
 import de.anomic.data.blogBoard;
@@ -1003,10 +1002,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 //return processStack.size() + cacheLoader.size() + noticeURL.stackSize();
 }
-public int cacheSizeMin() {
-return wordIndex.size();
-}
 public void enQueue(Object job) {
 if (!(job instanceof plasmaSwitchboardQueue.Entry)) {
 System.out.println("internal error at plasmaSwitchboard.enQueue: wrong job type");
@@ -1063,7 +1058,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 ) {
 // generate new chunk
 int minChunkSize = (int) getConfigLong("indexDistribution.minChunkSize", 30);
-dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, wordIndex.loadedURL, minChunkSize, dhtTransferIndexCount, 5000);
+dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, minChunkSize, dhtTransferIndexCount, 5000);
 doneSomething = true;
 }
@@ -2115,7 +2110,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 filename = comp.url().getFile();
 if ((seed == null) || ((address = seed.getAddress()) == null)) {
 // seed is not known from here
-removeReferences(urlentry.hash(), plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"));
+wordIndex.removeReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"), urlentry.hash());
 wordIndex.loadedURL.remove(urlentry.hash()); // clean up
 continue; // next result
 }
@@ -2264,7 +2259,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 // delete all word references
 int count = 0;
-if (witer != null) count = removeReferences(urlhash, witer);
+if (witer != null) count = wordIndex.removeReferences(witer, urlhash);
 // finally delete the url entry itself
 wordIndex.loadedURL.remove(urlhash);
@@ -2275,39 +2270,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 if (resourceContent != null) try { resourceContent.close(); } catch (Exception e) {/* ignore this */}
 }
 }
-public int removeReferences(URL url, Set words) {
-return removeReferences(plasmaURL.urlHash(url), words);
-}
-public int removeReferences(final String urlhash, final Set words) {
-// sequentially delete all word references
-// returns number of deletions
-Iterator iter = words.iterator();
-String word;
-int count = 0;
-while (iter.hasNext()) {
-word = (String) iter.next();
-// delete the URL reference in this word index
-if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-}
-return count;
-}
-public int removeReferences(final String urlhash, final Iterator wordStatPropIterator) {
-// sequentially delete all word references
-// returns number of deletions
-Map.Entry entry;
-String word;
-int count = 0;
-while (wordStatPropIterator.hasNext()) {
-entry = (Map.Entry) wordStatPropIterator.next();
-word = (String) entry.getKey();
-// delete the URL reference in this word index
-if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-}
-return count;
-}
 public int adminAuthenticated(httpHeader header) {
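
Note that the move also flips the parameter order: plasmaSwitchboard.removeReferences(urlhash, words) becomes plasmaWordIndex.removeReferences(words, urlhash). A call-site migration sketch (sb, query and delHash as in the DetailedSearch and yacysearch hunks at the top; the wrapper is hypothetical):

import java.util.Set;

class RemoveReferencesMigration {
    static void deleteReference(plasmaSwitchboard sb, Set query, String delHash) {
        // was: sb.removeReferences(delHash, query);
        sb.wordIndex.removeReferences(query, delHash);
    }
}
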
@@ -2402,7 +2364,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 return "no DHT distribution: not enough words - wordIndex.size() = " + wordIndex.size();
 }
 if ((getConfig("allowDistributeIndexWhileCrawling","false").equalsIgnoreCase("false")) &&
-((noticeURL.stackSize() > 0) || (sbQueue.size() > 3))) {
+((noticeURL.stackSize() > 0) /*|| (sbQueue.size() > 3)*/)) {
 return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + noticeURL.stackSize() + ", sbQueue.size() = " + sbQueue.size();
 }
 return null;

@@ -139,7 +139,7 @@ public final class plasmaWordIndex implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -163,21 +163,6 @@ public final class plasmaWordIndex implements indexRI {
 }
 }
-/*
-private indexContainer convertOld2New(indexContainer entries) {
-// convert old entries to new entries
-indexContainer newentries = new indexContainer(entries.getWordHash(), indexRWIEntryNew.urlEntryRow);
-Iterator i = entries.entries();
-indexRWIEntryOld old;
-while (i.hasNext()) {
-old = (indexRWIEntryOld) i.next();
-if (old.urlHash() != null) {
-newentries.add(new indexRWIEntryNew(old));
-}
-}
-return newentries;
-}
-*/
 public void addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
 assert (entries.row().objectsize() == indexRWIEntryNew.urlEntryRow.objectsize());
@@ -306,21 +291,21 @@ public final class plasmaWordIndex implements indexRI {
 return condenser.RESULT_SIMI_WORDS;
 }
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 // get from cache
-indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
+indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, -1);
 if (container == null) {
-container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
+container = dhtInCache.getContainer(wordHash, urlselection, -1);
 } else {
-container.add(dhtInCache.getContainer(wordHash, urlselection, true, -1), -1);
+container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1);
 }
 // get from collection index
 if (container == null) {
-container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
 } else {
-container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
+container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1);
 }
 return container;
 }
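
With deleteIfEmpty gone from the read path, a lookup can no longer delete anything as a side effect; cleanup now happens only on the remove path. A hypothetical caller of the new three-argument getContainer (null urlselection means no URL filtering; the time budget applies to the on-disk collections tier):

class LookupExample {
    static int countReferences(plasmaWordIndex wordIndex, String wordHash) {
        indexContainer c = wordIndex.getContainer(wordHash, null, 1000);
        return (c == null) ? 0 : c.size();
    }
}
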
@@ -345,7 +330,7 @@ public final class plasmaWordIndex implements indexRI {
 singleHash = (String) i.next();
 // retrieve index
-singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 // check result
 if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -383,39 +368,66 @@ public final class plasmaWordIndex implements indexRI {
 return c;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 boolean removed = false;
-removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));
+removed = removed | (dhtInCache.removeEntry(wordHash, urlHash));
+removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash));
+removed = removed | (collections.removeEntry(wordHash, urlHash));
 return removed;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 int removed = 0;
-removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += dhtInCache.removeEntries(wordHash, urlHashes);
+removed += dhtOutCache.removeEntries(wordHash, urlHashes);
+removed += collections.removeEntries(wordHash, urlHashes);
 return removed;
 }
-public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+public String removeEntriesExpl(String wordHash, Set urlHashes) {
 String removed = "";
-removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", ";
+removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", ";
+removed += collections.removeEntries(wordHash, urlHashes);
 return removed;
 }
-public static final int RL_RAMCACHE = 0;
-public static final int RL_COLLECTIONS = 1;
+public int removeReferences(Set words, String urlhash) {
+// sequentially delete all word references
+// returns number of deletions
+Iterator iter = words.iterator();
+String word;
+int count = 0;
+while (iter.hasNext()) {
+word = (String) iter.next();
+// delete the URL reference in this word index
+if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+}
+return count;
+}
+public int removeReferences(Iterator wordStatPropIterator, String urlhash) {
+// sequentially delete all word references
+// returns number of deletions
+Map.Entry entry;
+String word;
+int count = 0;
+while (wordStatPropIterator.hasNext()) {
+entry = (Map.Entry) wordStatPropIterator.next();
+word = (String) entry.getKey();
+// delete the URL reference in this word index
+if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+}
+return count;
+}
 public int tryRemoveURLs(String urlHash) {
 // this tries to delete an index from the cache that has this
 // urlHash assigned. This can only work if the entry is really fresh
 // and can be found in the RAM cache
 // this returns the number of deletion that had been possible
-return dhtInCache.tryRemoveURLs(urlHash) | dhtOutCache.tryRemoveURLs(urlHash);
+int d = dhtInCache.tryRemoveURLs(urlHash);
+if (d > 0) return d; else return dhtOutCache.tryRemoveURLs(urlHash);
 }
 public TreeSet indexContainerSet(String startHash, boolean ram, boolean rot, int count) {
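
The tryRemoveURLs rewrite in the hunk above is a behavioral fix, not just cleanup: the old one-liner combined the two per-cache deletion counts with a bitwise OR, which is not a meaningful count, and always queried both caches. The new version returns the dhtInCache count when it is positive and only falls through to dhtOutCache otherwise. A minimal contrast with hypothetical counts:

class TryRemoveContrast {
    public static void main(String[] args) {
        int inCount = 5, outCount = 3; // hypothetical per-cache deletion counts
        System.out.println(inCount | outCount); // old: bitwise OR -> 7, neither cache's count nor their sum
        System.out.println((inCount > 0) ? inCount : outCount); // new: 5, and dhtOutCache is not consulted at all
    }
}
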
@@ -540,7 +552,7 @@ public final class plasmaWordIndex implements indexRI {
 }
 }
 if (urlHashs.size() > 0) {
-int removed = removeEntries(container.getWordHash(), urlHashs, true);
+int removed = removeEntries(container.getWordHash(), urlHashs);
 serverLog.logFine("INDEXCLEANER", container.getWordHash() + ": " + removed + " of " + container.size() + " URL-entries deleted");
 lastWordHash = container.getWordHash();
 lastDeletionCounter = urlHashs.size();

@@ -64,12 +64,10 @@ public final class plasmaWordIndexFile {
 private final String theWordHash;
 private kelondroTree theIndex;
 private File theLocation;
-private boolean delete;
-public plasmaWordIndexFile(File databaseRoot, String wordHash, boolean deleteIfEmpty) {
+public plasmaWordIndexFile(File databaseRoot, String wordHash) {
 theWordHash = wordHash;
 theIndex = indexFile(databaseRoot, wordHash);
-delete = deleteIfEmpty;
 }
 public static boolean removePlasmaIndex(File databaseRoot, String wordHash) {
@@ -112,7 +110,7 @@ public final class plasmaWordIndexFile {
 public int size() {
 if (theIndex == null) return 0;
 int size = theIndex.size();
-if ((size == 0) && (delete)) {
+if (size == 0) {
 deleteComplete();
 return 0;
 } else {
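
One consequence worth noting: with the delete field gone, plasmaWordIndexFile.size() now removes an empty word file unconditionally, whereas before it did so only when the entity had been opened with deleteIfEmpty set.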

@@ -95,7 +95,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
 }
 public Object next() {
-return getContainer((String) wordIterator.next(), null, true, 100);
+return getContainer((String) wordIterator.next(), null, 100);
 }
 public void remove() {
@@ -224,11 +224,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
 return plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists();
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 long start = System.currentTimeMillis();
 if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
 if (exists(wordHash)) {
-plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
+plasmaWordIndexFile entity = this.getEntity(wordHash, (maxTime < 0) ? -1 : maxTime * 9 / 10);
 indexContainer container = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow);
 indexRWIEntryNew entry;
 Iterator i = entity.elements(true);
@@ -242,8 +242,8 @@ public class plasmaWordIndexFileCluster implements indexRI {
 }
 }
-public plasmaWordIndexFile getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
-return new plasmaWordIndexFile(databaseRoot, wordHash, deleteIfEmpty);
+public plasmaWordIndexFile getEntity(String wordHash, long maxTime) {
+return new plasmaWordIndexFile(databaseRoot, wordHash);
 }
 public long getUpdateTime(String wordHash) {
@@ -256,11 +256,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
 return null;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
 }

@@ -136,7 +136,7 @@ public class yacyPeerActions {
 seedDB.mySeed.put(yacySeed.UPTIME, Long.toString(uptime/60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
 seedDB.mySeed.put(yacySeed.LCOUNT, Integer.toString(sb.wordIndex.loadedURL.size())); // the number of links that the peer has stored (LURL's)
 seedDB.mySeed.put(yacySeed.NCOUNT, Integer.toString(sb.noticeURL.stackSize())); // the number of links that the peer has noticed, but not loaded (NURL's)
-seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.cacheSizeMin())); // the minimum number of words that the peer has indexed (as it says)
+seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.wordIndex.size())); // the minimum number of words that the peer has indexed (as it says)
 seedDB.mySeed.put(yacySeed.SCOUNT, Integer.toString(seedDB.sizeConnected())); // the number of seeds that the peer has stored
 seedDB.mySeed.put(yacySeed.CCOUNT, Double.toString(((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour)
 seedDB.mySeed.put(yacySeed.VERSION, sb.getConfig("version", ""));

@@ -697,7 +697,7 @@ public final class yacy {
 }
 plasmaWordIndexFile entity = null;
 try {
-entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
+entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash);
 int size = entity.size();
 indexContainer container = new indexContainer(wordhash, indexRWIEntryNew.urlEntryRow);
