some redesign of the search-fail-response mechanism:

when a search fails for a single url because the snippet cannot be generated, the url reference is deleted from the index. This mechanism was redesigned and enhanced: the process now also writes a record into the work tables, into the table 'searchfl', to prepare a re-indexing mechanism.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7364 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent 4915d1781a
commit db99db4be9
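In outline, the hunks below wire three layers together: the search servlets pass the Switchboard's WorkTables through SearchEventCache and SearchEvent into the ResultFetcher; a snippet failure is recorded in the new 'searchfl' work table instead of a per-event HandleSet; and the IndexCell buffers the word/url deletions so they can be applied lazily. A condensed sketch of the failure path, pieced together from the diff (simplified, not a drop-in replacement for any single method; all names are from the hunks below):

    // when snippet generation fails for a result url (see ResultFetcher below)
    String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
    // queue the (word, url) pairs for delayed removal from the reverse word index
    // and record url, date, exported word hashes and the reason in table 'searchfl'
    workTables.failURLsRegisterMissingWord(
            query.getSegment().termIndex(),  // IndexCell<WordReference>
            metadata.url(),                  // the DigestURI whose snippet failed
            query.queryHashes,               // HandleSet of the searched word hashes
            reason);
    // the IndexCell applies the queued removals in batch when it dumps, closes,
    // or must deliver an exact reference container (see removeDelayed() below)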

@ -291,7 +291,7 @@ public final class search {
yacyChannel.channels(yacyChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
// make event
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, null, abstracts.length() > 0, sb.loader);
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, sb.crawlResults, null, abstracts.length() > 0, sb.loader);
// set statistic details of search result and find best result index set
joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount();

@ -527,7 +527,7 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without an offset
offset = 0;
}
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader);
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader);
try {Thread.sleep(global ? 100 : 10);} catch (InterruptedException e1) {} // wait a little time to get first results in the search
if (offset == 0) {

@ -36,8 +36,12 @@ import java.util.Map;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.DateFormatter;
import de.anomic.server.serverObjects;
@ -64,6 +68,12 @@ public class WorkTables extends Tables {
public final static String TABLE_ACTIVECRAWLS_NAME = "crawljobsActive";
public final static String TABLE_PASSIVECRAWLS_NAME = "crawljobsPassive";
public final static String TABLE_SEARCH_FAILURE_NAME = "searchfl";
public final static String TABLE_SEARCH_FAILURE_COL_URL = "url";
public final static String TABLE_SEARCH_FAILURE_COL_DATE = "date";
public final static String TABLE_SEARCH_FAILURE_COL_WORDS = "words";
public final static String TABLE_SEARCH_FAILURE_COL_COMMENT = "comment";
public YMarkTables bookmarks;
@ -283,4 +293,35 @@ public class WorkTables extends Tables {
d -= d % 60000; // remove seconds
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d));
}
public void failURLsRegisterMissingWord(IndexCell<WordReference> indexCell, final DigestURI url, HandleSet queryHashes, final String reason) {
// remove the url reference from each query word's index entry (delayed)
for (byte[] word: queryHashes) {
indexCell.removeDelayed(word, url.hash());
}
// insert information about changed url into database
try {
// create and insert new entry
Data data = new Data();
byte[] date = DateFormatter.formatShortMilliSecond(new Date()).getBytes();
data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true, false));
data.put(TABLE_SEARCH_FAILURE_COL_DATE, date);
data.put(TABLE_SEARCH_FAILURE_COL_WORDS, queryHashes.export());
data.put(TABLE_SEARCH_FAILURE_COL_COMMENT, reason.getBytes());
super.insert(TABLE_SEARCH_FAILURE_NAME, url.hash(), data);
} catch (IOException e) {
Log.logException(e);
}
}
public boolean failURLsContains(byte[] urlhash) {
try {
return super.has(TABLE_SEARCH_FAILURE_NAME, urlhash);
} catch (IOException e) {
Log.logException(e);
return false;
}
}
}
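Nothing in this commit reads 'searchfl' back yet; per the commit message it only prepares a re-indexing mechanism. A hypothetical sketch of the read side, where lookupRow stands in for whatever Tables accessor returns the stored Data for a primary key, Data is treated as a map from column name to byte[], and wordRowdef stands for the Row definition the query-hash set was built with (all placeholders, not confirmed YaCy API):

    // hypothetical re-indexing step, NOT part of this commit
    Tables.Data data = lookupRow(TABLE_SEARCH_FAILURE_NAME, urlhash); // placeholder accessor
    byte[] exported = data.get(TABLE_SEARCH_FAILURE_COL_WORDS);       // written by failURLsRegisterMissingWord
    HandleSet words = new HandleSet(wordRowdef, exported);            // new constructor, see HandleSet hunk below
    // re-crawl the url, then re-insert the words into the reverse index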

@ -83,8 +83,8 @@ public final class RankingProcess extends Thread {
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final DynamicScore<String> ref; // reference score computation for the commonSense heuristic
private final DynamicScore<String> hostNavigator;
private final Map<String, String> hostResolver;
private final DynamicScore<String> hostNavigator; // a counter for the appearance of the host hash
private final Map<String, String> hostResolver; // a mapping from a host hash (6 bytes) to the full url hash of one of these urls that have the host hash
private final DynamicScore<String> authorNavigator;
private final DynamicScore<String> namespaceNavigator;
private final ReferenceOrder order;
@ -406,10 +406,10 @@ public final class RankingProcess extends Thread {
// in case that we do not have a catchall filter for urls
// we must also construct the domain navigator here
if (query.sitehash == null) {
this.hostNavigator.inc(new String(urlhash, 6, 6));
this.hostResolver.put(new String(urlhash, 6, 6), new String(urlhash));
}
//if (query.sitehash == null) {
// this.hostNavigator.inc(new String(urlhash, 6, 6));
// this.hostResolver.put(new String(urlhash, 6, 6), new String(urlhash));
//}
}
// check for more errors
@ -433,7 +433,9 @@ public final class RankingProcess extends Thread {
(query.constraint.get(Condenser.flag_cat_indexof)) &&
(!(pagetitle.startsWith("index of")))) {
final Iterator<byte[]> wi = query.queryHashes.iterator();
while (wi.hasNext()) try { this.query.getSegment().termIndex().remove(wi.next(), page.hash()); } catch (IOException e) {}
while (wi.hasNext()) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
}
continue;
}
@ -526,7 +528,7 @@ public final class RankingProcess extends Thread {
public int getRemoteResourceSize() {
// the number of all hits in all the remote peers
return this.remote_resourceSize;
return Math.max(this.remote_resourceSize, this.remote_indexCount);
}
public int getRemotePeerCount() {
@ -560,7 +562,9 @@ public final class RankingProcess extends Thread {
urlhash = this.hostResolver.get(domhash);
row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash.getBytes(), null, 0);
hostname = row == null ? null : row.metadata().url().getHost();
if (hostname != null) result.set(hostname, this.hostNavigator.get(domhash));
if (hostname != null) {
result.set(hostname, this.hostNavigator.get(domhash));
}
}
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;

@ -45,6 +45,7 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.WorkTables;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.graphics.ProfilingGraph;
@ -54,13 +55,13 @@ public class ResultFetcher {
final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query;
private final yacySeedDB peers;
private final WorkTables workTables;
// result values
protected final LoaderDispatcher loader;
protected Worker[] workerThreads;
protected final WeakPriorityBlockingQueue<ResultEntry> result;
protected final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size
protected final HandleSet failedURLs; // a set of url hashes that could not be verified during search
protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
long urlRetrievalAllTime;
long snippetComputationAllTime;
@ -71,19 +72,20 @@ public class ResultFetcher {
RankingProcess rankedCache,
final QueryParams query,
final yacySeedDB peers,
final WorkTables workTables,
final int taketimeout) {
assert query != null;
this.loader = loader;
this.rankingProcess = rankedCache;
this.query = query;
this.peers = peers;
this.workTables = workTables;
this.taketimeout = taketimeout;
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);
this.failedURLs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // a set of url hashes that a worker thread tried to process but failed on.
// snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search
@ -331,7 +333,7 @@ public class ResultFetcher {
System.out.println("page == null");
break; // no more available
}
if (failedURLs.has(page.hash())) continue;
if (workTables.failURLsContains(page.hash())) continue;
loops++;
final ResultEntry resultEntry = fetchSnippet(page, cacheStrategy); // does not fetch snippets if snippetMode == 0
@ -408,7 +410,9 @@ public class ResultFetcher {
return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
} else {
// problems with snippet fetch
registerFailure(page.hash(), "no text snippet for URL " + metadata.url() + "; errorCode = " + snippet.getErrorCode());
String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
return null;
}
} else {
@ -425,19 +429,12 @@ public class ResultFetcher {
return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime);
} else {
// problems with snippet fetch
registerFailure(page.hash(), "no media snippet for URL " + metadata.url());
String reason = "no media snippet";
this.workTables.failURLsRegisterMissingWord(query.getSegment().termIndex(), metadata.url(), query.queryHashes, reason);
Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
return null;
}
}
// finished, no more actions possible here
}
private void registerFailure(final byte[] urlhash, final String reason) {
try {
this.failedURLs.put(urlhash);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason);
}
}

@ -26,7 +26,6 @@
package de.anomic.search;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
@ -39,8 +38,6 @@ import java.util.concurrent.TimeUnit;
import net.yacy.cora.storage.StaticScore;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.rwi.ReferenceContainer;
@ -50,6 +47,7 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.ResultURLs;
import de.anomic.data.WorkTables;
import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.dht.FlatWordPartitionScheme;
@ -68,6 +66,7 @@ public final class SearchEvent {
private long eventTime;
private QueryParams query;
private final yacySeedDB peers;
private final WorkTables workTables;
private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher resultFetcher;
@ -86,12 +85,14 @@ public final class SearchEvent {
public SearchEvent(final QueryParams query,
final yacySeedDB peers,
final WorkTables workTables,
final ResultURLs crawlResults,
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
this.peers = peers;
this.workTables = workTables;
this.crawlResults = crawlResults;
this.query = query;
this.secondarySearchSuperviser = (query.queryHashes.size() > 1) ? new SecondarySearchSuperviser() : null; // generate abstracts only for combined searches
@ -153,7 +154,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 3000);
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000);
} else {
// do a local search
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
@ -197,7 +198,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 500);
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500);
}
// clean up events
@ -253,28 +254,6 @@ public final class SearchEvent {
if (this.IACount != null) this.IACount.clear();
if (this.IAResults != null) this.IAResults.clear();
if (this.heuristics != null) this.heuristics.clear();
// execute deletion of failed words
int rw = this.resultFetcher.failedURLs.size();
if (rw > 0) {
long start = System.currentTimeMillis();
final HandleSet removeWords = query.queryHashes;
try {
removeWords.putAll(query.excludeHashes);
} catch (RowSpaceExceededException e1) {
Log.logException(e1);
}
try {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
this.query.getSegment().termIndex().remove(j.next(), this.resultFetcher.failedURLs);
}
} catch (IOException e) {
Log.logException(e);
}
Log.logInfo("SearchEvents", "cleaning up event " + query.id(true) + ", removed " + rw + " URL references on " + removeWords.size() + " words in " + (System.currentTimeMillis() - start) + " milliseconds");
}
}
public Iterator<Map.Entry<byte[], String>> abstractsString() {

@ -38,6 +38,7 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.ResultURLs;
import de.anomic.data.WorkTables;
import de.anomic.yacy.yacySeedDB;
public class SearchEventCache {
@ -101,6 +102,7 @@ public class SearchEventCache {
public static SearchEvent getEvent(
final QueryParams query,
final yacySeedDB peers,
final WorkTables workTables,
final ResultURLs crawlResults,
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
@ -126,7 +128,7 @@ public class SearchEventCache {
}
if (event == null) {
// start a new event
event = new SearchEvent(query, peers, crawlResults, preselectedPeerHashes, generateAbstracts, loader);
event = new SearchEvent(query, peers, workTables, crawlResults, preselectedPeerHashes, generateAbstracts, loader);
}
return event;

@ -230,7 +230,7 @@ public final class Switchboard extends serverSwitch {
public LinkedBlockingQueue<String> trail;
public yacySeedDB peers;
public WorkTables tables;
public SortedMap<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
public SortedMap<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
public WorkflowProcessor<indexingQueueEntry> indexingDocumentProcessor;
public WorkflowProcessor<indexingQueueEntry> indexingCondensementProcessor;

@ -64,10 +64,19 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
this.index = index;
}
public HandleSet(Row rowdef, byte[] b) {
this.rowdef = rowdef;
this.index = RowSet.importRowSet(b, this.rowdef);
}
public HandleSet clone() {
return new HandleSet(this.rowdef, this.index.clone());
}
public byte[] export() {
return index.exportCollection();
}
/**
* initialize a HandleSet with the content of a dump
* @param keylength
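export() and the new byte[] constructor form the serialization pair that the 'searchfl' words column relies on: export() dumps the backing RowSet, and importRowSet() rebuilds it. A minimal round trip, assuming urlhash is a valid hash and rowdef the matching Row definition (kept abstract here):

    HandleSet set = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength,
                                  URIMetadataRow.rowdef.objectOrder, 0);
    set.put(urlhash);                             // may throw RowSpaceExceededException
    byte[] blob = set.export();                   // RowSet.exportCollection()
    HandleSet copy = new HandleSet(rowdef, blob); // same content, freshly imported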

@ -46,6 +46,15 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
this.factory = factory;
}
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException {
// remove the same url hashes for multiple words
// this is mainly used when correcting an index after a search
final Iterator<byte[]> i = termHashes.iterator();
while (i.hasNext()) {
removeDelayed(i.next(), urlHashBytes);
}
}
public int remove(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException {
// remove the same url hashes for multiple words
// this is mainly used when correcting an index after a search

@ -106,6 +106,7 @@ public interface Index <ReferenceType extends Reference> {
* @throws IOException
*/
public boolean remove(byte[] termHash, byte[] referenceHash) throws IOException;
public void removeDelayed(byte[] termHash, byte[] referenceHash) throws IOException;
/**
* remove a set of reference entries for a given word
@ -115,8 +116,11 @@ public interface Index <ReferenceType extends Reference> {
* @throws IOException
*/
public int remove(final byte[] termHash, HandleSet referenceHashes) throws IOException;
public void removeDelayed(final byte[] termHash, HandleSet referenceHashes) throws IOException;
public int remove(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
public void removeDelayed(final HandleSet termHashes, final byte[] urlHashBytes) throws IOException;
public void removeDelayed() throws IOException;
/**
* iterate all references from the beginning of a specific word hash
* @param startHash

@ -28,6 +28,8 @@ package net.yacy.kelondro.rwi;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Semaphore;
import net.yacy.cora.storage.ComparableARC;
@ -71,6 +73,8 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
private final int writeBufferSize;
private Semaphore dumperSemaphore = new Semaphore(1);
private Semaphore cleanerSemaphore = new Semaphore(1);
private final Map<byte[], HandleSet> failedURLs; // mapping from word hashes to a list of url hashes
public IndexCell(
final File cellPath,
@ -96,7 +100,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
this.targetFileSize = targetFileSize;
this.maxFileSize = maxFileSize;
this.writeBufferSize = writeBufferSize;
//cleanCache();
this.failedURLs = new TreeMap<byte[], HandleSet>(URIMetadataRow.rowdef.objectOrder);
}
@ -169,6 +173,13 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
ReferenceContainer<ReferenceType> countRam = this.ram.get(termHash, null);
assert countRam == null || countRam.size() >= 0;
int c = countRam == null ? countFile : countFile + countRam.size();
// exclude entries from delayed remove
synchronized (this.failedURLs) {
HandleSet s = this.failedURLs.get(termHash);
if (s != null) c -= s.size();
if (c < 0) c = 0;
}
// put count result into cache
this.countCache.put(termHash, c);
return c;
}
@ -188,22 +199,31 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
} catch (RowSpaceExceededException e2) {
Log.logException(e2);
}
if (c1 == null) {
if (c0 == null) return null;
return c0;
}
if (c0 == null) return c1;
try {
return c1.merge(c0);
} catch (RowSpaceExceededException e) {
// try to free some ram
ReferenceContainer<ReferenceType> result = null;
if (c0 != null && c1 != null) {
try {
return c1.merge(c0);
} catch (RowSpaceExceededException e1) {
// go silently over the problem
return (c1.size() > c0.size()) ? c1: c0;
result = c1.merge(c0);
} catch (RowSpaceExceededException e) {
// try to free some ram
try {
result = c1.merge(c0);
} catch (RowSpaceExceededException e1) {
// go silently over the problem
result = (c1.size() > c0.size()) ? c1: c0;
}
}
} else if (c0 != null) {
result = c0;
} else if (c1 != null) {
result = c1;
}
if (result == null) return null;
// remove the failed urls
synchronized (this.failedURLs) {
HandleSet s = this.failedURLs.get(termHash);
if (s != null) result.removeEntries(s);
}
return result;
}
/**
@ -212,6 +232,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
* @throws IOException
*/
public ReferenceContainer<ReferenceType> delete(byte[] termHash) throws IOException {
removeDelayed();
ReferenceContainer<ReferenceType> c1 = null;
try {
c1 = this.array.get(termHash);
@ -238,6 +259,60 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
}
public void removeDelayed(byte[] termHash, HandleSet urlHashes) {
HandleSet r;
synchronized (failedURLs) {
r = this.failedURLs.get(termHash);
}
if (r == null) {
r = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
}
try {
r.putAll(urlHashes);
} catch (RowSpaceExceededException e) {
try {remove(termHash, urlHashes);} catch (IOException e1) {}
return;
}
synchronized (failedURLs) {
this.failedURLs.put(termHash, r);
}
}
public void removeDelayed(byte[] termHash, byte[] urlHashBytes) {
HandleSet r;
synchronized (failedURLs) {
r = this.failedURLs.get(termHash);
}
if (r == null) {
r = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
}
try {
r.put(urlHashBytes);
} catch (RowSpaceExceededException e) {
try {remove(termHash, urlHashBytes);} catch (IOException e1) {}
return;
}
synchronized (failedURLs) {
this.failedURLs.put(termHash, r);
}
}
public void removeDelayed() throws IOException {
HandleSet words = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // the word hashes that currently have queued delayed url removals
synchronized (failedURLs) {
for (byte[] b: failedURLs.keySet()) try {words.put(b);} catch (RowSpaceExceededException e) {}
}
for (byte[] b: words) {
HandleSet urls;
synchronized (failedURLs) {
urls = failedURLs.remove(b);
}
remove(b, urls);
}
this.countCache.clear();
}
/**
* remove url references from a selected word hash. this deletes also in the BLOB
* files, which means that there exists new gap entries after the deletion
@ -246,6 +321,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
* @throws IOException
*/
public int remove(byte[] termHash, HandleSet urlHashes) throws IOException {
this.countCache.remove(termHash);
int removed = this.ram.remove(termHash, urlHashes);
int reduced;
//final long am = this.array.mem();
@ -260,6 +336,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
public boolean remove(byte[] termHash, byte[] urlHashBytes) throws IOException {
this.countCache.remove(termHash);
boolean removed = this.ram.remove(termHash, urlHashBytes);
int reduced;
//final long am = this.array.mem();
@ -333,6 +410,8 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
* @throws IOException
*/
public synchronized void clear() throws IOException {
this.countCache.clear();
this.failedURLs.clear();
this.ram.clear();
this.array.clear();
}
@ -343,6 +422,8 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
* and is composed of the current date and the cell salt
*/
public synchronized void close() {
this.countCache.clear();
try {removeDelayed();} catch (IOException e) {}
if (!this.ram.isEmpty()) this.ram.dump(this.array.newContainerBLOBFile(), (int) Math.min(MemoryControl.available() / 3, writeBufferSize), true);
// close all
this.ram.close();
@ -395,6 +476,8 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
(this.ram.size() > 3000 && !MemoryControl.request(80L * 1024L * 1024L, false)) ||
(this.ram.size() > 0 && this.lastDump + dumpCycle < t)) try {
this.lastDump = System.currentTimeMillis();
// apply pending delayed removals before dumping
try {removeDelayed();} catch (IOException e) {}
// dump the ram
File dumpFile = this.array.newContainerBLOBFile();
// a critical point: when the ram is handed to the dump job,

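Taken together, the IndexCell changes implement a buffer-then-flush deletion scheme: removeDelayed() only records failed url hashes per word hash in the failedURLs map, count() subtracts the buffered entries, get() filters them out of delivered containers, and delete(), close() and the dump cycle flush the buffer through the expensive BLOB-level remove(). A generic, self-contained sketch of the pattern with illustrative names (String keys instead of byte[] hashes, a plain interface instead of the BLOB-backed store):

    import java.io.IOException;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;
    import java.util.TreeSet;

    final class DelayedRemovalBuffer {
        interface Backend { void remove(String term, Set<String> urls) throws IOException; }

        private final Map<String, Set<String>> pending = new TreeMap<String, Set<String>>();

        // cheap: called at search time for every failed (term, url) pair
        public void removeDelayed(final String term, final String url) {
            synchronized (this.pending) {
                Set<String> s = this.pending.get(term);
                if (s == null) { s = new TreeSet<String>(); this.pending.put(term, s); }
                s.add(url);
            }
        }

        // counts must not report entries that are logically deleted already
        public int correctedCount(final String term, final int storedCount) {
            synchronized (this.pending) {
                final Set<String> s = this.pending.get(term);
                return s == null ? storedCount : Math.max(0, storedCount - s.size());
            }
        }

        // expensive: run before dump/close/delete so the store becomes exact again
        public void flush(final Backend backend) throws IOException {
            final Map<String, Set<String>> work;
            synchronized (this.pending) {
                work = new TreeMap<String, Set<String>>(this.pending);
                this.pending.clear();
            }
            for (final Map.Entry<String, Set<String>> e : work.entrySet()) {
                backend.remove(e.getKey(), e.getValue());
            }
        }
    }

The real code additionally falls back to an immediate remove() when the buffer cannot grow (RowSpaceExceededException), which bounds the memory held by pending deletions.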
@ -338,6 +338,9 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
return cache.remove(new ByteArray(termHash));
}
public void removeDelayed(final byte[] termHash, final byte[] urlHashBytes) {
remove(termHash, urlHashBytes);
}
public boolean remove(final byte[] termHash, final byte[] urlHashBytes) {
assert this.cache != null;
ByteArray tha = new ByteArray(termHash);
@ -355,7 +358,11 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
}
return false;
}
public void removeDelayed(final byte[] termHash, final HandleSet urlHashes) {
remove(termHash, urlHashes);
}
public int remove(final byte[] termHash, final HandleSet urlHashes) {
assert this.cache != null;
if (urlHashes.isEmpty()) return 0;
@ -376,6 +383,8 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
return 0;
}
public void removeDelayed() {}
public void add(final ReferenceContainer<ReferenceType> container) throws RowSpaceExceededException {
// this puts the entries into the cache
assert this.cache != null;
