- added a search option to filter results to specific network protocols, e.g. to get only results from FTP servers just add '/ftp' to your search.

For example, search for "passwd /ftp". This can also be done with /http, /https, and /smb.
- fixed some search throttling processes that should protect your peer against a search DoS or heavy search load

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7794 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 4b425ffdd2
commit 31283ecd07

@ -308,6 +308,26 @@ public class yacysearch {
querystring = querystring.replace("/date", ""); querystring = querystring.replace("/date", "");
ranking.coeff_date = RankingProfile.COEFF_MAX; ranking.coeff_date = RankingProfile.COEFF_MAX;
} }
if (querystring.indexOf("/http") >= 0) {
querystring = querystring.replace("/http", "");
urlmask = "http://.*";
}
if (querystring.indexOf("/https") >= 0) {
querystring = querystring.replace("/https", "");
urlmask = "https://.*";
}
if (querystring.indexOf("/ftp") >= 0) {
querystring = querystring.replace("/ftp", "");
urlmask = "ftp://.*";
}
if (querystring.indexOf("/smb") >= 0) {
querystring = querystring.replace("/smb", "");
urlmask = "smb://.*";
}
if (querystring.indexOf("/file") >= 0) {
querystring = querystring.replace("/file", "");
urlmask = "file://.*";
}
if (querystring.indexOf("/location") >= 0) { if (querystring.indexOf("/location") >= 0) {
querystring = querystring.replace("/location", ""); querystring = querystring.replace("/location", "");
if (constraint == null) { if (constraint == null) {

@ -92,7 +92,7 @@ public final class QueryParams {
public final int maxDistance; public final int maxDistance;
public final Bitfield constraint; public final Bitfield constraint;
public final boolean allofconstraint; public final boolean allofconstraint;
public final CacheStrategy snippetCacheStrategy; public CacheStrategy snippetCacheStrategy;
public final RankingProfile ranking; public final RankingProfile ranking;
private final Segment indexSegment; private final Segment indexSegment;
public final String host; // this is the client host that starts the query, not a site operator public final String host; // this is the client host that starts the query, not a site operator

@ -48,6 +48,7 @@ import net.yacy.cora.storage.ScoreMap;
import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Condenser; import net.yacy.document.Condenser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
@ -189,9 +190,12 @@ public final class RankingProcess extends Thread {
// apply all constraints // apply all constraints
try { try {
WordReferenceVars iEntry; WordReferenceVars iEntry;
while (true) { final String pattern = this.query.urlMask.pattern();
final boolean httpPattern = pattern.equals("http://.*");
final boolean noHttpButProtocolPattern = pattern.equals("https://.*") || pattern.equals("ftp://.*") || pattern.equals("smb://.*") || pattern.equals("file://.*");
pollloop: while (true) {
iEntry = decodedEntries.poll(1, TimeUnit.SECONDS); iEntry = decodedEntries.poll(1, TimeUnit.SECONDS);
if (iEntry == null || iEntry == WordReferenceVars.poison) break; if (iEntry == null || iEntry == WordReferenceVars.poison) break pollloop;
assert (iEntry.urlhash().length == index.row().primaryKeyLength); assert (iEntry.urlhash().length == index.row().primaryKeyLength);
//if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue; //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;
@ -202,15 +206,15 @@ public final class RankingProcess extends Thread {
// check constraints // check constraints
if (!testFlags(iEntry)) { if (!testFlags(iEntry)) {
continue; continue pollloop;
} }
// check document domain // check document domain
if (this.query.contentdom != ContentDomain.TEXT) { if (this.query.contentdom != ContentDomain.TEXT) {
if ((this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue; } if ((this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue pollloop; }
if ((this.query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue; } if ((this.query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue pollloop; }
if ((this.query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue; } if ((this.query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue pollloop; }
if ((this.query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue; } if ((this.query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue pollloop; }
} }
// check tld domain // check tld domain
@ -236,8 +240,15 @@ public final class RankingProcess extends Thread {
} else { } else {
if (!hosthash.equals(this.query.sitehash)) { if (!hosthash.equals(this.query.sitehash)) {
// filter out all domains that do not match with the site constraint // filter out all domains that do not match with the site constraint
continue; continue pollloop;
}
} }
// check protocol
if (!this.query.urlMask_isCatchall) {
final boolean httpFlagSet = DigestURI.flag4HTTPset(iEntry.urlHash);
if (httpPattern && !httpFlagSet) continue pollloop;
if (noHttpButProtocolPattern && httpFlagSet) continue pollloop;
} }
// finally make a double-check and insert result to stack // finally make a double-check and insert result to stack

@ -69,6 +69,7 @@ public class ResultFetcher {
long snippetComputationAllTime; long snippetComputationAllTime;
int taketimeout; int taketimeout;
private final boolean deleteIfSnippetFail; private final boolean deleteIfSnippetFail;
private boolean cleanupState;
public ResultFetcher( public ResultFetcher(
final LoaderDispatcher loader, final LoaderDispatcher loader,
@ -86,6 +87,7 @@ public class ResultFetcher {
this.workTables = workTables; this.workTables = workTables;
this.taketimeout = taketimeout; this.taketimeout = taketimeout;
this.deleteIfSnippetFail = deleteIfSnippetFail; this.deleteIfSnippetFail = deleteIfSnippetFail;
this.cleanupState = false;
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
@ -112,6 +114,10 @@ public class ResultFetcher {
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
} }
/**
 * Marks this fetcher as being in cleanup state; while set, deployWorker()
 * refuses to start any further snippet-fetch workers.
 */
public void setCleanupState() {
this.cleanupState = true;
}
public long getURLRetrievalTime() { public long getURLRetrievalTime() {
return this.urlRetrievalAllTime; return this.urlRetrievalAllTime;
} }
@ -140,7 +146,7 @@ public class ResultFetcher {
WeakPriorityBlockingQueue.Element<ResultEntry> entry = null; WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
while (System.currentTimeMillis() < finishTime) { while (System.currentTimeMillis() < finishTime) {
if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break; if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;
try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {Log.logException(e);} try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}
if (entry != null) break; if (entry != null) break;
if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break; if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break;
} }
@ -258,6 +264,7 @@ public class ResultFetcher {
public void deployWorker(int deployCount, final int neededResults) { public void deployWorker(int deployCount, final int neededResults) {
if (this.cleanupState) return; // we do not start another worker if we are in cleanup state
if (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) return; if (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) return;
if (this.result.sizeAvailable() >= neededResults) return; if (this.result.sizeAvailable() >= neededResults) return;
@ -311,6 +318,7 @@ public class ResultFetcher {
private final CacheStrategy cacheStrategy; private final CacheStrategy cacheStrategy;
private final int neededResults; private final int neededResults;
private final Pattern snippetPattern; private final Pattern snippetPattern;
private boolean shallrun;
public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) { public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) {
this.id = id; this.id = id;
@ -319,6 +327,7 @@ public class ResultFetcher {
this.snippetPattern = snippetPattern; this.snippetPattern = snippetPattern;
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults; this.neededResults = neededResults;
this.shallrun = true;
} }
@Override @Override
@ -331,7 +340,7 @@ public class ResultFetcher {
try { try {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis())); //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
int loops = 0; int loops = 0;
while (System.currentTimeMillis() < this.timeout) { while (this.shallrun && System.currentTimeMillis() < this.timeout) {
this.lastLifeSign = System.currentTimeMillis(); this.lastLifeSign = System.currentTimeMillis();
// check if we have enough // check if we have enough
@ -381,6 +390,10 @@ public class ResultFetcher {
Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated"); Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated");
} }
/**
 * Asks this worker to terminate cooperatively: the run loop checks the
 * shallrun flag in its while condition and exits on the next iteration.
 */
public void pleaseStop() {
this.shallrun = false;
}
/** /**
* calculate the time since the worker has had the latest activity * calculate the time since the worker has had the latest activity
* @return time in milliseconds lasted since latest activity * @return time in milliseconds lasted since latest activity

@ -48,8 +48,8 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.SetTools; import net.yacy.kelondro.util.SetTools;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
import de.anomic.search.ResultFetcher.Worker;
import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.dht.FlatWordPartitionScheme; import de.anomic.yacy.dht.FlatWordPartitionScheme;
@ -139,8 +139,8 @@ public final class SearchEvent {
query.maxDistance, query.maxDistance,
query.getSegment(), query.getSegment(),
peers, peers,
rankingProcess, this.rankingProcess,
secondarySearchSuperviser, this.secondarySearchSuperviser,
Switchboard.urlBlacklist, Switchboard.urlBlacklist,
query.ranking, query.ranking,
query.constraint, query.constraint,
@ -152,7 +152,7 @@ public final class SearchEvent {
this.rankingProcess.moreFeeders(this.primarySearchThreads.length); this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
// finished searching // finished searching
Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + this.primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
} else { } else {
// no search since query is empty, user might have entered no data or filters have removed all search words // no search since query is empty, user might have entered no data or filters have removed all search words
Log.logFine("SEARCH_EVENT", "NO SEARCH STARTED DUE TO EMPTY SEARCH REQUEST."); Log.logFine("SEARCH_EVENT", "NO SEARCH STARTED DUE TO EMPTY SEARCH REQUEST.");
@ -177,17 +177,17 @@ public final class SearchEvent {
final ReferenceContainer<WordReference> container = entry.getValue(); final ReferenceContainer<WordReference> container = entry.getValue();
assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + ASCII.String(container.getTermHash()) + ", wordhash = " + ASCII.String(wordhash); assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + ASCII.String(container.getTermHash()) + ", wordhash = " + ASCII.String(wordhash);
if (container.size() > maxcount) { if (container.size() > maxcount) {
IAmaxcounthash = wordhash; this.IAmaxcounthash = wordhash;
maxcount = container.size(); maxcount = container.size();
} }
l = FlatWordPartitionScheme.std.dhtDistance(wordhash, null, peers.mySeed()); l = FlatWordPartitionScheme.std.dhtDistance(wordhash, null, peers.mySeed());
if (l < mindhtdistance) { if (l < mindhtdistance) {
// calculate the word hash that is closest to our dht position // calculate the word hash that is closest to our dht position
mindhtdistance = l; mindhtdistance = l;
IAneardhthash = wordhash; this.IAneardhthash = wordhash;
} }
IACount.put(wordhash, LargeNumberCache.valueOf(container.size())); this.IACount.put(wordhash, LargeNumberCache.valueOf(container.size()));
IAResults.put(wordhash, WordReferenceFactory.compressIndex(container, null, 1000).toString()); this.IAResults.put(wordhash, WordReferenceFactory.compressIndex(container, null, 1000).toString());
} }
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
} else { } else {
@ -196,7 +196,7 @@ public final class SearchEvent {
// before a reading process wants to get results from it // before a reading process wants to get results from it
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
if (!this.rankingProcess.isAlive()) break; if (!this.rankingProcess.isAlive()) break;
try {Thread.sleep(10);} catch (InterruptedException e) {} try {Thread.sleep(10);} catch (final InterruptedException e) {}
} }
// this will reduce the maximum waiting time until results are available to 100 milliseconds // this will reduce the maximum waiting time until results are available to 100 milliseconds
// while we always get a good set of ranked data // while we always get a good set of ranked data
@ -215,6 +215,7 @@ public final class SearchEvent {
SearchEventCache.put(query.id(false), this); SearchEventCache.put(query.id(false), this);
} }
public ReferenceOrder getOrder() { public ReferenceOrder getOrder() {
return this.order; return this.order;
} }
@ -231,21 +232,23 @@ public final class SearchEvent {
return this.query; return this.query;
} }
public void setQuery(QueryParams query) { public void setQuery(final QueryParams query) {
this.query = query; this.query = query;
this.resultFetcher.query = query; this.resultFetcher.query = query;
} }
public void cleanup() { public void cleanup() {
this.resultFetcher.setCleanupState();
// stop all threads // stop all threads
if (primarySearchThreads != null) { if (this.primarySearchThreads != null) {
for (final yacySearch search : this.primarySearchThreads) { for (final yacySearch search : this.primarySearchThreads) {
if (search != null) synchronized (search) { if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt(); if (search.isAlive()) search.interrupt();
} }
} }
} }
if (secondarySearchThreads != null) { if (this.secondarySearchThreads != null) {
for (final yacySearch search : this.secondarySearchThreads) { for (final yacySearch search : this.secondarySearchThreads) {
if (search != null) synchronized (search) { if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt(); if (search.isAlive()) search.interrupt();
@ -253,6 +256,18 @@ public final class SearchEvent {
} }
} }
// call the worker threads and ask them to stop
for (final Worker w: this.resultFetcher.workerThreads) {
if (w != null && w.isAlive()) {
w.pleaseStop();
w.interrupt();
// the interrupt may occur during a MD5 computation which is resistant against interruption
// therefore set some more interrupts on the process
int ic = 10;
while (ic-- > 0 & w.isAlive()) w.interrupt();
}
}
// clear all data structures // clear all data structures
if (this.preselectedPeerHashes != null) this.preselectedPeerHashes.clear(); if (this.preselectedPeerHashes != null) this.preselectedPeerHashes.clear();
if (this.localSearchThread != null) if (this.localSearchThread.isAlive()) this.localSearchThread.interrupt(); if (this.localSearchThread != null) if (this.localSearchThread.isAlive()) this.localSearchThread.interrupt();
@ -265,7 +280,7 @@ public final class SearchEvent {
return this.IAResults.entrySet().iterator(); return this.IAResults.entrySet().iterator();
} }
public String abstractsString(byte[] hash) { public String abstractsString(final byte[] hash) {
return this.IAResults.get(hash); return this.IAResults.get(hash);
} }
@ -273,8 +288,8 @@ public final class SearchEvent {
return this.IACount.entrySet().iterator(); return this.IACount.entrySet().iterator();
} }
public int abstractsCount(byte[] hash) { public int abstractsCount(final byte[] hash) {
Integer i = this.IACount.get(hash); final Integer i = this.IACount.get(hash);
if (i == null) return -1; if (i == null) return -1;
return i.intValue(); return i.intValue();
} }
@ -290,7 +305,7 @@ public final class SearchEvent {
boolean anyRemoteSearchAlive() { boolean anyRemoteSearchAlive() {
// check primary search threads // check primary search threads
if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) { if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) {
for (final yacySearch primarySearchThread : primarySearchThreads) { for (final yacySearch primarySearchThread : this.primarySearchThreads) {
if ((primarySearchThread != null) && (primarySearchThread.isAlive())) return true; if ((primarySearchThread != null) && (primarySearchThread.isAlive())) return true;
} }
} }
@ -304,11 +319,11 @@ public final class SearchEvent {
} }
public yacySearch[] getPrimarySearchThreads() { public yacySearch[] getPrimarySearchThreads() {
return primarySearchThreads; return this.primarySearchThreads;
} }
public yacySearch[] getSecondarySearchThreads() { public yacySearch[] getSecondarySearchThreads() {
return secondarySearchThreads; return this.secondarySearchThreads;
} }
public RankingProcess getRankingResult() { public RankingProcess getRankingResult() {
@ -323,7 +338,7 @@ public final class SearchEvent {
return this.rankingProcess.getHostNavigator(); return this.rankingProcess.getHostNavigator();
} }
public ScoreMap<String> getTopicNavigator(int count) { public ScoreMap<String> getTopicNavigator(final int count) {
// returns a set of words that are computed as toplist // returns a set of words that are computed as toplist
return this.rankingProcess.getTopicNavigator(count); return this.rankingProcess.getTopicNavigator(count);
} }
@ -333,27 +348,27 @@ public final class SearchEvent {
return this.rankingProcess.getAuthorNavigator(); return this.rankingProcess.getAuthorNavigator();
} }
public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) { public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
synchronized (this.heuristics) { synchronized (this.heuristics) {
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant)); this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));
} }
} }
public HeuristicResult getHeuristic(byte[] urlhash) { public HeuristicResult getHeuristic(final byte[] urlhash) {
synchronized (this.heuristics) { synchronized (this.heuristics) {
return this.heuristics.get(urlhash); return this.heuristics.get(urlhash);
} }
} }
public ResultEntry oneResult(final int item, long timeout) { public ResultEntry oneResult(final int item, final long timeout) {
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || if ((this.query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
(query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) { (this.query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) {
// this is a search using remote search threads. Also the local // this is a search using remote search threads. Also the local
// search thread is started as background process // search thread is started as background process
if ((localSearchThread != null) && (localSearchThread.isAlive())) { if ((this.localSearchThread != null) && (this.localSearchThread.isAlive())) {
// in case that the local search takes longer than some other // in case that the local search takes longer than some other
// remote search requests, wait that the local process terminates first // remote search requests, wait that the local process terminates first
try {localSearchThread.join();} catch (InterruptedException e) {} try {this.localSearchThread.join();} catch (final InterruptedException e) {}
} }
} }
return this.resultFetcher.oneResult(item, timeout); return this.resultFetcher.oneResult(item, timeout);
@ -363,7 +378,7 @@ public final class SearchEvent {
public static class HeuristicResult /*implements Comparable<HeuristicResult>*/ { public static class HeuristicResult /*implements Comparable<HeuristicResult>*/ {
public final byte[] urlhash; public final String heuristicName; public final boolean redundant; public final byte[] urlhash; public final String heuristicName; public final boolean redundant;
public HeuristicResult(byte[] urlhash, String heuristicName, boolean redundant) { public HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) {
this.urlhash = urlhash; this.heuristicName = heuristicName; this.redundant = redundant; this.urlhash = urlhash; this.heuristicName = heuristicName; this.redundant = redundant;
}/* }/*
public int compareTo(HeuristicResult o) { public int compareTo(HeuristicResult o) {
@ -396,13 +411,13 @@ public final class SearchEvent {
* @param wordhash * @param wordhash
* @param singleAbstract // a mapping from url-hashes to a string of peer-hashes * @param singleAbstract // a mapping from url-hashes to a string of peer-hashes
*/ */
public void addAbstract(String wordhash, final TreeMap<String, StringBuilder> singleAbstract) { public void addAbstract(final String wordhash, final TreeMap<String, StringBuilder> singleAbstract) {
final SortedMap<String, StringBuilder> oldAbstract; final SortedMap<String, StringBuilder> oldAbstract;
synchronized (abstractsCache) { synchronized (this.abstractsCache) {
oldAbstract = abstractsCache.get(wordhash); oldAbstract = this.abstractsCache.get(wordhash);
if (oldAbstract == null) { if (oldAbstract == null) {
// new abstracts in the cache // new abstracts in the cache
abstractsCache.put(wordhash, singleAbstract); this.abstractsCache.put(wordhash, singleAbstract);
return; return;
} }
} }
@ -465,7 +480,7 @@ public final class SearchEvent {
t++; t++;
if (t > 10) break; if (t > 10) break;
} }
} catch (InterruptedException e) { } catch (final InterruptedException e) {
// the thread was interrupted // the thread was interrupted
// do nothing // do nothing
} }
@ -473,7 +488,7 @@ public final class SearchEvent {
} }
private void prepareSecondarySearch() { private void prepareSecondarySearch() {
if (abstractsCache == null || abstractsCache.size() != query.queryHashes.size()) return; // secondary search not possible (yet) if (this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return; // secondary search not possible (yet)
// catch up index abstracts and join them; then call peers again to submit their urls // catch up index abstracts and join them; then call peers again to submit their urls
/* /*
@ -484,10 +499,10 @@ public final class SearchEvent {
*/ */
// find out if there are enough references for all words that are searched // find out if there are enough references for all words that are searched
if (abstractsCache.size() != query.queryHashes.size()) return; if (this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return;
// join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list // join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list
final SortedMap<String, StringBuilder> abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true); final SortedMap<String, StringBuilder> abstractJoin = SetTools.joinConstructive(this.abstractsCache.values(), true);
if (abstractJoin.isEmpty()) return; if (abstractJoin.isEmpty()) return;
// the join result is now a urlhash: peer-list relation // the join result is now a urlhash: peer-list relation
@ -495,10 +510,10 @@ public final class SearchEvent {
final SortedMap<String, StringBuilder> secondarySearchURLs = new TreeMap<String, StringBuilder>(); // a (peerhash:urlhash-liststring) mapping final SortedMap<String, StringBuilder> secondarySearchURLs = new TreeMap<String, StringBuilder>(); // a (peerhash:urlhash-liststring) mapping
String url, peer; String url, peer;
StringBuilder urls, peerlist; StringBuilder urls, peerlist;
final String mypeerhash = peers.mySeed().hash; final String mypeerhash = SearchEvent.this.peers.mySeed().hash;
boolean mypeerinvolved = false; boolean mypeerinvolved = false;
int mypeercount; int mypeercount;
for (Map.Entry<String, StringBuilder> entry: abstractJoin.entrySet()) { for (final Map.Entry<String, StringBuilder> entry: abstractJoin.entrySet()) {
url = entry.getKey(); url = entry.getKey();
peerlist = entry.getValue(); peerlist = entry.getValue();
//System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peerlist); //System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peerlist);
@ -522,22 +537,22 @@ public final class SearchEvent {
// compute words for secondary search and start the secondary searches // compute words for secondary search and start the secondary searches
String words; String words;
secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()]; SearchEvent.this.secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()];
int c = 0; int c = 0;
for (Map.Entry<String, StringBuilder> entry: secondarySearchURLs.entrySet()) { for (final Map.Entry<String, StringBuilder> entry: secondarySearchURLs.entrySet()) {
peer = entry.getKey(); peer = entry.getKey();
if (peer.equals(mypeerhash)) continue; // we don't need to ask ourself if (peer.equals(mypeerhash)) continue; // we don't need to ask ourself
if (checkedPeers.contains(peer)) continue; // do not ask a peer again if (this.checkedPeers.contains(peer)) continue; // do not ask a peer again
urls = entry.getValue(); urls = entry.getValue();
words = wordsFromPeer(peer, urls); words = wordsFromPeer(peer, urls);
if (words.length() == 0) continue; // ??? if (words.length() == 0) continue; // ???
assert words.length() >= 12 : "words = " + words; assert words.length() >= 12 : "words = " + words;
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words); //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words);
rankingProcess.moreFeeders(1); SearchEvent.this.rankingProcess.moreFeeders(1);
checkedPeers.add(peer); this.checkedPeers.add(peer);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( SearchEvent.this.secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls.toString(), 6000, query.getSegment(), peers, rankingProcess, peer, Switchboard.urlBlacklist, words, urls.toString(), 6000, SearchEvent.this.query.getSegment(), SearchEvent.this.peers, SearchEvent.this.rankingProcess, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes); SearchEvent.this.query.ranking, SearchEvent.this.query.constraint, SearchEvent.this.preselectedPeerHashes);
} }
} }
@ -548,4 +563,10 @@ public final class SearchEvent {
return this.resultFetcher; return this.resultFetcher;
} }
/**
 * Checks whether any snippet-fetch worker of this search event is still running.
 * Used by the event cache to decide whether an event may be removed.
 *
 * @return true if the result fetcher exists and at least one of its
 *         worker threads is alive, false otherwise
 */
public boolean workerAlive() {
    if (this.resultFetcher == null || this.resultFetcher.workerThreads == null) return false;
    for (final Worker w : this.resultFetcher.workerThreads) {
        if (w != null && w.isAlive()) return true;
    }
    return false;
}
} }

@ -36,7 +36,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
import de.anomic.search.ResultFetcher.Worker;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
public class SearchEventCache { public class SearchEventCache {
@ -67,20 +66,24 @@ public class SearchEventCache {
// the less memory is there, the less time is acceptable for elements in the cache // the less memory is there, the less time is acceptable for elements in the cache
final long memx = MemoryControl.available(); final long memx = MemoryControl.available();
final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem; final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem;
Map.Entry<String, SearchEvent> event; Map.Entry<String, SearchEvent> eventEntry;
final Iterator<Map.Entry<String, SearchEvent>> i = lastEvents.entrySet().iterator(); final Iterator<Map.Entry<String, SearchEvent>> i = lastEvents.entrySet().iterator();
SearchEvent event;
while (i.hasNext()) { while (i.hasNext()) {
event = i.next(); eventEntry = i.next();
if (all || event.getValue().getEventTime() + acceptTime < System.currentTimeMillis()) { event = eventEntry.getValue();
if (workerAlive(event.getValue())) { if (event == null) continue;
event.getValue().cleanup(); if (all || event.getEventTime() + acceptTime < System.currentTimeMillis()) {
} else { if (event.workerAlive()) {
event.cleanup();
}
}
if (!event.workerAlive()) {
i.remove(); i.remove();
cacheDelete++; cacheDelete++;
} }
} }
} }
}
public static SearchEvent getEvent(final String eventID) { public static SearchEvent getEvent(final String eventID) {
final SearchEvent event = lastEvents.get(eventID); final SearchEvent event = lastEvents.get(eventID);
@ -91,17 +94,11 @@ public class SearchEventCache {
public static int countAliveThreads() { public static int countAliveThreads() {
int alive = 0; int alive = 0;
for (final SearchEvent e: SearchEventCache.lastEvents.values()) { for (final SearchEvent e: SearchEventCache.lastEvents.values()) {
if (workerAlive(e)) alive++; if (e.workerAlive()) alive++;
} }
return alive; return alive;
} }
private static boolean workerAlive(final SearchEvent e) {
if (e == null || e.result() == null || e.result().workerThreads == null) return false;
for (final Worker w: e.result().workerThreads) if (w != null && w.isAlive()) return true;
return false;
}
private static SearchEvent dummyEvent = null; private static SearchEvent dummyEvent = null;
private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) { private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) {
@ -147,14 +144,14 @@ public class SearchEventCache {
int waitcount = 0; int waitcount = 0;
throttling : while (true) { throttling : while (true) {
final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 10 : 100) / 1024 / 1024); final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024);
// make room if there are too many search events (they need a lot of RAM) // make room if there are too many search events (they need a lot of RAM)
if (SearchEventCache.lastEvents.size() > allowedThreads) { if (SearchEventCache.lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 1: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); Log.logWarning("SearchEventCache", "throttling phase 1: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(false); cleanupEvents(false);
} else break throttling; } else break throttling;
// if there are still some then delete just all // if there are still some then delete just all
if (SearchEventCache.lastEvents.size() > allowedThreads) { if (SearchEventCache.lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 2: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed"); Log.logWarning("SearchEventCache", "throttling phase 2: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(true); cleanupEvents(true);
} else break throttling; } else break throttling;
@ -167,6 +164,11 @@ public class SearchEventCache {
if (waitcount >= 10) return getDummyEvent(workTables, loader, query.getSegment()); if (waitcount >= 10) return getDummyEvent(workTables, loader, query.getSegment());
} }
if (waitcount > 0) {
// do not fetch snippets because that is most time-expensive
query.snippetCacheStrategy = null;
}
// check if there are too many other searches alive now // check if there are too many other searches alive now
Log.logInfo("SearchEventCache", "getEvent: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive"); Log.logInfo("SearchEventCache", "getEvent: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive");

@ -65,7 +65,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
DigestURI url = null; DigestURI url = null;
try { try {
url = new DigestURI(h); url = new DigestURI(h);
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
@ -116,7 +116,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
this.hash = hash; this.hash = hash;
} }
public DigestURI(final MultiProtocolURI baseURL, String relPath) throws MalformedURLException { public DigestURI(final MultiProtocolURI baseURL, final String relPath) throws MalformedURLException {
super(baseURL, relPath); super(baseURL, relPath);
this.hash = null; this.hash = null;
} }
@ -131,7 +131,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
*/ */
@Override @Override
public int hashCode() { public int hashCode() {
return ByteArray.hashCode(this.hash()); return ByteArray.hashCode(hash());
} }
public static final int flagTypeID(final String hash) { public static final int flagTypeID(final String hash) {
@ -175,10 +175,10 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
assert this.hash == null; // should only be called if the hash was not computed before assert this.hash == null; // should only be called if the hash was not computed before
final int id = Domains.getDomainID(host); // id=7: tld is local final int id = Domains.getDomainID(this.host); // id=7: tld is local
final boolean isHTTP = isHTTP(); final boolean isHTTP = isHTTP();
int p = (host == null) ? -1 : this.host.lastIndexOf('.'); int p = (this.host == null) ? -1 : this.host.lastIndexOf('.');
String dom = (p > 0) ? dom = host.substring(0, p) : ""; String dom = (p > 0) ? dom = this.host.substring(0, p) : "";
p = dom.lastIndexOf('.'); // locate subdomain p = dom.lastIndexOf('.'); // locate subdomain
String subdom = ""; String subdom = "";
if (p > 0) { if (p > 0) {
@ -196,7 +196,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
p = this.path.indexOf('/', rootpathStart); p = this.path.indexOf('/', rootpathStart);
String rootpath = ""; String rootpath = "";
if (p > 0 && p < rootpathEnd) { if (p > 0 && p < rootpathEnd) {
rootpath = path.substring(rootpathStart, p); rootpath = this.path.substring(rootpathStart, p);
} }
// we collected enough information to compute the fragments that are // we collected enough information to compute the fragments that are
@ -209,27 +209,39 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
final StringBuilder hashs = new StringBuilder(12); final StringBuilder hashs = new StringBuilder(12);
assert hashs.length() == 0; assert hashs.length() == 0;
// form the 'local' part of the hash // form the 'local' part of the hash
String normalform = toNormalform(true, true, false, true); final String normalform = toNormalform(true, true, false, true);
String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform)); final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
if (b64l.length() < 5) return null; if (b64l.length() < 5) return null;
hashs.append(b64l.substring(0, 5)); // 5 chars hashs.append(b64l.substring(0, 5)); // 5 chars
assert hashs.length() == 5; assert hashs.length() == 5;
hashs.append(subdomPortPath(subdom, port, rootpath)); // 1 char hashs.append(subdomPortPath(subdom, this.port, rootpath)); // 1 char
assert hashs.length() == 6; assert hashs.length() == 6;
// form the 'global' part of the hash // form the 'global' part of the hash
hashs.append(hosthash5(this.protocol, host, port)); // 5 chars hashs.append(hosthash5(this.protocol, this.host, this.port)); // 5 chars
assert hashs.length() == 11; assert hashs.length() == 11;
hashs.append(Base64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char hashs.append(Base64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char
assert hashs.length() == 12; assert hashs.length() == 12;
// return result hash // return result hash
byte[] b = ASCII.getBytes(hashs.toString()); final byte[] b = ASCII.getBytes(hashs.toString());
assert b.length == 12; assert b.length == 12;
return b; return b;
} }
/**
* return true if the protocol of the URL was 'http'
* this is not true if the protocol was 'https'
* @param hash
* @return true for url hashes that point to http services; false otherwise
*/
public static final boolean flag4HTTPset(final byte[] hash) {
assert hash.length == 12;
final byte flagbyte = hash[11];
return (flagbyte & 32) == 1;
}
private static char subdomPortPath(final String subdom, final int port, final String rootpath) { private static char subdomPortPath(final String subdom, final int port, final String rootpath) {
StringBuilder sb = new StringBuilder(subdom.length() + rootpath.length() + 8); final StringBuilder sb = new StringBuilder(subdom.length() + rootpath.length() + 8);
sb.append(subdom).append(':').append(Integer.toString(port)).append(':').append(rootpath); sb.append(subdom).append(':').append(Integer.toString(port)).append(':').append(rootpath);
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).charAt(0); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).charAt(0);
} }
@ -237,13 +249,13 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
private static final char rootURLFlag0 = subdomPortPath("", 80, ""); private static final char rootURLFlag0 = subdomPortPath("", 80, "");
private static final char rootURLFlag1 = subdomPortPath("www", 80, ""); private static final char rootURLFlag1 = subdomPortPath("www", 80, "");
public static final boolean probablyRootURL(String urlHash) { public static final boolean probablyRootURL(final String urlHash) {
char c = urlHash.charAt(5); final char c = urlHash.charAt(5);
return c == rootURLFlag0 || c == rootURLFlag1; return c == rootURLFlag0 || c == rootURLFlag1;
} }
public static final boolean probablyRootURL(final byte[] urlHash) { public static final boolean probablyRootURL(final byte[] urlHash) {
char c = (char) urlHash[5]; final char c = (char) urlHash[5];
return c == rootURLFlag0 || c == rootURLFlag1; return c == rootURLFlag0 || c == rootURLFlag1;
} }
@ -251,7 +263,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
if (host == null) { if (host == null) {
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(protocol)).substring(0, 5); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(protocol)).substring(0, 5);
} else { } else {
StringBuilder sb = new StringBuilder(host.length() + 15); final StringBuilder sb = new StringBuilder(host.length() + 15);
sb.append(protocol).append(':').append(host).append(':').append(Integer.toString(port)); sb.append(protocol).append(':').append(host).append(':').append(Integer.toString(port));
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).substring(0, 5); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).substring(0, 5);
} }

@ -63,9 +63,9 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param objectOrder * @param objectOrder
* @param space * @param space
*/ */
public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace, String name) { public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace, final String name) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder); this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder);
this.index = new RAMIndexCluster(name, rowdef, spread(expectedspace)); this.index = new RAMIndexCluster(name, this.rowdef, spread(expectedspace));
} }
/** /**
@ -82,7 +82,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
InputStream is; InputStream is;
try { try {
is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
} catch (OutOfMemoryError e) { } catch (final OutOfMemoryError e) {
is = new FileInputStream(file); is = new FileInputStream(file);
} }
if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is); if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
@ -105,10 +105,10 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
public long mem() { public long mem() {
return index.mem(); return this.index.mem();
} }
private static final int spread(int expectedspace) { private static final int spread(final int expectedspace) {
return Math.min(Runtime.getRuntime().availableProcessors(), Math.max(1, expectedspace / 3000)); return Math.min(Runtime.getRuntime().availableProcessors(), Math.max(1, expectedspace / 3000));
} }
@ -117,7 +117,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
int valm = this.rowdef.width(1); int valm = this.rowdef.width(1);
int valc; int valc;
byte[] lastk = null, thisk; byte[] lastk = null, thisk;
for (Row.Entry row: this) { for (final Row.Entry row: this) {
// check length of key // check length of key
if (lastk == null) { if (lastk == null) {
lastk = row.bytes(); lastk = row.bytes();
@ -159,7 +159,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
OutputStream os; OutputStream os;
try { try {
os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024); os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024);
} catch (OutOfMemoryError e) { } catch (final OutOfMemoryError e) {
os = new FileOutputStream(tmp); os = new FileOutputStream(tmp);
} }
if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os); if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
@ -177,29 +177,29 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
public final Row row() { public final Row row() {
return index.row(); return this.index.row();
} }
public final void clear() { public final void clear() {
index.clear(); this.index.clear();
} }
public final byte[] smallestKey() { public final byte[] smallestKey() {
return index.smallestKey(); return this.index.smallestKey();
} }
public final byte[] largestKey() { public final byte[] largestKey() {
return index.largestKey(); return this.index.largestKey();
} }
public final boolean has(final byte[] key) { public final boolean has(final byte[] key) {
assert (key != null); assert (key != null);
return index.has(key); return this.index.has(key);
} }
public final long get(final byte[] key) { public final long get(final byte[] key) {
assert (key != null); assert (key != null);
final Row.Entry indexentry = index.get(key); final Row.Entry indexentry = this.index.get(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
@ -218,7 +218,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, l); newentry.setCol(1, l);
final Row.Entry oldentry = index.replace(newentry); final Row.Entry oldentry = this.index.replace(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
return oldentry.getColLong(1); return oldentry.getColLong(1);
} }
@ -229,24 +229,24 @@ public final class HandleMap implements Iterable<Row.Entry> {
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, l); newentry.setCol(1, l);
index.addUnique(newentry); this.index.addUnique(newentry);
} }
public final long add(final byte[] key, final long a) throws RowSpaceExceededException { public final long add(final byte[] key, final long a) throws RowSpaceExceededException {
assert key != null; assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
synchronized (index) { synchronized (this.index) {
final Row.Entry indexentry = index.get(key); final Row.Entry indexentry = this.index.get(key);
if (indexentry == null) { if (indexentry == null) {
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, a); newentry.setCol(1, a);
index.addUnique(newentry); this.index.addUnique(newentry);
return 1; return 1;
} }
final long i = indexentry.getColLong(1) + a; final long i = indexentry.getColLong(1) + a;
indexentry.setCol(1, i); indexentry.setCol(1, i);
index.put(indexentry); this.index.put(indexentry);
return i; return i;
} }
} }
@ -264,12 +264,12 @@ public final class HandleMap implements Iterable<Row.Entry> {
long[] is; long[] is;
int c; int c;
long l; long l;
final int initialSize = this.size(); final int initialSize = size();
ArrayList<RowCollection> rd = index.removeDoubles(); final ArrayList<RowCollection> rd = this.index.removeDoubles();
for (final RowCollection rowset: rd) { for (final RowCollection rowset: rd) {
is = new long[rowset.size()]; is = new long[rowset.size()];
c = 0; c = 0;
for (Row.Entry e: rowset) { for (final Row.Entry e: rowset) {
l = e.getColLong(1); l = e.getColLong(1);
assert l < initialSize : "l = " + l + ", initialSize = " + initialSize; assert l < initialSize : "l = " + l + ", initialSize = " + initialSize;
is[c++] = l; is[c++] = l;
@ -279,10 +279,10 @@ public final class HandleMap implements Iterable<Row.Entry> {
return report; return report;
} }
public final ArrayList<byte[]> top(int count) { public final ArrayList<byte[]> top(final int count) {
List<Row.Entry> list0 = index.top(count); final List<Row.Entry> list0 = this.index.top(count);
ArrayList<byte[]> list = new ArrayList<byte[]>(); final ArrayList<byte[]> list = new ArrayList<byte[]>();
for (Row.Entry entry: list0) { for (final Row.Entry entry: list0) {
list.add(entry.getPrimaryKeyBytes()); list.add(entry.getPrimaryKeyBytes());
} }
return list; return list;
@ -291,45 +291,45 @@ public final class HandleMap implements Iterable<Row.Entry> {
public final synchronized long remove(final byte[] key) { public final synchronized long remove(final byte[] key) {
assert (key != null); assert (key != null);
final Row.Entry indexentry; final Row.Entry indexentry;
synchronized (index) { synchronized (this.index) {
final boolean exist = index.has(key); final boolean exist = this.index.has(key);
if (!exist) return -1; if (!exist) return -1;
final int s = index.size(); final int s = this.index.size();
final long m = index.mem(); final long m = this.index.mem();
indexentry = index.remove(key); indexentry = this.index.remove(key);
assert (indexentry != null); assert (indexentry != null);
assert index.size() < s : "s = " + s + ", index.size() = " + index.size(); assert this.index.size() < s : "s = " + s + ", index.size() = " + this.index.size();
assert index.mem() <= m : "m = " + m + ", index.mem() = " + index.mem(); assert this.index.mem() <= m : "m = " + m + ", index.mem() = " + this.index.mem();
} }
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
public final long removeone() { public final long removeone() {
final Row.Entry indexentry = index.removeOne(); final Row.Entry indexentry = this.index.removeOne();
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
public final int size() { public final int size() {
return index.size(); return this.index.size();
} }
public final boolean isEmpty() { public final boolean isEmpty() {
return index.isEmpty(); return this.index.isEmpty();
} }
public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) { public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
return index.keys(up, firstKey); return this.index.keys(up, firstKey);
} }
public final CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) { public final CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
return index.rows(up, firstKey); return this.index.rows(up, firstKey);
} }
public final void close() { public final void close() {
index.close(); this.index.close();
index = null; this.index = null;
} }
/** /**
@ -342,7 +342,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param bufferSize * @param bufferSize
* @return * @return
*/ */
public final static initDataConsumer asynchronusInitializer(String name, final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace) { public final static initDataConsumer asynchronusInitializer(final String name, final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace) {
final initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, expectedspace, name)); final initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, expectedspace, name));
final ExecutorService service = Executors.newSingleThreadExecutor(); final ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer)); initializer.setResult(service.submit(initializer));
@ -369,7 +369,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
public initDataConsumer(final HandleMap map) { public initDataConsumer(final HandleMap map) {
this.map = map; this.map = map;
cache = new LinkedBlockingQueue<entry>(); this.cache = new LinkedBlockingQueue<entry>();
} }
protected final void setResult(final Future<HandleMap> result) { protected final void setResult(final Future<HandleMap> result) {
@ -382,10 +382,11 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param l * @param l
*/ */
public final void consume(final byte[] key, final long l) { public final void consume(final byte[] key, final long l) {
try { while (true) try {
cache.put(new entry(key, l)); this.cache.put(new entry(key, l));
} catch (InterruptedException e) { break;
Log.logException(e); } catch (final InterruptedException e) {
continue;
} }
} }
@ -394,10 +395,11 @@ public final class HandleMap implements Iterable<Row.Entry> {
* this method must be called. The process will not terminate if this is not called before. * this method must be called. The process will not terminate if this is not called before.
*/ */
public final void finish() { public final void finish() {
try { while (true) try {
cache.put(poisonEntry); this.cache.put(poisonEntry);
} catch (InterruptedException e) { break;
Log.logException(e); } catch (final InterruptedException e) {
continue;
} }
} }
@ -415,16 +417,21 @@ public final class HandleMap implements Iterable<Row.Entry> {
public final HandleMap call() throws IOException { public final HandleMap call() throws IOException {
try { try {
finishloop: while (true) {
entry c; entry c;
while ((c = cache.take()) != poisonEntry) { try {
map.putUnique(c.key, c.l); while ((c = this.cache.take()) != poisonEntry) {
this.map.putUnique(c.key, c.l);
} }
} catch (InterruptedException e) { break finishloop;
Log.logException(e); } catch (final InterruptedException e) {
} catch (RowSpaceExceededException e) { continue finishloop;
}
}
} catch (final RowSpaceExceededException e) {
Log.logException(e); Log.logException(e);
} }
return map; return this.map;
} }
public void close() { public void close() {
@ -433,6 +440,6 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
public Iterator<Row.Entry> iterator() { public Iterator<Row.Entry> iterator() {
return this.rows(true, null); return rows(true, null);
} }
} }

@ -225,7 +225,7 @@ public final class Log {
} else { } else {
try { try {
logQueue.put(new logEntry(logger, level, message, thrown)); logQueue.put(new logEntry(logger, level, message, thrown));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
logger.log(level, message, thrown); logger.log(level, message, thrown);
} }
} }
@ -237,7 +237,7 @@ public final class Log {
} else { } else {
try { try {
logQueue.put(new logEntry(logger, level, message)); logQueue.put(new logEntry(logger, level, message));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
logger.log(level, message); logger.log(level, message);
} }
} }
@ -249,7 +249,7 @@ public final class Log {
} else { } else {
try { try {
logQueue.put(new logEntry(loggername, level, message, thrown)); logQueue.put(new logEntry(loggername, level, message, thrown));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Logger.getLogger(loggername).log(level, message, thrown); Logger.getLogger(loggername).log(level, message, thrown);
} }
} }
@ -261,7 +261,7 @@ public final class Log {
} else { } else {
try { try {
logQueue.put(new logEntry(loggername, level, message)); logQueue.put(new logEntry(loggername, level, message));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Logger.getLogger(loggername).log(level, message); Logger.getLogger(loggername).log(level, message);
} }
} }
@ -344,7 +344,7 @@ public final class Log {
} }
} }
} }
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
@ -403,8 +403,8 @@ public final class Log {
if (logRunnerThread == null || !logRunnerThread.isAlive()) return; if (logRunnerThread == null || !logRunnerThread.isAlive()) return;
try { try {
logQueue.put(poison); logQueue.put(poison);
logRunnerThread.join(10000); logRunnerThread.join(1000);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
} }
} }

@ -55,10 +55,10 @@ public class Digest {
static { static {
for (int i = 0; i < digestThreads; i++) for (int i = 0; i < digestThreads; i++)
try { try {
MessageDigest digest = MessageDigest.getInstance("MD5"); final MessageDigest digest = MessageDigest.getInstance("MD5");
digest.reset(); digest.reset();
digestPool.add(digest); digestPool.add(digest);
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
Log.logException(e); Log.logException(e);
} }
} }
@ -72,9 +72,9 @@ public class Digest {
public static String encodeOctal(final byte[] in) { public static String encodeOctal(final byte[] in) {
if (in == null) return ""; if (in == null) return "";
final StringBuilder result = new StringBuilder(in.length * 8 / 3); final StringBuilder result = new StringBuilder(in.length * 8 / 3);
for (int i = 0; i < in.length; i++) { for (final byte element : in) {
if ((0Xff & in[i]) < 8) result.append('0'); if ((0Xff & element) < 8) result.append('0');
result.append(Integer.toOctalString(0Xff & in[i])); result.append(Integer.toOctalString(0Xff & element));
} }
return result.toString(); return result.toString();
} }
@ -82,9 +82,9 @@ public class Digest {
public static String encodeHex(final byte[] in) { public static String encodeHex(final byte[] in) {
if (in == null) return ""; if (in == null) return "";
final StringBuilder result = new StringBuilder(in.length * 2); final StringBuilder result = new StringBuilder(in.length * 2);
for (int i = 0; i < in.length; i++) { for (final byte element : in) {
if ((0Xff & in[i]) < 16) result.append('0'); if ((0Xff & element) < 16) result.append('0');
result.append(Integer.toHexString(0Xff & in[i])); result.append(Integer.toHexString(0Xff & element));
} }
return result.toString(); return result.toString();
} }
@ -122,16 +122,16 @@ public class Digest {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
digest.reset(); digest.reset();
fromPool = false; fromPool = false;
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
} }
} }
if (digest == null) try { if (digest == null) try {
digest = digestPool.take(); digest = digestPool.take();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logWarning("Digest", "using generic instead of pooled digest"); Log.logWarning("Digest", "using generic instead of pooled digest");
try { try {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e1) { } catch (final NoSuchAlgorithmException e1) {
Log.logException(e1); Log.logException(e1);
} }
digest.reset(); digest.reset();
@ -140,13 +140,18 @@ public class Digest {
byte[] keyBytes; byte[] keyBytes;
keyBytes = UTF8.getBytes(key); keyBytes = UTF8.getBytes(key);
digest.update(keyBytes); digest.update(keyBytes);
byte[] result = digest.digest(); final byte[] result = digest.digest();
digest.reset(); digest.reset();
if (fromPool) if (fromPool) {
returntopool: while (true) {
try { try {
digestPool.put(digest); digestPool.put(digest);
} catch (InterruptedException e) { break returntopool;
Log.logException(e); } catch (final InterruptedException e) {
// we MUST return that digest to the pool
continue returntopool;
}
}
} }
return result; return result;
} }
@ -163,9 +168,9 @@ public class Digest {
// create a concurrent thread that consumes data as it is read // create a concurrent thread that consumes data as it is read
// and computes the md5 while doing IO // and computes the md5 while doing IO
md5FilechunkConsumer md5consumer = new md5FilechunkConsumer(1024 * 64, 8); final md5FilechunkConsumer md5consumer = new md5FilechunkConsumer(1024 * 64, 8);
ExecutorService service = Executors.newSingleThreadExecutor(); final ExecutorService service = Executors.newSingleThreadExecutor();
Future<MessageDigest> md5result = service.submit(md5consumer); final Future<MessageDigest> md5result = service.submit(md5consumer);
service.shutdown(); service.shutdown();
filechunk c; filechunk c;
@ -189,10 +194,10 @@ public class Digest {
// return the md5 digest from future task // return the md5 digest from future task
try { try {
return md5result.get().digest(); return md5result.get().digest();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} catch (ExecutionException e) { } catch (final ExecutionException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} }
@ -201,9 +206,9 @@ public class Digest {
private static class filechunk { private static class filechunk {
public byte[] b; public byte[] b;
public int n; public int n;
public filechunk(int len) { public filechunk(final int len) {
b = new byte[len]; this.b = new byte[len];
n = 0; this.n = 0;
} }
} }
@ -214,32 +219,32 @@ public class Digest {
protected static filechunk poison = new filechunk(0); protected static filechunk poison = new filechunk(0);
private MessageDigest digest; private MessageDigest digest;
public md5FilechunkConsumer(int bufferSize, int bufferCount) { public md5FilechunkConsumer(final int bufferSize, final int bufferCount) {
empty = new ArrayBlockingQueue<filechunk>(bufferCount); this.empty = new ArrayBlockingQueue<filechunk>(bufferCount);
filed = new LinkedBlockingQueue<filechunk>(); this.filed = new LinkedBlockingQueue<filechunk>();
// fill the empty queue // fill the empty queue
for (int i = 0; i < bufferCount; i++) empty.add(new filechunk(bufferSize)); for (int i = 0; i < bufferCount; i++) this.empty.add(new filechunk(bufferSize));
// init digest // init digest
try { try {
digest = MessageDigest.getInstance("MD5"); this.digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
System.out.println("Internal Error at md5:" + e.getMessage()); System.out.println("Internal Error at md5:" + e.getMessage());
} }
digest.reset(); this.digest.reset();
} }
public void consume(filechunk c) { public void consume(final filechunk c) {
try { try {
filed.put(c); this.filed.put(c);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
} }
public filechunk nextFree() throws IOException { public filechunk nextFree() throws IOException {
try { try {
return empty.take(); return this.empty.take();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} }
@ -249,35 +254,35 @@ public class Digest {
try { try {
filechunk c; filechunk c;
while(true) { while(true) {
c = filed.take(); c = this.filed.take();
if (c == poison) break; if (c == poison) break;
digest.update(c.b, 0, c.n); this.digest.update(c.b, 0, c.n);
empty.put(c); this.empty.put(c);
} }
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
return digest; return this.digest;
} }
} }
public static String fastFingerprintHex(final File file, boolean includeDate) { public static String fastFingerprintHex(final File file, final boolean includeDate) {
try { try {
return encodeHex(fastFingerprintRaw(file, includeDate)); return encodeHex(fastFingerprintRaw(file, includeDate));
} catch (IOException e) { } catch (final IOException e) {
return null; return null;
} }
} }
public static String fastFingerprintB64(final File file, boolean includeDate) { public static String fastFingerprintB64(final File file, final boolean includeDate) {
try { try {
byte[] b = fastFingerprintRaw(file, includeDate); final byte[] b = fastFingerprintRaw(file, includeDate);
assert b != null : "file = " + file.toString(); assert b != null : "file = " + file.toString();
if (b == null || b.length == 0) return null; if (b == null || b.length == 0) return null;
assert b.length != 0 : "file = " + file.toString(); assert b.length != 0 : "file = " + file.toString();
return Base64Order.enhancedCoder.encode(b); return Base64Order.enhancedCoder.encode(b);
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
@ -300,19 +305,19 @@ public class Digest {
* @return fingerprint in md5 raw format * @return fingerprint in md5 raw format
* @throws IOException * @throws IOException
*/ */
public static byte[] fastFingerprintRaw(final File file, boolean includeDate) throws IOException { public static byte[] fastFingerprintRaw(final File file, final boolean includeDate) throws IOException {
final int mb = 16 * 1024; final int mb = 16 * 1024;
final long fl = file.length(); final long fl = file.length();
if (fl <= 2 * mb) return encodeMD5Raw(file); if (fl <= 2 * mb) return encodeMD5Raw(file);
MessageDigest digest; MessageDigest digest;
try { try {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
RandomAccessFile raf = new RandomAccessFile(file, "r"); final RandomAccessFile raf = new RandomAccessFile(file, "r");
byte[] a = new byte[mb]; final byte[] a = new byte[mb];
try { try {
raf.seek(0); raf.seek(0);
raf.readFully(a, 0, mb); raf.readFully(a, 0, mb);
@ -324,7 +329,7 @@ public class Digest {
if (includeDate) digest.update(NaturalOrder.encodeLong(file.lastModified(), 8), 0, 8); if (includeDate) digest.update(NaturalOrder.encodeLong(file.lastModified(), 8), 0, 8);
} finally { } finally {
raf.close(); raf.close();
try {raf.getChannel().close();} catch (IOException e) {} try {raf.getChannel().close();} catch (final IOException e) {}
} }
return digest.digest(); return digest.digest();
} }
@ -355,7 +360,7 @@ public class Digest {
// java -classpath classes de.anomic.kelondro.kelondroDigest -fb64 DATA/HTCACHE/responseHeader.heap // java -classpath classes de.anomic.kelondro.kelondroDigest -fb64 DATA/HTCACHE/responseHeader.heap
// compare with: // compare with:
// md5 readme.txt // md5 readme.txt
long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
if (s.length == 0) { if (s.length == 0) {
System.out.println("usage: -[md5|fingerprint] <arg>"); System.out.println("usage: -[md5|fingerprint] <arg>");
@ -364,22 +369,22 @@ public class Digest {
if (s[0].equals("-md5")) { if (s[0].equals("-md5")) {
// generate a md5 from a given file // generate a md5 from a given file
File f = new File(s[1]); final File f = new File(s[1]);
try { try {
System.out.println("MD5 (" + f.getName() + ") = " + encodeMD5Hex(f)); System.out.println("MD5 (" + f.getName() + ") = " + encodeMD5Hex(f));
} catch (IOException e) { } catch (final IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
if (s[0].equals("-fhex")) { if (s[0].equals("-fhex")) {
// generate a fast fingerprint from a given file // generate a fast fingerprint from a given file
File f = new File(s[1]); final File f = new File(s[1]);
System.out.println("fingerprint hex (" + f.getName() + ") = " + fastFingerprintHex(f, true)); System.out.println("fingerprint hex (" + f.getName() + ") = " + fastFingerprintHex(f, true));
} }
if (s[0].equals("-fb64")) { if (s[0].equals("-fb64")) {
// generate a fast fingerprint from a given file // generate a fast fingerprint from a given file
File f = new File(s[1]); final File f = new File(s[1]);
System.out.println("fingerprint b64 (" + f.getName() + ") = " + fastFingerprintB64(f, true)); System.out.println("fingerprint b64 (" + f.getName() + ") = " + fastFingerprintB64(f, true));
} }

@ -98,7 +98,7 @@ public abstract class AbstractThread extends Thread implements WorkflowThread {
public long getMemoryUse() { public long getMemoryUse() {
// returns the sum of all memory usage differences before and after one busy job // returns the sum of all memory usage differences before and after one busy job
return memuse; return this.memuse;
} }
public boolean shutdownInProgress() { public boolean shutdownInProgress() {
@ -110,12 +110,11 @@ public abstract class AbstractThread extends Thread implements WorkflowThread {
this.running = false; this.running = false;
// interrupting the thread // interrupting the thread
this.interrupt(); interrupt();
// wait for termination // wait for termination
if (waitFor) { if (waitFor) {
// Busy waiting removed: while (this.isAlive()) try {this.sleep(100);} catch (InterruptedException e) {break;} try { this.join(3000); } catch (final InterruptedException e) { return; }
try { this.join(3000); } catch (final InterruptedException e) {return;}
} }
// If we reach this point, the process is closed // If we reach this point, the process is closed
@ -129,7 +128,7 @@ public abstract class AbstractThread extends Thread implements WorkflowThread {
public void jobExceptionHandler(final Exception e) { public void jobExceptionHandler(final Exception e) {
if (!(e instanceof ClosedByInterruptException)) { if (!(e instanceof ClosedByInterruptException)) {
// default handler for job exceptions. shall be overridden for own handler // default handler for job exceptions. shall be overridden for own handler
logError("thread '" + this.getName() + "': " + e.toString(),e); logError("thread '" + getName() + "': " + e.toString(),e);
} }
} }

Loading…
Cancel
Save