- added a search option to filter results by network protocol, e.g. to get only results from ftp servers just add '/ftp' to your search.

For example, search for "passwd /ftp". The same works with /http, /https, /smb and /file; a short sketch of the mapping is given below.
- fixed some search throttling processes that help protect your peer against search DoS attacks or heavy search load

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7794 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 4b425ffdd2
commit 31283ecd07
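The protocol modifier is stripped from the query string and translated into a URL mask, which the ranking process later checks against the protocol flag of each candidate result (see the yacysearch.java and RankingProcess.java hunks below). A minimal sketch of that query-to-mask mapping follows; the class and method names are illustrative only and not part of this commit:

// Illustrative sketch only: map a protocol modifier such as "/ftp" to a URL mask.
// The real handling lives in the yacysearch.java hunk below.
public final class ProtocolModifierSketch {
    // "https" is checked before "http" so the shorter modifier cannot match inside the longer one
    private static final String[] PROTOCOLS = {"https", "http", "ftp", "smb", "file"};

    /** @return a two-element array: the cleaned query string and the URL mask */
    public static String[] splitProtocolModifier(String querystring) {
        String urlmask = ".*"; // catch-all mask when no modifier is present
        for (final String protocol : PROTOCOLS) {
            final String modifier = "/" + protocol;
            if (querystring.indexOf(modifier) >= 0) {
                querystring = querystring.replace(modifier, "").trim();
                urlmask = protocol + "://.*";
                break;
            }
        }
        return new String[] {querystring, urlmask};
    }

    public static void main(final String[] args) {
        // "passwd /ftp" becomes the query "passwd" with the URL mask "ftp://.*"
        final String[] r = splitProtocolModifier("passwd /ftp");
        System.out.println(r[0] + " | " + r[1]);
    }
}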

@@ -308,6 +308,26 @@ public class yacysearch {
 querystring = querystring.replace("/date", "");
 ranking.coeff_date = RankingProfile.COEFF_MAX;
 }
+if (querystring.indexOf("/http") >= 0) {
+querystring = querystring.replace("/http", "");
+urlmask = "http://.*";
+}
+if (querystring.indexOf("/https") >= 0) {
+querystring = querystring.replace("/https", "");
+urlmask = "https://.*";
+}
+if (querystring.indexOf("/ftp") >= 0) {
+querystring = querystring.replace("/ftp", "");
+urlmask = "ftp://.*";
+}
+if (querystring.indexOf("/smb") >= 0) {
+querystring = querystring.replace("/smb", "");
+urlmask = "smb://.*";
+}
+if (querystring.indexOf("/file") >= 0) {
+querystring = querystring.replace("/file", "");
+urlmask = "file://.*";
+}
 if (querystring.indexOf("/location") >= 0) {
 querystring = querystring.replace("/location", "");
 if (constraint == null) {

@@ -92,7 +92,7 @@ public final class QueryParams {
 public final int maxDistance;
 public final Bitfield constraint;
 public final boolean allofconstraint;
-public final CacheStrategy snippetCacheStrategy;
+public CacheStrategy snippetCacheStrategy;
 public final RankingProfile ranking;
 private final Segment indexSegment;
 public final String host; // this is the client host that starts the query, not a site operator

@@ -48,6 +48,7 @@ import net.yacy.cora.storage.ScoreMap;
 import net.yacy.cora.storage.WeakPriorityBlockingQueue;
 import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
 import net.yacy.document.Condenser;
+import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.data.word.WordReference;
@@ -189,9 +190,12 @@ public final class RankingProcess extends Thread {
 // apply all constraints
 try {
 WordReferenceVars iEntry;
-while (true) {
+final String pattern = this.query.urlMask.pattern();
+final boolean httpPattern = pattern.equals("http://.*");
+final boolean noHttpButProtocolPattern = pattern.equals("https://.*") || pattern.equals("ftp://.*") || pattern.equals("smb://.*") || pattern.equals("file://.*");
+pollloop: while (true) {
 iEntry = decodedEntries.poll(1, TimeUnit.SECONDS);
-if (iEntry == null || iEntry == WordReferenceVars.poison) break;
+if (iEntry == null || iEntry == WordReferenceVars.poison) break pollloop;
 assert (iEntry.urlhash().length == index.row().primaryKeyLength);
 //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue;
@@ -202,15 +206,15 @@ public final class RankingProcess extends Thread {
 // check constraints
 if (!testFlags(iEntry)) {
-continue;
+continue pollloop;
 }
 // check document domain
 if (this.query.contentdom != ContentDomain.TEXT) {
-if ((this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue; }
-if ((this.query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue; }
-if ((this.query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue; }
-if ((this.query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue; }
+if ((this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue pollloop; }
+if ((this.query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue pollloop; }
+if ((this.query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue pollloop; }
+if ((this.query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue pollloop; }
 }
 // check tld domain
@@ -236,10 +240,17 @@ public final class RankingProcess extends Thread {
 } else {
 if (!hosthash.equals(this.query.sitehash)) {
 // filter out all domains that do not match with the site constraint
-continue;
+continue pollloop;
 }
 }
+// check protocol
+if (!this.query.urlMask_isCatchall) {
+final boolean httpFlagSet = DigestURI.flag4HTTPset(iEntry.urlHash);
+if (httpPattern && !httpFlagSet) continue pollloop;
+if (noHttpButProtocolPattern && httpFlagSet) continue pollloop;
+}
 // finally make a double-check and insert result to stack
 if (this.urlhashes.add(iEntry.urlhash())) {
 rankingtryloop: while (true) {

@@ -69,6 +69,7 @@ public class ResultFetcher {
 long snippetComputationAllTime;
 int taketimeout;
 private final boolean deleteIfSnippetFail;
+private boolean cleanupState;
 public ResultFetcher(
 final LoaderDispatcher loader,
@@ -86,6 +87,7 @@ public class ResultFetcher {
 this.workTables = workTables;
 this.taketimeout = taketimeout;
 this.deleteIfSnippetFail = deleteIfSnippetFail;
+this.cleanupState = false;
 this.urlRetrievalAllTime = 0;
 this.snippetComputationAllTime = 0;
@@ -112,6 +114,10 @@ public class ResultFetcher {
 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
 }
+public void setCleanupState() {
+this.cleanupState = true;
+}
 public long getURLRetrievalTime() {
 return this.urlRetrievalAllTime;
 }
@@ -140,7 +146,7 @@ public class ResultFetcher {
 WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
 while (System.currentTimeMillis() < finishTime) {
 if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;
-try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {Log.logException(e);}
+try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}
 if (entry != null) break;
 if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break;
 }
@@ -258,6 +264,7 @@ public class ResultFetcher {
 public void deployWorker(int deployCount, final int neededResults) {
+if (this.cleanupState) return; // we do not start another worker if we are in cleanup state
 if (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) return;
 if (this.result.sizeAvailable() >= neededResults) return;
@@ -311,6 +318,7 @@ public class ResultFetcher {
 private final CacheStrategy cacheStrategy;
 private final int neededResults;
 private final Pattern snippetPattern;
+private boolean shallrun;
 public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) {
 this.id = id;
@@ -319,6 +327,7 @@ public class ResultFetcher {
 this.snippetPattern = snippetPattern;
 this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
 this.neededResults = neededResults;
+this.shallrun = true;
 }
 @Override
@@ -331,7 +340,7 @@ public class ResultFetcher {
 try {
 //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
 int loops = 0;
-while (System.currentTimeMillis() < this.timeout) {
+while (this.shallrun && System.currentTimeMillis() < this.timeout) {
 this.lastLifeSign = System.currentTimeMillis();
 // check if we have enough
@@ -381,6 +390,10 @@ public class ResultFetcher {
 Log.logInfo("SEARCH", "resultWorker thread " + this.id + " terminated");
 }
+public void pleaseStop() {
+this.shallrun = false;
+}
 /**
 * calculate the time since the worker has had the latest activity
 * @return time in milliseconds lasted since latest activity

@@ -9,7 +9,7 @@
 // $LastChangedBy$
 //
 // LICENSE
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
@@ -48,22 +48,22 @@ import net.yacy.kelondro.util.EventTracker;
 import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.kelondro.util.SetTools;
 import net.yacy.repository.LoaderDispatcher;
 import de.anomic.data.WorkTables;
+import de.anomic.search.ResultFetcher.Worker;
 import de.anomic.yacy.yacySearch;
 import de.anomic.yacy.yacySeedDB;
 import de.anomic.yacy.dht.FlatWordPartitionScheme;
 import de.anomic.yacy.graphics.ProfilingGraph;
 public final class SearchEvent {
 public enum Type {
 INITIALIZATION, COLLECTION, JOIN, PRESORT, URLFETCH, NORMALIZING, FINALIZATION,
 REMOTESEARCH_START, REMOTESEARCH_TERMINATE, ABSTRACTS, CLEANUP, SNIPPETFETCH_START, ONERESULT, REFERENCECOLLECTION, RESULTLIST;
 }
 public static final int max_results_preparation = 3000;
 // class variables that may be implemented with an abstract class
 private long eventTime;
 private QueryParams query;
@@ -71,8 +71,8 @@ public final class SearchEvent {
 private final WorkTables workTables;
 private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
 private ResultFetcher resultFetcher;
 private final SecondarySearchSuperviser secondarySearchSuperviser;
 // class variables for remote searches
 private yacySearch[] primarySearchThreads, secondarySearchThreads;
@@ -83,7 +83,7 @@ public final class SearchEvent {
 private final SortedMap<byte[], HeuristicResult> heuristics;
 private byte[] IAmaxcounthash, IAneardhthash;
 private final ReferenceOrder order;
 protected SearchEvent(final QueryParams query,
 final yacySeedDB peers,
 final WorkTables workTables,
@@ -119,10 +119,10 @@ public final class SearchEvent {
 // initialize a ranking process that is the target for data
 // that is generated concurrently from local and global search threads
 this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
 // start a local search concurrently
 this.rankingProcess.start();
 // start global searches
 final long timer = System.currentTimeMillis();
 this.primarySearchThreads = (query.queryHashes.isEmpty()) ? null : yacySearch.primaryRemoteSearches(
@@ -139,8 +139,8 @@ public final class SearchEvent {
 query.maxDistance,
 query.getSegment(),
 peers,
-rankingProcess,
-secondarySearchSuperviser,
+this.rankingProcess,
+this.secondarySearchSuperviser,
 Switchboard.urlBlacklist,
 query.ranking,
 query.constraint,
@@ -152,18 +152,18 @@ public final class SearchEvent {
 this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
 // finished searching
-Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
+Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + this.primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
 } else {
 // no search since query is empty, user might have entered no data or filters have removed all search words
 Log.logFine("SEARCH_EVENT", "NO SEARCH STARTED DUE TO EMPTY SEARCH REQUEST.");
 }
 // start worker threads to fetch urls and snippets
 this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail);
 } else {
 // do a local search
 this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
 if (generateAbstracts) {
 this.rankingProcess.run(); // this is not started concurrently here on purpose!
 // compute index abstracts
@@ -177,17 +177,17 @@ public final class SearchEvent {
 final ReferenceContainer<WordReference> container = entry.getValue();
 assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + ASCII.String(container.getTermHash()) + ", wordhash = " + ASCII.String(wordhash);
 if (container.size() > maxcount) {
-IAmaxcounthash = wordhash;
+this.IAmaxcounthash = wordhash;
 maxcount = container.size();
 }
 l = FlatWordPartitionScheme.std.dhtDistance(wordhash, null, peers.mySeed());
 if (l < mindhtdistance) {
 // calculate the word hash that is closest to our dht position
 mindhtdistance = l;
-IAneardhthash = wordhash;
+this.IAneardhthash = wordhash;
 }
-IACount.put(wordhash, LargeNumberCache.valueOf(container.size()));
-IAResults.put(wordhash, WordReferenceFactory.compressIndex(container, null, 1000).toString());
+this.IACount.put(wordhash, LargeNumberCache.valueOf(container.size()));
+this.IAResults.put(wordhash, WordReferenceFactory.compressIndex(container, null, 1000).toString());
 }
 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
 } else {
@@ -196,63 +196,78 @@ public final class SearchEvent {
 // before a reading process wants to get results from it
 for (int i = 0; i < 10; i++) {
 if (!this.rankingProcess.isAlive()) break;
-try {Thread.sleep(10);} catch (InterruptedException e) {}
+try {Thread.sleep(10);} catch (final InterruptedException e) {}
 }
 // this will reduce the maximum waiting time until results are available to 100 milliseconds
 // while we always get a good set of ranked data
 }
 // start worker threads to fetch urls and snippets
 this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail);
 }
 // clean up events
 SearchEventCache.cleanupEvents(false);
 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.CLEANUP, "", 0, 0), false);
 // store this search to a cache so it can be re-used
 if (MemoryControl.available() < 1024 * 1024 * 100) SearchEventCache.cleanupEvents(true);
 SearchEventCache.put(query.id(false), this);
 }
 public ReferenceOrder getOrder() {
 return this.order;
 }
 public long getEventTime() {
 return this.eventTime;
 }
 public void resetEventTime() {
 this.eventTime = System.currentTimeMillis();
 }
 public QueryParams getQuery() {
 return this.query;
 }
-public void setQuery(QueryParams query) {
+public void setQuery(final QueryParams query) {
 this.query = query;
 this.resultFetcher.query = query;
 }
 public void cleanup() {
+this.resultFetcher.setCleanupState();
 // stop all threads
-if (primarySearchThreads != null) {
+if (this.primarySearchThreads != null) {
 for (final yacySearch search : this.primarySearchThreads) {
 if (search != null) synchronized (search) {
 if (search.isAlive()) search.interrupt();
 }
 }
 }
-if (secondarySearchThreads != null) {
+if (this.secondarySearchThreads != null) {
 for (final yacySearch search : this.secondarySearchThreads) {
 if (search != null) synchronized (search) {
 if (search.isAlive()) search.interrupt();
 }
 }
 }
+// call the worker threads and ask them to stop
+for (final Worker w: this.resultFetcher.workerThreads) {
+if (w != null && w.isAlive()) {
+w.pleaseStop();
+w.interrupt();
+// the interrupt may occur during a MD5 computation which is resistant against interruption
+// therefore set some more interrupts on the process
+int ic = 10;
+while (ic-- > 0 & w.isAlive()) w.interrupt();
+}
+}
 // clear all data structures
 if (this.preselectedPeerHashes != null) this.preselectedPeerHashes.clear();
 if (this.localSearchThread != null) if (this.localSearchThread.isAlive()) this.localSearchThread.interrupt();
@@ -260,37 +275,37 @@ public final class SearchEvent {
 if (this.IAResults != null) this.IAResults.clear();
 if (this.heuristics != null) this.heuristics.clear();
 }
 public Iterator<Map.Entry<byte[], String>> abstractsString() {
 return this.IAResults.entrySet().iterator();
 }
-public String abstractsString(byte[] hash) {
+public String abstractsString(final byte[] hash) {
 return this.IAResults.get(hash);
 }
 public Iterator<Map.Entry<byte[], Integer>> abstractsCount() {
 return this.IACount.entrySet().iterator();
 }
-public int abstractsCount(byte[] hash) {
-Integer i = this.IACount.get(hash);
+public int abstractsCount(final byte[] hash) {
+final Integer i = this.IACount.get(hash);
 if (i == null) return -1;
 return i.intValue();
 }
 public byte[] getAbstractsMaxCountHash() {
 return this.IAmaxcounthash;
 }
 public byte[] getAbstractsNearDHTHash() {
 return this.IAneardhthash;
 }
 boolean anyRemoteSearchAlive() {
 // check primary search threads
 if ((this.primarySearchThreads != null) && (this.primarySearchThreads.length != 0)) {
-for (final yacySearch primarySearchThread : primarySearchThreads) {
+for (final yacySearch primarySearchThread : this.primarySearchThreads) {
 if ((primarySearchThread != null) && (primarySearchThread.isAlive())) return true;
 }
 }
@@ -302,15 +317,15 @@ public final class SearchEvent {
 }
 return false;
 }
 public yacySearch[] getPrimarySearchThreads() {
-return primarySearchThreads;
+return this.primarySearchThreads;
 }
 public yacySearch[] getSecondarySearchThreads() {
-return secondarySearchThreads;
+return this.secondarySearchThreads;
 }
 public RankingProcess getRankingResult() {
 return this.rankingProcess;
 }
@@ -318,52 +333,52 @@ public final class SearchEvent {
 public ScoreMap<String> getNamespaceNavigator() {
 return this.rankingProcess.getNamespaceNavigator();
 }
 public ScoreMap<String> getHostNavigator() {
 return this.rankingProcess.getHostNavigator();
 }
-public ScoreMap<String> getTopicNavigator(int count) {
+public ScoreMap<String> getTopicNavigator(final int count) {
 // returns a set of words that are computed as toplist
 return this.rankingProcess.getTopicNavigator(count);
 }
 public ScoreMap<String> getAuthorNavigator() {
 // returns a list of authors so far seen on result set
 return this.rankingProcess.getAuthorNavigator();
 }
-public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) {
+public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
 synchronized (this.heuristics) {
 this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));
 }
 }
-public HeuristicResult getHeuristic(byte[] urlhash) {
+public HeuristicResult getHeuristic(final byte[] urlhash) {
 synchronized (this.heuristics) {
 return this.heuristics.get(urlhash);
 }
 }
-public ResultEntry oneResult(final int item, long timeout) {
-if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
-(query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) {
+public ResultEntry oneResult(final int item, final long timeout) {
+if ((this.query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
+(this.query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) {
 // this is a search using remote search threads. Also the local
 // search thread is started as background process
-if ((localSearchThread != null) && (localSearchThread.isAlive())) {
+if ((this.localSearchThread != null) && (this.localSearchThread.isAlive())) {
 // in case that the local search takes longer than some other
 // remote search requests, wait that the local process terminates first
-try {localSearchThread.join();} catch (InterruptedException e) {}
+try {this.localSearchThread.join();} catch (final InterruptedException e) {}
 }
 }
 return this.resultFetcher.oneResult(item, timeout);
 }
 boolean secondarySearchStartet = false;
 public static class HeuristicResult /*implements Comparable<HeuristicResult>*/ {
 public final byte[] urlhash; public final String heuristicName; public final boolean redundant;
-public HeuristicResult(byte[] urlhash, String heuristicName, boolean redundant) {
+public HeuristicResult(final byte[] urlhash, final String heuristicName, final boolean redundant) {
 this.urlhash = urlhash; this.heuristicName = heuristicName; this.redundant = redundant;
 }/*
 public int compareTo(HeuristicResult o) {
@@ -376,33 +391,33 @@ public final class SearchEvent {
 return Base64Order.enhancedCoder.equal(this.urlhash, ((HeuristicResult) o).urlhash);
 }*/
 }
 public class SecondarySearchSuperviser extends Thread {
 // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
 // this relation contains the information where specific urls can be found in specific peers
 SortedMap<String, SortedMap<String, StringBuilder>> abstractsCache;
 SortedSet<String> checkedPeers;
 Semaphore trigger;
 public SecondarySearchSuperviser() {
 this.abstractsCache = new TreeMap<String, SortedMap<String, StringBuilder>>();
 this.checkedPeers = new TreeSet<String>();
 this.trigger = new Semaphore(0);
 }
 /**
 * add a single abstract to the existing set of abstracts
 * @param wordhash
 * @param singleAbstract // a mapping from url-hashes to a string of peer-hashes
 */
-public void addAbstract(String wordhash, final TreeMap<String, StringBuilder> singleAbstract) {
+public void addAbstract(final String wordhash, final TreeMap<String, StringBuilder> singleAbstract) {
 final SortedMap<String, StringBuilder> oldAbstract;
-synchronized (abstractsCache) {
-oldAbstract = abstractsCache.get(wordhash);
+synchronized (this.abstractsCache) {
+oldAbstract = this.abstractsCache.get(wordhash);
 if (oldAbstract == null) {
 // new abstracts in the cache
-abstractsCache.put(wordhash, singleAbstract);
+this.abstractsCache.put(wordhash, singleAbstract);
 return;
 }
 }
@@ -421,11 +436,11 @@ public final class SearchEvent {
 }.start();
 // abstractsCache.put(wordhash, oldAbstract); // put not necessary since it is sufficient to just change the value content (it stays assigned)
 }
 public void commitAbstract() {
 this.trigger.release();
 }
 private String wordsFromPeer(final String peerhash, final StringBuilder urls) {
 Map.Entry<String, SortedMap<String, StringBuilder>> entry;
 String word, url, wordlist = "";
@@ -454,7 +469,7 @@ public final class SearchEvent {
 }
 return wordlist;
 }
 @Override
 public void run() {
 try {
@@ -465,16 +480,16 @@ public final class SearchEvent {
 t++;
 if (t > 10) break;
 }
-} catch (InterruptedException e) {
+} catch (final InterruptedException e) {
 // the thread was interrupted
 // do nohing
 }
 // the time-out was reached
 }
 private void prepareSecondarySearch() {
-if (abstractsCache == null || abstractsCache.size() != query.queryHashes.size()) return; // secondary search not possible (yet)
+if (this.abstractsCache == null || this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return; // secondary search not possible (yet)
 // catch up index abstracts and join them; then call peers again to submit their urls
 /*
 System.out.println("DEBUG-INDEXABSTRACT: " + abstractsCache.size() + " word references caught, " + query.queryHashes.size() + " needed");
@@ -482,23 +497,23 @@ public final class SearchEvent {
 System.out.println("DEBUG-INDEXABSTRACT: hash " + entry.getKey() + ": " + ((query.queryHashes.has(entry.getKey().getBytes()) ? "NEEDED" : "NOT NEEDED") + "; " + entry.getValue().size() + " entries"));
 }
 */
 // find out if there are enough references for all words that are searched
-if (abstractsCache.size() != query.queryHashes.size()) return;
+if (this.abstractsCache.size() != SearchEvent.this.query.queryHashes.size()) return;
 // join all the urlhash:peerlist relations: the resulting map has values with a combined peer-list list
-final SortedMap<String, StringBuilder> abstractJoin = SetTools.joinConstructive(abstractsCache.values(), true);
+final SortedMap<String, StringBuilder> abstractJoin = SetTools.joinConstructive(this.abstractsCache.values(), true);
 if (abstractJoin.isEmpty()) return;
 // the join result is now a urlhash: peer-list relation
 // generate a list of peers that have the urls for the joined search result
 final SortedMap<String, StringBuilder> secondarySearchURLs = new TreeMap<String, StringBuilder>(); // a (peerhash:urlhash-liststring) mapping
 String url, peer;
 StringBuilder urls, peerlist;
-final String mypeerhash = peers.mySeed().hash;
+final String mypeerhash = SearchEvent.this.peers.mySeed().hash;
 boolean mypeerinvolved = false;
 int mypeercount;
-for (Map.Entry<String, StringBuilder> entry: abstractJoin.entrySet()) {
+for (final Map.Entry<String, StringBuilder> entry: abstractJoin.entrySet()) {
 url = entry.getKey();
 peerlist = entry.getValue();
 //System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peerlist);
@@ -519,33 +534,39 @@ public final class SearchEvent {
 }
 if (mypeercount == 1) mypeerinvolved = true;
 }
 // compute words for secondary search and start the secondary searches
 String words;
-secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()];
+SearchEvent.this.secondarySearchThreads = new yacySearch[(mypeerinvolved) ? secondarySearchURLs.size() - 1 : secondarySearchURLs.size()];
 int c = 0;
-for (Map.Entry<String, StringBuilder> entry: secondarySearchURLs.entrySet()) {
+for (final Map.Entry<String, StringBuilder> entry: secondarySearchURLs.entrySet()) {
 peer = entry.getKey();
 if (peer.equals(mypeerhash)) continue; // we don't need to ask ourself
-if (checkedPeers.contains(peer)) continue; // do not ask a peer again
+if (this.checkedPeers.contains(peer)) continue; // do not ask a peer again
 urls = entry.getValue();
 words = wordsFromPeer(peer, urls);
 if (words.length() == 0) continue; // ???
 assert words.length() >= 12 : "words = " + words;
 //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words);
-rankingProcess.moreFeeders(1);
-checkedPeers.add(peer);
-secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
-words, urls.toString(), 6000, query.getSegment(), peers, rankingProcess, peer, Switchboard.urlBlacklist,
-query.ranking, query.constraint, preselectedPeerHashes);
+SearchEvent.this.rankingProcess.moreFeeders(1);
+this.checkedPeers.add(peer);
+SearchEvent.this.secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
+words, urls.toString(), 6000, SearchEvent.this.query.getSegment(), SearchEvent.this.peers, SearchEvent.this.rankingProcess, peer, Switchboard.urlBlacklist,
+SearchEvent.this.query.ranking, SearchEvent.this.query.constraint, SearchEvent.this.preselectedPeerHashes);
 }
 }
 }
 public ResultFetcher result() {
 return this.resultFetcher;
 }
+public boolean workerAlive() {
+if (this.resultFetcher == null || this.resultFetcher.workerThreads == null) return false;
+for (final Worker w: this.resultFetcher.workerThreads) if (w != null && w.isAlive()) return true;
+return false;
+}
 }

@@ -36,7 +36,6 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.repository.LoaderDispatcher;
 import de.anomic.data.WorkTables;
-import de.anomic.search.ResultFetcher.Worker;
 import de.anomic.yacy.yacySeedDB;
 public class SearchEventCache {
@@ -67,18 +66,22 @@ public class SearchEventCache {
 // the less memory is there, the less time is acceptable for elements in the cache
 final long memx = MemoryControl.available();
 final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem;
-Map.Entry<String, SearchEvent> event;
+Map.Entry<String, SearchEvent> eventEntry;
 final Iterator<Map.Entry<String, SearchEvent>> i = lastEvents.entrySet().iterator();
+SearchEvent event;
 while (i.hasNext()) {
-event = i.next();
-if (all || event.getValue().getEventTime() + acceptTime < System.currentTimeMillis()) {
-if (workerAlive(event.getValue())) {
-event.getValue().cleanup();
-} else {
-i.remove();
-cacheDelete++;
-}
-}
+eventEntry = i.next();
+event = eventEntry.getValue();
+if (event == null) continue;
+if (all || event.getEventTime() + acceptTime < System.currentTimeMillis()) {
+if (event.workerAlive()) {
+event.cleanup();
+}
+}
+if (!event.workerAlive()) {
+i.remove();
+cacheDelete++;
+}
 }
 }
@@ -91,17 +94,11 @@ public class SearchEventCache {
 public static int countAliveThreads() {
 int alive = 0;
 for (final SearchEvent e: SearchEventCache.lastEvents.values()) {
-if (workerAlive(e)) alive++;
+if (e.workerAlive()) alive++;
 }
 return alive;
 }
-private static boolean workerAlive(final SearchEvent e) {
-if (e == null || e.result() == null || e.result().workerThreads == null) return false;
-for (final Worker w: e.result().workerThreads) if (w != null && w.isAlive()) return true;
-return false;
-}
 private static SearchEvent dummyEvent = null;
 private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) {
@@ -147,14 +144,14 @@ public class SearchEventCache {
 int waitcount = 0;
 throttling : while (true) {
-final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 10 : 100) / 1024 / 1024);
+final int allowedThreads = (int) Math.max(1, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024);
 // make room if there are too many search events (they need a lot of RAM)
-if (SearchEventCache.lastEvents.size() > allowedThreads) {
+if (SearchEventCache.lastEvents.size() >= allowedThreads) {
 Log.logWarning("SearchEventCache", "throttling phase 1: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
 cleanupEvents(false);
 } else break throttling;
 // if there are still some then delete just all
-if (SearchEventCache.lastEvents.size() > allowedThreads) {
+if (SearchEventCache.lastEvents.size() >= allowedThreads) {
 Log.logWarning("SearchEventCache", "throttling phase 2: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
 cleanupEvents(true);
 } else break throttling;
@@ -167,6 +164,11 @@ public class SearchEventCache {
 if (waitcount >= 10) return getDummyEvent(workTables, loader, query.getSegment());
 }
+if (waitcount > 0) {
+// do not fetch snippets because that is most time-expensive
+query.snippetCacheStrategy = null;
+}
 // check if there are too many other searches alive now
 Log.logInfo("SearchEventCache", "getEvent: " + SearchEventCache.lastEvents.size() + " in cache; " + countAliveThreads() + " alive");

@@ -7,7 +7,7 @@
 // $LastChangedBy$
 //
 // LICENSE
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
@@ -41,19 +41,19 @@ import net.yacy.kelondro.util.ByteArray;
 /**
 * URI-object providing YaCy-hash computation
 *
 * Hashes for URIs are split in several parts
 * For URIs pointing to resources not globally available,
 * the domainhash-part gets one reserved value
 */
 public class DigestURI extends MultiProtocolURI implements Serializable {
 private static final long serialVersionUID = -1173233022912141885L;
 public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
 // class variables
 private byte[] hash;
 /**
 * Shortcut, calculate hash for shorted url/hostname
 * @param host
@@ -65,13 +65,13 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 DigestURI url = null;
 try {
 url = new DigestURI(h);
-} catch (MalformedURLException e) {
+} catch (final MalformedURLException e) {
 Log.logException(e);
 return null;
 }
 return (url == null) ? null : ASCII.String(url.hash(), 6, 6);
 }
 /**
 * DigestURI from File
 */
@@ -85,10 +85,10 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 public DigestURI(final String url) throws MalformedURLException {
 this(url, null);
 }
 /**
 * DigestURI from URI string, hash is already calculated
 * @param url
 * @param hash already calculated hash for url
 * @throws MalformedURLException
 */
@@ -96,7 +96,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 super(url);
 this.hash = hash;
 }
 /**
 * DigestURI from general URI
 * @param baseURL
@@ -105,7 +105,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 super(baseURL);
 this.hash = (baseURL instanceof DigestURI) ? ((DigestURI) baseURL).hash : null;
 }
 /**
 * DigestURI from general URI, hash already calculated
 * @param baseURL
@@ -115,12 +115,12 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 super(baseURL);
 this.hash = hash;
 }
-public DigestURI(final MultiProtocolURI baseURL, String relPath) throws MalformedURLException {
+public DigestURI(final MultiProtocolURI baseURL, final String relPath) throws MalformedURLException {
 super(baseURL, relPath);
 this.hash = null;
 }
 public DigestURI(final String protocol, final String host, final int port, final String path) throws MalformedURLException {
 super(protocol, host, port, path);
 this.hash = null;
@@ -131,7 +131,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 */
 @Override
 public int hashCode() {
-return ByteArray.hashCode(this.hash());
+return ByteArray.hashCode(hash());
 }
 public static final int flagTypeID(final String hash) {
@@ -163,29 +163,29 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 /**
 * calculated YaCy-Hash of this URI
 *
 * @note needs DNS lookup to check if the addresses domain is local
 * that causes that this method may be very slow
 *
 * @return hash
 */
 private final byte[] urlHashComputation() {
 // the url hash computation needs a DNS lookup to check if the addresses domain is local
 // that causes that this method may be very slow
 assert this.hash == null; // should only be called if the hash was not computed before
-final int id = Domains.getDomainID(host); // id=7: tld is local
+final int id = Domains.getDomainID(this.host); // id=7: tld is local
 final boolean isHTTP = isHTTP();
-int p = (host == null) ? -1 : this.host.lastIndexOf('.');
-String dom = (p > 0) ? dom = host.substring(0, p) : "";
+int p = (this.host == null) ? -1 : this.host.lastIndexOf('.');
+String dom = (p > 0) ? dom = this.host.substring(0, p) : "";
 p = dom.lastIndexOf('.'); // locate subdomain
 String subdom = "";
 if (p > 0) {
 subdom = dom.substring(0, p);
 dom = dom.substring(p + 1);
 }
 // find rootpath
 int rootpathStart = 0;
 int rootpathEnd = this.path.length() - 1;
@@ -196,7 +196,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 p = this.path.indexOf('/', rootpathStart);
 String rootpath = "";
 if (p > 0 && p < rootpathEnd) {
-rootpath = path.substring(rootpathStart, p);
+rootpath = this.path.substring(rootpathStart, p);
 }
 // we collected enough information to compute the fragments that are
@@ -209,27 +209,39 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 final StringBuilder hashs = new StringBuilder(12);
 assert hashs.length() == 0;
 // form the 'local' part of the hash
-String normalform = toNormalform(true, true, false, true);
-String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
+final String normalform = toNormalform(true, true, false, true);
+final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
 if (b64l.length() < 5) return null;
 hashs.append(b64l.substring(0, 5)); // 5 chars
 assert hashs.length() == 5;
-hashs.append(subdomPortPath(subdom, port, rootpath)); // 1 char
+hashs.append(subdomPortPath(subdom, this.port, rootpath)); // 1 char
 assert hashs.length() == 6;
 // form the 'global' part of the hash
-hashs.append(hosthash5(this.protocol, host, port)); // 5 chars
+hashs.append(hosthash5(this.protocol, this.host, this.port)); // 5 chars
 assert hashs.length() == 11;
 hashs.append(Base64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char
 assert hashs.length() == 12;
 // return result hash
-byte[] b = ASCII.getBytes(hashs.toString());
+final byte[] b = ASCII.getBytes(hashs.toString());
 assert b.length == 12;
 return b;
 }
+/**
+* return true if the protocol of the URL was 'http'
+* this is not true if the protocol was 'https'
+* @param hash
+* @return true for url hashes that point to http services; false otherwise
+*/
+public static final boolean flag4HTTPset(final byte[] hash) {
+assert hash.length == 12;
+final byte flagbyte = hash[11];
+return (flagbyte & 32) != 0;
+}
private static char subdomPortPath(final String subdom, final int port, final String rootpath) { private static char subdomPortPath(final String subdom, final int port, final String rootpath) {
StringBuilder sb = new StringBuilder(subdom.length() + rootpath.length() + 8); final StringBuilder sb = new StringBuilder(subdom.length() + rootpath.length() + 8);
sb.append(subdom).append(':').append(Integer.toString(port)).append(':').append(rootpath); sb.append(subdom).append(':').append(Integer.toString(port)).append(':').append(rootpath);
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).charAt(0); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).charAt(0);
} }
@ -237,13 +249,13 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
private static final char rootURLFlag0 = subdomPortPath("", 80, ""); private static final char rootURLFlag0 = subdomPortPath("", 80, "");
private static final char rootURLFlag1 = subdomPortPath("www", 80, ""); private static final char rootURLFlag1 = subdomPortPath("www", 80, "");
public static final boolean probablyRootURL(String urlHash) { public static final boolean probablyRootURL(final String urlHash) {
char c = urlHash.charAt(5); final char c = urlHash.charAt(5);
return c == rootURLFlag0 || c == rootURLFlag1; return c == rootURLFlag0 || c == rootURLFlag1;
} }
public static final boolean probablyRootURL(final byte[] urlHash) { public static final boolean probablyRootURL(final byte[] urlHash) {
char c = (char) urlHash[5]; final char c = (char) urlHash[5];
return c == rootURLFlag0 || c == rootURLFlag1; return c == rootURLFlag0 || c == rootURLFlag1;
} }
@ -251,12 +263,12 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
if (host == null) { if (host == null) {
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(protocol)).substring(0, 5); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(protocol)).substring(0, 5);
} else { } else {
StringBuilder sb = new StringBuilder(host.length() + 15); final StringBuilder sb = new StringBuilder(host.length() + 15);
sb.append(protocol).append(':').append(host).append(':').append(Integer.toString(port)); sb.append(protocol).append(':').append(host).append(':').append(Integer.toString(port));
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).substring(0, 5); return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).substring(0, 5);
} }
} }
/** /**
* compute a 6-byte hash fragment that can be used to identify the domain of the url * compute a 6-byte hash fragment that can be used to identify the domain of the url
* @param protocol * @param protocol
@ -284,7 +296,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
public static final String hosthash6(final String host) { public static final String hosthash6(final String host) {
return hosthash6("http", host, 80); return hosthash6("http", host, 80);
} }
//private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" }; //private static String[] testTLDs = new String[] { "com", "net", "org", "uk", "fr", "de", "es", "it" };
public static final int domLengthEstimation(final byte[] urlHashBytes) { public static final int domLengthEstimation(final byte[] urlHashBytes) {

@ -2,7 +2,7 @@
* HandleMap * HandleMap
* Copyright 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * Copyright 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 08.04.2008 at http://yacy.net * First released 08.04.2008 at http://yacy.net
* *
* $LastChangedDate$ * $LastChangedDate$
* $LastChangedRevision$ * $LastChangedRevision$
* $LastChangedBy$ * $LastChangedBy$
@ -11,12 +11,12 @@
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either * License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version. * version 2.1 of the License, or (at your option) any later version.
* *
* This library is distributed in the hope that it will be useful, * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. * Lesser General Public License for more details.
* *
* You should have received a copy of the GNU Lesser General Public License * You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt * along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>. * If not, see <http://www.gnu.org/licenses/>.
@ -51,10 +51,10 @@ import net.yacy.kelondro.order.CloneableIterator;
public final class HandleMap implements Iterable<Row.Entry> { public final class HandleMap implements Iterable<Row.Entry> {
private final Row rowdef; private final Row rowdef;
private RAMIndexCluster index; private RAMIndexCluster index;
/** /**
* initialize a HandleMap * initialize a HandleMap
* This may store a key and a long value for each key. * This may store a key and a long value for each key.
@ -63,9 +63,9 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param objectOrder * @param objectOrder
* @param space * @param space
*/ */
public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace, String name) { public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace, final String name) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder); this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder);
this.index = new RAMIndexCluster(name, rowdef, spread(expectedspace)); this.index = new RAMIndexCluster(name, this.rowdef, spread(expectedspace));
} }
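A hedged usage sketch for the constructor and the accessors shown in this diff; the import paths and the Base64Order ByteOrder are assumed from the YaCy source tree, and the key/value data is illustrative.

import net.yacy.kelondro.index.HandleMap;
import net.yacy.kelondro.order.Base64Order;

public class HandleMapDemo {
    public static void main(final String[] args) throws Exception {
        // 12-byte keys, 8-byte long values, roughly 1000 expected entries
        final HandleMap map = new HandleMap(12, Base64Order.enhancedCoder, 8, 1000, "demo");
        final byte[] key = "AAAAAAAAAAAA".getBytes("US-ASCII"); // exactly keylength bytes
        map.putUnique(key, 42L);
        System.out.println(map.get(key)); // prints 42; get returns -1 for unknown keys
        map.close();
    }
}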
/** /**
@ -73,8 +73,8 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param keylength * @param keylength
* @param objectOrder * @param objectOrder
* @param file * @param file
* @throws IOException * @throws IOException
* @throws RowSpaceExceededException * @throws RowSpaceExceededException
*/ */
public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final File file) throws IOException, RowSpaceExceededException { public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final File file) throws IOException, RowSpaceExceededException {
this(keylength, objectOrder, idxbytes, (int) (file.length() / (keylength + idxbytes)), file.getAbsolutePath()); this(keylength, objectOrder, idxbytes, (int) (file.length() / (keylength + idxbytes)), file.getAbsolutePath());
@ -82,7 +82,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
InputStream is; InputStream is;
try { try {
is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
} catch (OutOfMemoryError e) { } catch (final OutOfMemoryError e) {
is = new FileInputStream(file); is = new FileInputStream(file);
} }
if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is); if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
@ -99,25 +99,25 @@ public final class HandleMap implements Iterable<Row.Entry> {
is = null; is = null;
assert this.index.size() == file.length() / (keylength + idxbytes); assert this.index.size() == file.length() / (keylength + idxbytes);
} }
public void trim() { public void trim() {
this.index.trim(); this.index.trim();
} }
public long mem() { public long mem() {
return index.mem(); return this.index.mem();
} }
private static final int spread(int expectedspace) { private static final int spread(final int expectedspace) {
return Math.min(Runtime.getRuntime().availableProcessors(), Math.max(1, expectedspace / 3000)); return Math.min(Runtime.getRuntime().availableProcessors(), Math.max(1, expectedspace / 3000));
} }
public final int[] saturation() { public final int[] saturation() {
int keym = 0; int keym = 0;
int valm = this.rowdef.width(1); int valm = this.rowdef.width(1);
int valc; int valc;
byte[] lastk = null, thisk; byte[] lastk = null, thisk;
for (Row.Entry row: this) { for (final Row.Entry row: this) {
// check length of key // check length of key
if (lastk == null) { if (lastk == null) {
lastk = row.bytes(); lastk = row.bytes();
@ -142,7 +142,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
return a.length; return a.length;
} }
/** /**
* write a dump of the index to a file. All entries are written in order * write a dump of the index to a file. All entries are written in order
* which makes it possible to read them again in a fast way * which makes it possible to read them again in a fast way
@ -159,7 +159,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
OutputStream os; OutputStream os;
try { try {
os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024); os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024);
} catch (OutOfMemoryError e) { } catch (final OutOfMemoryError e) {
os = new FileOutputStream(tmp); os = new FileOutputStream(tmp);
} }
if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os); if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
@ -177,33 +177,33 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
public final Row row() { public final Row row() {
return index.row(); return this.index.row();
} }
public final void clear() { public final void clear() {
index.clear(); this.index.clear();
} }
public final byte[] smallestKey() { public final byte[] smallestKey() {
return index.smallestKey(); return this.index.smallestKey();
} }
public final byte[] largestKey() { public final byte[] largestKey() {
return index.largestKey(); return this.index.largestKey();
} }
public final boolean has(final byte[] key) { public final boolean has(final byte[] key) {
assert (key != null); assert (key != null);
return index.has(key); return this.index.has(key);
} }
public final long get(final byte[] key) { public final long get(final byte[] key) {
assert (key != null); assert (key != null);
final Row.Entry indexentry = index.get(key); final Row.Entry indexentry = this.index.get(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
/** /**
* Adds the key-value pair to the index. * Adds the key-value pair to the index.
* @param key the index key * @param key the index key
@ -218,58 +218,58 @@ public final class HandleMap implements Iterable<Row.Entry> {
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, l); newentry.setCol(1, l);
final Row.Entry oldentry = index.replace(newentry); final Row.Entry oldentry = this.index.replace(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
return oldentry.getColLong(1); return oldentry.getColLong(1);
} }
public final void putUnique(final byte[] key, final long l) throws RowSpaceExceededException { public final void putUnique(final byte[] key, final long l) throws RowSpaceExceededException {
assert l >= 0 : "l = " + l; assert l >= 0 : "l = " + l;
assert (key != null); assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, l); newentry.setCol(1, l);
index.addUnique(newentry); this.index.addUnique(newentry);
} }
public final long add(final byte[] key, final long a) throws RowSpaceExceededException { public final long add(final byte[] key, final long a) throws RowSpaceExceededException {
assert key != null; assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurs, it is a performance issue assert a > 0; // it does not make sense to add 0. If this occurs, it is a performance issue
synchronized (index) { synchronized (this.index) {
final Row.Entry indexentry = index.get(key); final Row.Entry indexentry = this.index.get(key);
if (indexentry == null) { if (indexentry == null) {
final Row.Entry newentry = this.rowdef.newEntry(); final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, a); newentry.setCol(1, a);
index.addUnique(newentry); this.index.addUnique(newentry);
return 1; return 1;
} }
final long i = indexentry.getColLong(1) + a; final long i = indexentry.getColLong(1) + a;
indexentry.setCol(1, i); indexentry.setCol(1, i);
index.put(indexentry); this.index.put(indexentry);
return i; return i;
} }
} }
public final long inc(final byte[] key) throws RowSpaceExceededException { public final long inc(final byte[] key) throws RowSpaceExceededException {
return add(key, 1); return add(key, 1);
} }
public final long dec(final byte[] key) throws RowSpaceExceededException { public final long dec(final byte[] key) throws RowSpaceExceededException {
return add(key, -1); return add(key, -1);
} }
public final ArrayList<long[]> removeDoubles() throws RowSpaceExceededException { public final ArrayList<long[]> removeDoubles() throws RowSpaceExceededException {
final ArrayList<long[]> report = new ArrayList<long[]>(); final ArrayList<long[]> report = new ArrayList<long[]>();
long[] is; long[] is;
int c; int c;
long l; long l;
final int initialSize = this.size(); final int initialSize = size();
ArrayList<RowCollection> rd = index.removeDoubles(); final ArrayList<RowCollection> rd = this.index.removeDoubles();
for (final RowCollection rowset: rd) { for (final RowCollection rowset: rd) {
is = new long[rowset.size()]; is = new long[rowset.size()];
c = 0; c = 0;
for (Row.Entry e: rowset) { for (final Row.Entry e: rowset) {
l = e.getColLong(1); l = e.getColLong(1);
assert l < initialSize : "l = " + l + ", initialSize = " + initialSize; assert l < initialSize : "l = " + l + ", initialSize = " + initialSize;
is[c++] = l; is[c++] = l;
@ -278,60 +278,60 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
return report; return report;
} }
public final ArrayList<byte[]> top(int count) { public final ArrayList<byte[]> top(final int count) {
List<Row.Entry> list0 = index.top(count); final List<Row.Entry> list0 = this.index.top(count);
ArrayList<byte[]> list = new ArrayList<byte[]>(); final ArrayList<byte[]> list = new ArrayList<byte[]>();
for (Row.Entry entry: list0) { for (final Row.Entry entry: list0) {
list.add(entry.getPrimaryKeyBytes()); list.add(entry.getPrimaryKeyBytes());
} }
return list; return list;
} }
public final synchronized long remove(final byte[] key) { public final synchronized long remove(final byte[] key) {
assert (key != null); assert (key != null);
final Row.Entry indexentry; final Row.Entry indexentry;
synchronized (index) { synchronized (this.index) {
final boolean exist = index.has(key); final boolean exist = this.index.has(key);
if (!exist) return -1; if (!exist) return -1;
final int s = index.size(); final int s = this.index.size();
final long m = index.mem(); final long m = this.index.mem();
indexentry = index.remove(key); indexentry = this.index.remove(key);
assert (indexentry != null); assert (indexentry != null);
assert index.size() < s : "s = " + s + ", index.size() = " + index.size(); assert this.index.size() < s : "s = " + s + ", index.size() = " + this.index.size();
assert index.mem() <= m : "m = " + m + ", index.mem() = " + index.mem(); assert this.index.mem() <= m : "m = " + m + ", index.mem() = " + this.index.mem();
} }
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
public final long removeone() { public final long removeone() {
final Row.Entry indexentry = index.removeOne(); final Row.Entry indexentry = this.index.removeOne();
if (indexentry == null) return -1; if (indexentry == null) return -1;
return indexentry.getColLong(1); return indexentry.getColLong(1);
} }
public final int size() { public final int size() {
return index.size(); return this.index.size();
} }
public final boolean isEmpty() { public final boolean isEmpty() {
return index.isEmpty(); return this.index.isEmpty();
} }
public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) { public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
return index.keys(up, firstKey); return this.index.keys(up, firstKey);
} }
public final CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) { public final CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
return index.rows(up, firstKey); return this.index.rows(up, firstKey);
} }
public final void close() { public final void close() {
index.close(); this.index.close();
index = null; this.index = null;
} }
/** /**
* this method creates a concurrent thread that can take entries that are used to initialize the map * this method creates a concurrent thread that can take entries that are used to initialize the map
* it should be used when a HandleMap is initialized when a file is read. Concurrency of FileIO and * it should be used when a HandleMap is initialized when a file is read. Concurrency of FileIO and
@ -342,7 +342,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
* @param bufferSize * @param bufferSize
* @return * @return
*/ */
public final static initDataConsumer asynchronusInitializer(String name, final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace) { public final static initDataConsumer asynchronusInitializer(final String name, final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace) {
final initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, expectedspace, name)); final initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, expectedspace, name));
final ExecutorService service = Executors.newSingleThreadExecutor(); final ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer)); initializer.setResult(service.submit(initializer));
@ -358,49 +358,51 @@ public final class HandleMap implements Iterable<Row.Entry> {
this.l = l; this.l = l;
} }
} }
protected static final entry poisonEntry = new entry(new byte[0], 0); protected static final entry poisonEntry = new entry(new byte[0], 0);
public final static class initDataConsumer implements Callable<HandleMap> { public final static class initDataConsumer implements Callable<HandleMap> {
private final BlockingQueue<entry> cache; private final BlockingQueue<entry> cache;
private final HandleMap map; private final HandleMap map;
private Future<HandleMap> result; private Future<HandleMap> result;
public initDataConsumer(final HandleMap map) { public initDataConsumer(final HandleMap map) {
this.map = map; this.map = map;
cache = new LinkedBlockingQueue<entry>(); this.cache = new LinkedBlockingQueue<entry>();
} }
protected final void setResult(final Future<HandleMap> result) { protected final void setResult(final Future<HandleMap> result) {
this.result = result; this.result = result;
} }
/** /**
* hand over another entry that shall be inserted into the HandleMap with an addl method * hand over another entry that shall be inserted into the HandleMap with an addl method
* @param key * @param key
* @param l * @param l
*/ */
public final void consume(final byte[] key, final long l) { public final void consume(final byte[] key, final long l) {
try { while (true) try {
cache.put(new entry(key, l)); this.cache.put(new entry(key, l));
} catch (InterruptedException e) { break;
Log.logException(e); } catch (final InterruptedException e) {
continue;
} }
} }
/** /**
* to signal the initialization thread that no more entries will be submitted with consume(), * to signal the initialization thread that no more entries will be submitted with consume(),
* this method must be called. The process will not terminate unless this is called. * this method must be called. The process will not terminate unless this is called.
*/ */
public final void finish() { public final void finish() {
try { while (true) try {
cache.put(poisonEntry); this.cache.put(poisonEntry);
} catch (InterruptedException e) { break;
Log.logException(e); } catch (final InterruptedException e) {
continue;
} }
} }
/** /**
* this must be called after finish() was called. This method blocks until all entries * this must be called after finish() was called. This method blocks until all entries
* have been processed and the content has been sorted. It returns the HandleMap * have been processed and the content has been sorted. It returns the HandleMap
@ -412,27 +414,32 @@ public final class HandleMap implements Iterable<Row.Entry> {
public final HandleMap result() throws InterruptedException, ExecutionException { public final HandleMap result() throws InterruptedException, ExecutionException {
return this.result.get(); return this.result.get();
} }
public final HandleMap call() throws IOException { public final HandleMap call() throws IOException {
try { try {
entry c; finishloop: while (true) {
while ((c = cache.take()) != poisonEntry) { entry c;
map.putUnique(c.key, c.l); try {
while ((c = this.cache.take()) != poisonEntry) {
this.map.putUnique(c.key, c.l);
}
break finishloop;
} catch (final InterruptedException e) {
continue finishloop;
}
} }
} catch (InterruptedException e) { } catch (final RowSpaceExceededException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e); Log.logException(e);
} }
return map; return this.map;
} }
public void close() { public void close() {
this.map.close(); this.map.close();
} }
} }
public Iterator<Row.Entry> iterator() { public Iterator<Row.Entry> iterator() {
return this.rows(true, null); return rows(true, null);
} }
} }
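A hedged driver sketch for the asynchronusInitializer / initDataConsumer pair above; in real use the key/long pairs would come from a dump file, here they are illustrative.

import net.yacy.kelondro.index.HandleMap;
import net.yacy.kelondro.order.Base64Order;

public class AsyncInitDemo {
    public static void main(final String[] args) throws Exception {
        final HandleMap.initDataConsumer consumer =
            HandleMap.asynchronusInitializer("demo", 12, Base64Order.enhancedCoder, 8, 1000);
        consumer.consume("AAAAAAAAAAAA".getBytes("US-ASCII"), 1L);
        consumer.consume("BBBBBBBBBBBB".getBytes("US-ASCII"), 2L);
        consumer.finish();                        // no more entries; unblocks the call() loop
        final HandleMap map = consumer.result();  // blocks until everything is inserted
        System.out.println(map.size());           // prints 2
        map.close();
    }
}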

@ -1,4 +1,4 @@
// Log.java // Log.java
// ------------------------------------- // -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net // (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de // first published on http://www.anomic.de
@ -61,7 +61,7 @@ public final class Log {
public static final char LOGTOKEN_FINEST = 'D'; public static final char LOGTOKEN_FINEST = 'D';
private final Logger theLogger; private final Logger theLogger;
public Log(final String appName) { public Log(final String appName) {
this.theLogger = Logger.getLogger(appName); this.theLogger = Logger.getLogger(appName);
//this.theLogger.setLevel(Level.FINEST); // set a default level //this.theLogger.setLevel(Level.FINEST); // set a default level
@ -70,15 +70,15 @@ public final class Log {
public final void setLevel(final Level newLevel) { public final void setLevel(final Level newLevel) {
this.theLogger.setLevel(newLevel); this.theLogger.setLevel(newLevel);
} }
public final void logSevere(final String message) { public final void logSevere(final String message) {
enQueueLog(this.theLogger, Level.SEVERE, message); enQueueLog(this.theLogger, Level.SEVERE, message);
} }
public final void logSevere(final String message, final Throwable thrown) { public final void logSevere(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.SEVERE, message, thrown); enQueueLog(this.theLogger, Level.SEVERE, message, thrown);
} }
public final boolean isSevere() { public final boolean isSevere() {
return this.theLogger.isLoggable(Level.SEVERE); return this.theLogger.isLoggable(Level.SEVERE);
} }
@ -86,23 +86,23 @@ public final class Log {
public final void logWarning(final String message) { public final void logWarning(final String message) {
enQueueLog(this.theLogger, Level.WARNING, message); enQueueLog(this.theLogger, Level.WARNING, message);
} }
public final void logWarning(final String message, final Throwable thrown) { public final void logWarning(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.WARNING, message, thrown); enQueueLog(this.theLogger, Level.WARNING, message, thrown);
} }
public final boolean isWarning() { public final boolean isWarning() {
return this.theLogger.isLoggable(Level.WARNING); return this.theLogger.isLoggable(Level.WARNING);
} }
public final void logConfig(final String message) { public final void logConfig(final String message) {
enQueueLog(this.theLogger, Level.CONFIG, message); enQueueLog(this.theLogger, Level.CONFIG, message);
} }
public final void logConfig(final String message, final Throwable thrown) { public final void logConfig(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.CONFIG, message, thrown); enQueueLog(this.theLogger, Level.CONFIG, message, thrown);
} }
public final boolean isConfig() { public final boolean isConfig() {
return this.theLogger.isLoggable(Level.CONFIG); return this.theLogger.isLoggable(Level.CONFIG);
} }
@ -110,11 +110,11 @@ public final class Log {
public final void logInfo(final String message) { public final void logInfo(final String message) {
enQueueLog(this.theLogger, Level.INFO, message); enQueueLog(this.theLogger, Level.INFO, message);
} }
public final void logInfo(final String message, final Throwable thrown) { public final void logInfo(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.INFO, message, thrown); enQueueLog(this.theLogger, Level.INFO, message, thrown);
} }
public boolean isInfo() { public boolean isInfo() {
return this.theLogger.isLoggable(Level.INFO); return this.theLogger.isLoggable(Level.INFO);
} }
@ -122,11 +122,11 @@ public final class Log {
public final void logFine(final String message) { public final void logFine(final String message) {
enQueueLog(this.theLogger, Level.FINE, message); enQueueLog(this.theLogger, Level.FINE, message);
} }
public final void logFine(final String message, final Throwable thrown) { public final void logFine(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.FINE, message, thrown); enQueueLog(this.theLogger, Level.FINE, message, thrown);
} }
public final boolean isFine() { public final boolean isFine() {
return this.theLogger.isLoggable(Level.FINE); return this.theLogger.isLoggable(Level.FINE);
} }
@ -134,32 +134,32 @@ public final class Log {
public final void logFiner(final String message) { public final void logFiner(final String message) {
enQueueLog(this.theLogger, Level.FINER, message); enQueueLog(this.theLogger, Level.FINER, message);
} }
public final void logFiner(final String message, final Throwable thrown) { public final void logFiner(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.FINER, message, thrown); enQueueLog(this.theLogger, Level.FINER, message, thrown);
} }
public final boolean isFiner() { public final boolean isFiner() {
return this.theLogger.isLoggable(Level.FINER); return this.theLogger.isLoggable(Level.FINER);
} }
public final void logFinest(final String message) { public final void logFinest(final String message) {
enQueueLog(this.theLogger, Level.FINEST, message); enQueueLog(this.theLogger, Level.FINEST, message);
} }
public final void logFinest(final String message, final Throwable thrown) { public final void logFinest(final String message, final Throwable thrown) {
enQueueLog(this.theLogger, Level.FINEST, message, thrown); enQueueLog(this.theLogger, Level.FINEST, message, thrown);
} }
public final boolean isFinest() { public final boolean isFinest() {
return this.theLogger.isLoggable(Level.FINEST); return this.theLogger.isLoggable(Level.FINEST);
} }
public final boolean isLoggable(final Level level) { public final boolean isLoggable(final Level level) {
return this.theLogger.isLoggable(level); return this.theLogger.isLoggable(level);
} }
// static log messages // static log messages
public final static void logSevere(final String appName, final String message) { public final static void logSevere(final String appName, final String message) {
enQueueLog(appName, Level.SEVERE, message); enQueueLog(appName, Level.SEVERE, message);
@ -167,7 +167,7 @@ public final class Log {
public final static void logSevere(final String appName, final String message, final Throwable thrown) { public final static void logSevere(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.SEVERE, message, thrown); enQueueLog(appName, Level.SEVERE, message, thrown);
} }
public final static void logWarning(final String appName, final String message) { public final static void logWarning(final String appName, final String message) {
enQueueLog(appName, Level.WARNING, message); enQueueLog(appName, Level.WARNING, message);
} }
@ -177,21 +177,21 @@ public final class Log {
public final static void logWarning(final String appName, final String message, final Throwable thrown) { public final static void logWarning(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.WARNING, message, thrown); enQueueLog(appName, Level.WARNING, message, thrown);
} }
public final static void logConfig(final String appName, final String message) { public final static void logConfig(final String appName, final String message) {
enQueueLog(appName, Level.CONFIG, message); enQueueLog(appName, Level.CONFIG, message);
} }
public final static void logConfig(final String appName, final String message, final Throwable thrown) { public final static void logConfig(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.CONFIG, message, thrown); enQueueLog(appName, Level.CONFIG, message, thrown);
} }
public final static void logInfo(final String appName, final String message) { public final static void logInfo(final String appName, final String message) {
enQueueLog(appName, Level.INFO, message); enQueueLog(appName, Level.INFO, message);
} }
public final static void logInfo(final String appName, final String message, final Throwable thrown) { public final static void logInfo(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.INFO, message, thrown); enQueueLog(appName, Level.INFO, message, thrown);
} }
public final static void logFine(final String appName, final String message) { public final static void logFine(final String appName, final String message) {
enQueueLog(appName, Level.FINE, message); enQueueLog(appName, Level.FINE, message);
} }
@ -200,73 +200,73 @@ public final class Log {
} }
public final static boolean isFine(final String appName) { public final static boolean isFine(final String appName) {
return Logger.getLogger(appName).isLoggable(Level.FINE); return Logger.getLogger(appName).isLoggable(Level.FINE);
} }
public final static void logFiner(final String appName, final String message) { public final static void logFiner(final String appName, final String message) {
enQueueLog(appName, Level.FINER, message); enQueueLog(appName, Level.FINER, message);
} }
public final static void logFiner(final String appName, final String message, final Throwable thrown) { public final static void logFiner(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.FINER, message, thrown); enQueueLog(appName, Level.FINER, message, thrown);
} }
public final static void logFinest(final String appName, final String message) { public final static void logFinest(final String appName, final String message) {
enQueueLog(appName, Level.FINEST, message); enQueueLog(appName, Level.FINEST, message);
} }
public final static void logFinest(final String appName, final String message, final Throwable thrown) { public final static void logFinest(final String appName, final String message, final Throwable thrown) {
enQueueLog(appName, Level.FINEST, message, thrown); enQueueLog(appName, Level.FINEST, message, thrown);
} }
public final static boolean isFinest(final String appName) { public final static boolean isFinest(final String appName) {
return Logger.getLogger(appName).isLoggable(Level.FINEST); return Logger.getLogger(appName).isLoggable(Level.FINEST);
} }
private final static void enQueueLog(final Logger logger, final Level level, final String message, final Throwable thrown) { private final static void enQueueLog(final Logger logger, final Level level, final String message, final Throwable thrown) {
if (logRunnerThread == null || !logRunnerThread.isAlive()) { if (logRunnerThread == null || !logRunnerThread.isAlive()) {
logger.log(level, message, thrown); logger.log(level, message, thrown);
} else { } else {
try { try {
logQueue.put(new logEntry(logger, level, message, thrown)); logQueue.put(new logEntry(logger, level, message, thrown));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
logger.log(level, message, thrown); logger.log(level, message, thrown);
} }
} }
} }
private final static void enQueueLog(final Logger logger, final Level level, final String message) { private final static void enQueueLog(final Logger logger, final Level level, final String message) {
if (logRunnerThread == null || !logRunnerThread.isAlive()) { if (logRunnerThread == null || !logRunnerThread.isAlive()) {
logger.log(level, message); logger.log(level, message);
} else { } else {
try { try {
logQueue.put(new logEntry(logger, level, message)); logQueue.put(new logEntry(logger, level, message));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
logger.log(level, message); logger.log(level, message);
} }
} }
} }
private final static void enQueueLog(final String loggername, final Level level, final String message, final Throwable thrown) { private final static void enQueueLog(final String loggername, final Level level, final String message, final Throwable thrown) {
if (logRunnerThread == null || !logRunnerThread.isAlive()) { if (logRunnerThread == null || !logRunnerThread.isAlive()) {
Logger.getLogger(loggername).log(level, message, thrown); Logger.getLogger(loggername).log(level, message, thrown);
} else { } else {
try { try {
logQueue.put(new logEntry(loggername, level, message, thrown)); logQueue.put(new logEntry(loggername, level, message, thrown));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Logger.getLogger(loggername).log(level, message, thrown); Logger.getLogger(loggername).log(level, message, thrown);
} }
} }
} }
private final static void enQueueLog(final String loggername, final Level level, final String message) { private final static void enQueueLog(final String loggername, final Level level, final String message) {
if (logRunnerThread == null || !logRunnerThread.isAlive()) { if (logRunnerThread == null || !logRunnerThread.isAlive()) {
Logger.getLogger(loggername).log(level, message); Logger.getLogger(loggername).log(level, message);
} else { } else {
try { try {
logQueue.put(new logEntry(loggername, level, message)); logQueue.put(new logEntry(loggername, level, message));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Logger.getLogger(loggername).log(level, message); Logger.getLogger(loggername).log(level, message);
} }
} }
} }
protected final static class logEntry { protected final static class logEntry {
public final Logger logger; public final Logger logger;
public final String loggername; public final String loggername;
@ -309,7 +309,7 @@ public final class Log {
this.thrown = null; this.thrown = null;
} }
} }
protected final static logEntry poison = new logEntry(); protected final static logEntry poison = new logEntry();
protected final static BlockingQueue<logEntry> logQueue = new LinkedBlockingQueue<logEntry>(); protected final static BlockingQueue<logEntry> logQueue = new LinkedBlockingQueue<logEntry>();
private final static logRunner logRunnerThread = new logRunner(); private final static logRunner logRunnerThread = new logRunner();
@ -322,7 +322,7 @@ public final class Log {
public logRunner() { public logRunner() {
super("Log Runner"); super("Log Runner");
} }
@Override @Override
public void run() { public void run() {
logEntry entry; logEntry entry;
@ -344,13 +344,13 @@ public final class Log {
} }
} }
} }
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
} }
} }
public final static void configureLogging(final File dataPath, final File appPath, final File loggingConfigFile) throws SecurityException, FileNotFoundException, IOException { public final static void configureLogging(final File dataPath, final File appPath, final File loggingConfigFile) throws SecurityException, FileNotFoundException, IOException {
FileInputStream fileIn = null; FileInputStream fileIn = null;
try { try {
@ -373,13 +373,13 @@ public final class Log {
// generating the root logger // generating the root logger
final Logger logger = Logger.getLogger(""); final Logger logger = Logger.getLogger("");
logger.setUseParentHandlers(false); logger.setUseParentHandlers(false);
//for (Handler h: logger.getHandlers()) logger.removeHandler(h); //for (Handler h: logger.getHandlers()) logger.removeHandler(h);
if (!dataPath.getAbsolutePath().equals(appPath.getAbsolutePath())) { if (!dataPath.getAbsolutePath().equals(appPath.getAbsolutePath())) {
final FileHandler handler = new FileHandler(logPattern, 1024*1024, 20, true); final FileHandler handler = new FileHandler(logPattern, 1024*1024, 20, true);
logger.addHandler(handler); logger.addHandler(handler);
} }
// redirect uncaught exceptions to logging // redirect uncaught exceptions to logging
final Log exceptionLog = new Log("UNCAUGHT-EXCEPTION"); final Log exceptionLog = new Log("UNCAUGHT-EXCEPTION");
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler(){ Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler(){
@ -398,16 +398,16 @@ public final class Log {
if (fileIn != null) try {fileIn.close();}catch(final Exception e){} if (fileIn != null) try {fileIn.close();}catch(final Exception e){}
} }
} }
public final static void shutdown() { public final static void shutdown() {
if (logRunnerThread == null || !logRunnerThread.isAlive()) return; if (logRunnerThread == null || !logRunnerThread.isAlive()) return;
try { try {
logQueue.put(poison); logQueue.put(poison);
logRunnerThread.join(10000); logRunnerThread.join(1000);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
} }
} }
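A hedged usage sketch for the queued logger above; the application name and messages are illustrative.

import net.yacy.kelondro.logging.Log;

public class LogDemo {
    public static void main(final String[] args) {
        final Log log = new Log("DEMO");
        log.logInfo("demo started");          // handed to the logRunner queue while the runner is alive
        if (log.isFine()) log.logFine("extra detail, only when FINE is enabled");
        Log.shutdown();                       // enqueue the poison entry and join the runner thread
    }
}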
public final static String format(final String s, int n, final int fillChar) { public final static String format(final String s, int n, final int fillChar) {
final int l = s.length(); final int l = s.length();
if (l >= n) return s; if (l >= n) return s;
@ -415,11 +415,11 @@ public final class Log {
for (final int i = l + n; i > n; n--) sb.insert(0, fillChar); for (final int i = l + n; i > n; n--) sb.insert(0, fillChar);
return sb.toString(); return sb.toString();
} }
public final static boolean allZero(final byte[] a) { public final static boolean allZero(final byte[] a) {
return allZero(a, 0, a.length); return allZero(a, 0, a.length);
} }
public final static boolean allZero(final byte[] a, final int astart, final int alength) { public final static boolean allZero(final byte[] a, final int astart, final int alength) {
for (int i = 0; i < alength; i++) if (a[astart + i] != 0) return false; for (int i = 0; i < alength; i++) if (a[astart + i] != 0) return false;
return true; return true;

@ -9,7 +9,7 @@
// $LastChangedBy$ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or // the Free Software Foundation; either version 2 of the License, or
@ -49,42 +49,42 @@ import net.yacy.kelondro.logging.Log;
public class Digest { public class Digest {
private final static int digestThreads = Runtime.getRuntime().availableProcessors() * 4; private final static int digestThreads = Runtime.getRuntime().availableProcessors() * 4;
public static BlockingQueue<MessageDigest> digestPool = new ArrayBlockingQueue<MessageDigest>(digestThreads); public static BlockingQueue<MessageDigest> digestPool = new ArrayBlockingQueue<MessageDigest>(digestThreads);
static { static {
for (int i = 0; i < digestThreads; i++) for (int i = 0; i < digestThreads; i++)
try { try {
MessageDigest digest = MessageDigest.getInstance("MD5"); final MessageDigest digest = MessageDigest.getInstance("MD5");
digest.reset(); digest.reset();
digestPool.add(digest); digestPool.add(digest);
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
Log.logException(e); Log.logException(e);
} }
} }
public static String encodeHex(final long in, final int length) { public static String encodeHex(final long in, final int length) {
String s = Long.toHexString(in); String s = Long.toHexString(in);
while (s.length() < length) s = "0" + s; while (s.length() < length) s = "0" + s;
return s; return s;
} }
public static String encodeOctal(final byte[] in) { public static String encodeOctal(final byte[] in) {
if (in == null) return ""; if (in == null) return "";
final StringBuilder result = new StringBuilder(in.length * 8 / 3); final StringBuilder result = new StringBuilder(in.length * 8 / 3);
for (int i = 0; i < in.length; i++) { for (final byte element : in) {
if ((0Xff & in[i]) < 8) result.append('0'); if ((0Xff & element) < 8) result.append('0');
result.append(Integer.toOctalString(0Xff & in[i])); result.append(Integer.toOctalString(0Xff & element));
} }
return result.toString(); return result.toString();
} }
public static String encodeHex(final byte[] in) { public static String encodeHex(final byte[] in) {
if (in == null) return ""; if (in == null) return "";
final StringBuilder result = new StringBuilder(in.length * 2); final StringBuilder result = new StringBuilder(in.length * 2);
for (int i = 0; i < in.length; i++) { for (final byte element : in) {
if ((0Xff & in[i]) < 16) result.append('0'); if ((0Xff & element) < 16) result.append('0');
result.append(Integer.toHexString(0Xff & in[i])); result.append(Integer.toHexString(0Xff & element));
} }
return result.toString(); return result.toString();
} }
@ -96,7 +96,7 @@ public class Digest {
} }
return result; return result;
} }
public static String encodeMD5Hex(final String key) { public static String encodeMD5Hex(final String key) {
// generate a hex representation from the md5 of a string // generate a hex representation from the md5 of a string
return encodeHex(encodeMD5Raw(key)); return encodeHex(encodeMD5Raw(key));
@ -122,16 +122,16 @@ public class Digest {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
digest.reset(); digest.reset();
fromPool = false; fromPool = false;
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
} }
} }
if (digest == null) try { if (digest == null) try {
digest = digestPool.take(); digest = digestPool.take();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logWarning("Digest", "using generic instead of pooled digest"); Log.logWarning("Digest", "using generic instead of pooled digest");
try { try {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e1) { } catch (final NoSuchAlgorithmException e1) {
Log.logException(e1); Log.logException(e1);
} }
digest.reset(); digest.reset();
@ -140,17 +140,22 @@ public class Digest {
byte[] keyBytes; byte[] keyBytes;
keyBytes = UTF8.getBytes(key); keyBytes = UTF8.getBytes(key);
digest.update(keyBytes); digest.update(keyBytes);
byte[] result = digest.digest(); final byte[] result = digest.digest();
digest.reset(); digest.reset();
if (fromPool) if (fromPool) {
try { returntopool: while (true) {
digestPool.put(digest); try {
} catch (InterruptedException e) { digestPool.put(digest);
Log.logException(e); break returntopool;
} } catch (final InterruptedException e) {
// we MUST return that digest to the pool
continue returntopool;
}
}
}
return result; return result;
} }
public static byte[] encodeMD5Raw(final File file) throws IOException { public static byte[] encodeMD5Raw(final File file) throws IOException {
FileInputStream in; FileInputStream in;
try { try {
@ -160,14 +165,14 @@ public class Digest {
Log.logException(e); Log.logException(e);
return null; return null;
} }
// create a concurrent thread that consumes data as it is read // create a concurrent thread that consumes data as it is read
// and computes the md5 while doing IO // and computes the md5 while doing IO
md5FilechunkConsumer md5consumer = new md5FilechunkConsumer(1024 * 64, 8); final md5FilechunkConsumer md5consumer = new md5FilechunkConsumer(1024 * 64, 8);
ExecutorService service = Executors.newSingleThreadExecutor(); final ExecutorService service = Executors.newSingleThreadExecutor();
Future<MessageDigest> md5result = service.submit(md5consumer); final Future<MessageDigest> md5result = service.submit(md5consumer);
service.shutdown(); service.shutdown();
filechunk c; filechunk c;
try { try {
while (true) { while (true) {
@ -185,61 +190,61 @@ public class Digest {
} }
// put in poison into queue to tell the consumer to stop // put in poison into queue to tell the consumer to stop
md5consumer.consume(md5FilechunkConsumer.poison); md5consumer.consume(md5FilechunkConsumer.poison);
// return the md5 digest from future task // return the md5 digest from future task
try { try {
return md5result.get().digest(); return md5result.get().digest();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} catch (ExecutionException e) { } catch (final ExecutionException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} }
} }
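A hedged sketch of calling the file digest above programmatically; the package path and the file name are assumptions.

import java.io.File;
import java.io.IOException;

import net.yacy.kelondro.order.Digest;

public class FileMd5Demo {
    public static void main(final String[] args) throws IOException {
        final File f = new File("readme.txt");
        final byte[] raw = Digest.encodeMD5Raw(f); // reads the file through the concurrent consumer below
        System.out.println("MD5 (" + f.getName() + ") = " + Digest.encodeHex(raw));
    }
}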
private static class filechunk { private static class filechunk {
public byte[] b; public byte[] b;
public int n; public int n;
public filechunk(int len) { public filechunk(final int len) {
b = new byte[len]; this.b = new byte[len];
n = 0; this.n = 0;
} }
} }
private static class md5FilechunkConsumer implements Callable<MessageDigest> { private static class md5FilechunkConsumer implements Callable<MessageDigest> {
private final BlockingQueue<filechunk> empty; private final BlockingQueue<filechunk> empty;
private final BlockingQueue<filechunk> filed; private final BlockingQueue<filechunk> filed;
protected static filechunk poison = new filechunk(0); protected static filechunk poison = new filechunk(0);
private MessageDigest digest; private MessageDigest digest;
public md5FilechunkConsumer(int bufferSize, int bufferCount) { public md5FilechunkConsumer(final int bufferSize, final int bufferCount) {
empty = new ArrayBlockingQueue<filechunk>(bufferCount); this.empty = new ArrayBlockingQueue<filechunk>(bufferCount);
filed = new LinkedBlockingQueue<filechunk>(); this.filed = new LinkedBlockingQueue<filechunk>();
// fill the empty queue // fill the empty queue
for (int i = 0; i < bufferCount; i++) empty.add(new filechunk(bufferSize)); for (int i = 0; i < bufferCount; i++) this.empty.add(new filechunk(bufferSize));
// init digest // init digest
try { try {
digest = MessageDigest.getInstance("MD5"); this.digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
System.out.println("Internal Error at md5:" + e.getMessage()); System.out.println("Internal Error at md5:" + e.getMessage());
} }
digest.reset(); this.digest.reset();
} }
public void consume(filechunk c) { public void consume(final filechunk c) {
try { try {
filed.put(c); this.filed.put(c);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
} }
public filechunk nextFree() throws IOException { public filechunk nextFree() throws IOException {
try { try {
return empty.take(); return this.empty.take();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
throw new IOException(e); throw new IOException(e);
} }
@ -249,41 +254,41 @@ public class Digest {
try { try {
filechunk c; filechunk c;
while(true) { while(true) {
c = filed.take(); c = this.filed.take();
if (c == poison) break; if (c == poison) break;
digest.update(c.b, 0, c.n); this.digest.update(c.b, 0, c.n);
empty.put(c); this.empty.put(c);
} }
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
return digest; return this.digest;
} }
} }
public static String fastFingerprintHex(final File file, boolean includeDate) { public static String fastFingerprintHex(final File file, final boolean includeDate) {
try { try {
return encodeHex(fastFingerprintRaw(file, includeDate)); return encodeHex(fastFingerprintRaw(file, includeDate));
} catch (IOException e) { } catch (final IOException e) {
return null; return null;
} }
} }
public static String fastFingerprintB64(final File file, boolean includeDate) { public static String fastFingerprintB64(final File file, final boolean includeDate) {
try { try {
byte[] b = fastFingerprintRaw(file, includeDate); final byte[] b = fastFingerprintRaw(file, includeDate);
assert b != null : "file = " + file.toString(); assert b != null : "file = " + file.toString();
if (b == null || b.length == 0) return null; if (b == null || b.length == 0) return null;
assert b.length != 0 : "file = " + file.toString(); assert b.length != 0 : "file = " + file.toString();
return Base64Order.enhancedCoder.encode(b); return Base64Order.enhancedCoder.encode(b);
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
} }
/** /**
* the fast fingerprint computes a md5-like hash from a given file, * the fast fingerprint computes a md5-like hash from a given file,
* which is different from a md5 because it does not read the complete file * which is different from a md5 because it does not read the complete file
@ -298,21 +303,21 @@ public class Digest {
* array[32k + 8 .. 32k + 15] = lastModified of file as long * array[32k + 8 .. 32k + 15] = lastModified of file as long
* @param file * @param file
* @return fingerprint in md5 raw format * @return fingerprint in md5 raw format
* @throws IOException * @throws IOException
*/ */
public static byte[] fastFingerprintRaw(final File file, boolean includeDate) throws IOException { public static byte[] fastFingerprintRaw(final File file, final boolean includeDate) throws IOException {
final int mb = 16 * 1024; final int mb = 16 * 1024;
final long fl = file.length(); final long fl = file.length();
if (fl <= 2 * mb) return encodeMD5Raw(file); if (fl <= 2 * mb) return encodeMD5Raw(file);
MessageDigest digest; MessageDigest digest;
try { try {
digest = MessageDigest.getInstance("MD5"); digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) { } catch (final NoSuchAlgorithmException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
RandomAccessFile raf = new RandomAccessFile(file, "r"); final RandomAccessFile raf = new RandomAccessFile(file, "r");
byte[] a = new byte[mb]; final byte[] a = new byte[mb];
try { try {
raf.seek(0); raf.seek(0);
raf.readFully(a, 0, mb); raf.readFully(a, 0, mb);
@ -324,7 +329,7 @@ public class Digest {
if (includeDate) digest.update(NaturalOrder.encodeLong(file.lastModified(), 8), 0, 8); if (includeDate) digest.update(NaturalOrder.encodeLong(file.lastModified(), 8), 0, 8);
} finally { } finally {
raf.close(); raf.close();
try {raf.getChannel().close();} catch (IOException e) {} try {raf.getChannel().close();} catch (final IOException e) {}
} }
return digest.digest(); return digest.digest();
} }
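A hedged sketch comparing the fast fingerprint above with a full MD5; the file path is illustrative and the package path is assumed. Files of at most 32 KB fall back to encodeMD5Raw, larger files are only sampled, so the fingerprint stays cheap.

import java.io.File;
import java.io.IOException;

import net.yacy.kelondro.order.Digest;

public class FingerprintDemo {
    public static void main(final String[] args) throws IOException {
        final File f = new File("DATA/HTCACHE/responseHeader.heap");
        // samples fixed regions of the file and, with includeDate = true, mixes in lastModified
        System.out.println("fingerprint hex = " + Digest.fastFingerprintHex(f, true));
        System.out.println("full MD5 hex    = " + Digest.encodeMD5Hex(f));
    }
}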
@ -347,39 +352,39 @@ public class Digest {
} }
return null; return null;
} }
public static void main(final String[] s) { public static void main(final String[] s) {
// usage example: // usage example:
// java -classpath classes de.anomic.kelondro.kelondroDigest -md5 DATA/HTCACHE/mediawiki/wikipedia.de.xml // java -classpath classes de.anomic.kelondro.kelondroDigest -md5 DATA/HTCACHE/mediawiki/wikipedia.de.xml
// java -classpath classes de.anomic.kelondro.kelondroDigest -md5 readme.txt // java -classpath classes de.anomic.kelondro.kelondroDigest -md5 readme.txt
// java -classpath classes de.anomic.kelondro.kelondroDigest -fb64 DATA/HTCACHE/responseHeader.heap // java -classpath classes de.anomic.kelondro.kelondroDigest -fb64 DATA/HTCACHE/responseHeader.heap
// compare with: // compare with:
// md5 readme.txt // md5 readme.txt
long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
if (s.length == 0) { if (s.length == 0) {
System.out.println("usage: -[md5|fingerprint] <arg>"); System.out.println("usage: -[md5|fingerprint] <arg>");
System.exit(0); System.exit(0);
} }
if (s[0].equals("-md5")) { if (s[0].equals("-md5")) {
// generate a md5 from a given file // generate a md5 from a given file
File f = new File(s[1]); final File f = new File(s[1]);
try { try {
System.out.println("MD5 (" + f.getName() + ") = " + encodeMD5Hex(f)); System.out.println("MD5 (" + f.getName() + ") = " + encodeMD5Hex(f));
} catch (IOException e) { } catch (final IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
if (s[0].equals("-fhex")) { if (s[0].equals("-fhex")) {
// generate a fast fingerprint from a given file // generate a fast fingerprint from a given file
File f = new File(s[1]); final File f = new File(s[1]);
System.out.println("fingerprint hex (" + f.getName() + ") = " + fastFingerprintHex(f, true)); System.out.println("fingerprint hex (" + f.getName() + ") = " + fastFingerprintHex(f, true));
} }
if (s[0].equals("-fb64")) { if (s[0].equals("-fb64")) {
// generate a fast fingerprint from a given file // generate a fast fingerprint from a given file
File f = new File(s[1]); final File f = new File(s[1]);
System.out.println("fingerprint b64 (" + f.getName() + ") = " + fastFingerprintB64(f, true)); System.out.println("fingerprint b64 (" + f.getName() + ") = " + fastFingerprintB64(f, true));
} }

@ -7,7 +7,7 @@
// $LastChangedBy$ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or // the Free Software Foundation; either version 2 of the License, or
@ -46,14 +46,14 @@ public abstract class AbstractThread extends Thread implements WorkflowThread {
private String shortDescr = "", longDescr = ""; private String shortDescr = "", longDescr = "";
private String monitorURL = null; private String monitorURL = null;
private long threadBlockTimestamp = System.currentTimeMillis(); private long threadBlockTimestamp = System.currentTimeMillis();
protected final void announceThreadBlockApply() { protected final void announceThreadBlockApply() {
// shall only be used, if a thread blocks for an important reason // shall only be used, if a thread blocks for an important reason
// like a socket connect and must renew the timestamp to correct // like a socket connect and must renew the timestamp to correct
// statistics // statistics
this.threadBlockTimestamp = System.currentTimeMillis(); this.threadBlockTimestamp = System.currentTimeMillis();
} }
protected final void announceThreadBlockRelease() { protected final void announceThreadBlockRelease() {
// shall only be used, if a thread blocks for an important reason // shall only be used, if a thread blocks for an important reason
// like a socket connect and must renew the timestamp to correct // like a socket connect and must renew the timestamp to correct
@ -62,78 +62,77 @@ public abstract class AbstractThread extends Thread implements WorkflowThread {
this.blockPause += thisBlockTime; this.blockPause += thisBlockTime;
this.busytime -= thisBlockTime; this.busytime -= thisBlockTime;
} }
protected final void announceMoreExecTime(final long millis) { protected final void announceMoreExecTime(final long millis) {
this.busytime += millis; this.busytime += millis;
} }
public final void setDescription(final String shortText, final String longText, final String monitorURL) { public final void setDescription(final String shortText, final String longText, final String monitorURL) {
// sets a visible description string // sets a visible description string
this.shortDescr = shortText; this.shortDescr = shortText;
this.longDescr = longText; this.longDescr = longText;
this.monitorURL = monitorURL; this.monitorURL = monitorURL;
} }
public final String getShortDescription() { public final String getShortDescription() {
return this.shortDescr; return this.shortDescr;
} }
public final String getLongDescription() { public final String getLongDescription() {
return this.longDescr; return this.longDescr;
} }
public String getMonitorURL() { public String getMonitorURL() {
return this.monitorURL; return this.monitorURL;
} }
public final long getBlockTime() { public final long getBlockTime() {
// returns the total time that this thread has been blocked so far // returns the total time that this thread has been blocked so far
return this.blockPause; return this.blockPause;
} }
public final long getExecTime() { public final long getExecTime() {
// returns the total time that this thread has worked so far // returns the total time that this thread has worked so far
return this.busytime; return this.busytime;
} }
public long getMemoryUse() { public long getMemoryUse() {
// returns the sum of all memory usage differences before and after one busy job // returns the sum of all memory usage differences before and after one busy job
return memuse; return this.memuse;
} }
public boolean shutdownInProgress() { public boolean shutdownInProgress() {
return !this.running || Thread.currentThread().isInterrupted(); return !this.running || Thread.currentThread().isInterrupted();
} }
public void terminate(final boolean waitFor) { public void terminate(final boolean waitFor) {
// after calling this method, the thread shall terminate // after calling this method, the thread shall terminate
this.running = false; this.running = false;
// interrupting the thread // interrupting the thread
this.interrupt(); interrupt();
// wait for termination // wait for termination
if (waitFor) { if (waitFor) {
// Busy waiting removed: while (this.isAlive()) try {this.sleep(100);} catch (InterruptedException e) {break;} try { this.join(3000); } catch (final InterruptedException e) { return; }
try { this.join(3000); } catch (final InterruptedException e) {return;}
} }
// If we reach this point, the process is closed // If we reach this point, the process is closed
} }
private final void logError(final String text,final Throwable thrown) { private final void logError(final String text,final Throwable thrown) {
if (log == null) Log.logSevere("THREAD-CONTROL", text, thrown); if (log == null) Log.logSevere("THREAD-CONTROL", text, thrown);
else log.logSevere(text,thrown); else log.logSevere(text,thrown);
} }
public void jobExceptionHandler(final Exception e) { public void jobExceptionHandler(final Exception e) {
if (!(e instanceof ClosedByInterruptException)) { if (!(e instanceof ClosedByInterruptException)) {
// default handler for job exceptions. shall be overridden for own handler // default handler for job exceptions. shall be overridden for own handler
logError("thread '" + this.getName() + "': " + e.toString(),e); logError("thread '" + getName() + "': " + e.toString(),e);
} }
} }
public void open() {}; // dummy definition; should be overridden public void open() {}; // dummy definition; should be overridden
public void close() {}; // dummy definition; should be overridden public void close() {}; // dummy definition; should be overridden
} }
