- enhancements for search speed

- bug fixes in many classes including basic data structure classes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7217 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent aa6075402a
commit aacf572a26

@@ -519,7 +519,7 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE);
ranked.run();
if (ranked.filteredCount() == 0) {

@@ -28,7 +28,7 @@
<div id="yacylivesearch">
<form id="ysearch" method="get" accept-charset="UTF-8" action="yacysearch.html"><p>
<input name="query" id="yquery" class="fancy" type="text" size="20" maxlength="80" value=""/>
<input type="hidden" name="verify" value="true" />
<input type="hidden" name="verify" value="ifexist" />
<input type="hidden" name="maximumRecords" value="20" />
<input type="hidden" name="resource" value="local" />
<input type="hidden" name="urlmaskfilter" value=".*" />

@@ -39,7 +39,7 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
@@ -195,7 +195,7 @@ public final class search {
int joincount = 0;
QueryParams theQuery = null;
SearchEvent theSearch = null;
ArrayList<ReverseElement<ResultEntry>> accu = null;
ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
@@ -365,7 +365,7 @@ public final class search {
final long timer = System.currentTimeMillis();
final StringBuilder links = new StringBuilder(6000);
String resource = null;
ReverseElement<ResultEntry> entry;
WeakPriorityBlockingQueue.Element<ResultEntry> entry;
for (int i = 0; i < accu.size(); i++) {
entry = accu.get(i);
resource = entry.getElement().resource();

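The two hunks above switch the accumulator from the concrete ReverseElement wrapper to the queue's own WeakPriorityBlockingQueue.Element interface. A minimal consumption sketch, assuming accu was filled by completeResults() as elsewhere in this patch; only the unwrapping idiom is the point:

WeakPriorityBlockingQueue.Element<ResultEntry> entry;
for (int i = 0; i < accu.size(); i++) {
    entry = accu.get(i);                                 // the wrapper, not the payload
    final ResultEntry resultEntry = entry.getElement();  // unwrap the payload
    final long weight = entry.getWeight();               // the ranking weight travels with it
    // ... use resultEntry.resource() exactly as before
}
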
@@ -100,7 +100,7 @@ public class yacysearchitem {
// text search
// generate result object
final ResultEntry result = theSearch.oneResult(item);
final ResultEntry result = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000);
if (result == null) return prop; // no content
@@ -136,7 +136,7 @@ public class yacysearchitem {
//prop.put("content_ybr", RankingProcess.ybr(result.hash()));
prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_host", result.url().getHost());
prop.putHTML("content_host", result.url().getHost() == null ? "" : result.url().getHost());
prop.putHTML("content_file", result.url().getFile());
prop.putHTML("content_path", result.url().getPath());
prop.put("content_nl", (item == 0) ? 0 : 1);
@@ -203,7 +203,7 @@ public class yacysearchitem {
// any other media content
// generate result object
final ResultEntry result = theSearch.oneResult(item);
final ResultEntry result = theSearch.oneResult(item, 500);
if (result == null) return prop; // no content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
@@ -234,7 +234,7 @@ public class yacysearchitem {
final int p = s.lastIndexOf('.');
if (p < 0) return s.substring(0, length - 3) + "...";
assert p >= 0;
assert length - (s.length() - p) - 3 >= 0;
assert length - (s.length() - p) - 3 >= 0: "length = " + length + ", s.length() = " + s.length() + ", p = " + p;
return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); // TODO check oob
}

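The assert above gains a diagnostic message. For context, a self-contained sketch of the extension-preserving shortener it sits in; the method name and the leading length check are assumptions, the rest follows the hunk:

private static String shortenPreservingExtension(final String s, final int length) {
    if (s.length() <= length) return s;                   // assumed fast path
    final int p = s.lastIndexOf('.');
    if (p < 0) return s.substring(0, length - 3) + "..."; // no extension: plain truncation
    // with a message, a failing assert now reports all three inputs
    assert length - (s.length() - p) - 3 >= 0 : "length = " + length + ", s.length() = " + s.length() + ", p = " + p;
    return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); // TODO check oob
}
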
@@ -303,7 +303,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
NOCACHE(0), // never use the cache, all content from fresh internet source
IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules
IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source.
CACHEONLY(3); // never go online, use all content from cache. If no cache exist, treat content as unavailable
CACHEONLY(3); // never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available
public int code;
private CacheStrategy(int code) {
this.code = code;
@@ -320,6 +320,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
if (name.equals("iffresh")) return IFFRESH;
if (name.equals("ifexist")) return IFEXIST;
if (name.equals("cacheonly")) return CACHEONLY;
if (name.equals("true")) return IFFRESH;
if (name.equals("false")) return CACHEONLY;
return null;
}
public String toName() {

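This decoder change pairs with the template change above (the live-search form now posts verify=ifexist instead of verify=true): the legacy boolean strings remain accepted so old templates and bookmarked URLs keep working. A sketch of the full mapping; the method name is assumed, and the nocache case is taken from the enum shown in the previous hunk:

public static CacheStrategy parse(final String name) { // decoder name assumed
    if (name.equals("nocache"))   return NOCACHE;
    if (name.equals("iffresh"))   return IFFRESH;
    if (name.equals("ifexist"))   return IFEXIST;
    if (name.equals("cacheonly")) return CACHEONLY;
    // backward compatibility with the old boolean verify parameter:
    if (name.equals("true"))      return IFFRESH;   // verify=true  -> re-check freshness
    if (name.equals("false"))     return CACHEONLY; // verify=false -> serve from cache only
    return null;                                    // unknown value: caller picks the default
}
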
@@ -109,8 +109,6 @@ import de.anomic.yacy.graphics.EncodedImage;
public final class HTTPDFileHandler {
private static final boolean safeServletsMode = false; // if true then all servlets are called synchronized
// create a class loader
private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/);
private static serverSwitch switchboard = null;
@@ -1177,15 +1175,8 @@ public final class HTTPDFileHandler {
return m;
}
public static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
// debug functions: for special servlets call them without reflection to get better stack trace results
Object result;
if (safeServletsMode) synchronized (switchboard) {
result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
} else {
result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
}
return result;
private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
}
/**

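Both branches of the removed safeServletsMode conditional executed the identical reflective call, so collapsing them (and dropping the dead flag) preserves behavior. The surviving dispatch, annotated:

private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args)
        throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    // rewriteMethod() resolves the servlet's static entry point; reflection
    // invokes it with the request, its arguments and the global switchboard
    return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
}
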
@@ -191,7 +191,7 @@ public class DocumentIndex extends Segment {
// make a query and start a search
QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault);
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 1);
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
rankedCache.start();
// search is running; retrieve results

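With the concurrency parameter gone, a RankingProcess always starts assuming a single feeder (the local index); remote feeders announce themselves later through moreFeeders(), as the SearchEvent hunks below show. The call site, annotated:

QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault);
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
// no trailing concurrency argument any more: the feeder count starts at 1
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
rankedCache.start(); // the thread feeds itself from the local index
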
@@ -65,7 +65,7 @@ public final class RankingProcess extends Thread {
public static BinSearch[] ybrTables = null; // block-rank tables
private static final int maxYBR = 3; // the lower this value, the faster the search
private static boolean useYBR = true;
private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000;
private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000;
private final QueryParams query;
private final TreeSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
@@ -76,9 +76,9 @@
private int remote_resourceSize, remote_indexCount, remote_peerCount;
private int local_resourceSize, local_indexCount;
private final WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> stack;
private final WeakPriorityBlockingQueue<WordReferenceVars> stack;
private int feeders;
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>> doubleDomCache; // key = domhash (6 bytes); value = like stack
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final Navigator ref; // reference score computation for the commonSense heuristic
@@ -86,14 +86,15 @@ public final class RankingProcess extends Thread {
private final Navigator authorNavigator;
private final Navigator namespaceNavigator;
private final ReferenceOrder order;
private final long startTime;
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null;
this.stack = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>(maxentries);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>>();
this.stack = new WeakPriorityBlockingQueue<WordReferenceVars>(maxentries);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.query = query;
this.order = order;
this.remote_peerCount = 0;
@@ -111,8 +112,8 @@ public final class RankingProcess extends Thread {
this.authorNavigator = new Navigator();
this.namespaceNavigator = new Navigator();
this.ref = new Navigator();
this.feeders = concurrency;
assert this.feeders >= 1;
this.feeders = 1;
this.startTime = System.currentTimeMillis();
}
public QueryParams getQuery() {
@@ -146,8 +147,9 @@ public final class RankingProcess extends Thread {
add(index, true, "local index: " + this.query.getSegment().getLocation(), -1);
} catch (final Exception e) {
Log.logException(e);
} finally {
oneFeederTerminated();
}
oneFeederTerminated();
}
public void add(final ReferenceContainer<WordReference> index, final boolean local, String resourceName, final int fullResource) {
@@ -226,7 +228,8 @@ public final class RankingProcess extends Thread {
// finally make a double-check and insert result to stack
if (urlhashes.add(iEntry.metadataHash())) {
stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
//System.out.println("stack.put: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
// increase counter for statistics
if (local) this.local_indexCount++; else this.remote_indexCount++;
}
@@ -250,8 +253,9 @@ public final class RankingProcess extends Thread {
this.feeders += countMoreFeeders;
}
private boolean feedingIsFinished() {
return this.feeders == 0;
public boolean feedingIsFinished() {
//System.out.println("feedingIsFinished: this.feeders == " + this.feeders);
return System.currentTimeMillis() - this.startTime > 50 && this.feeders == 0;
}
private boolean testFlags(final WordReference ientry) {
@@ -277,23 +281,37 @@ public final class RankingProcess extends Thread {
return localSearchInclusion;
}
private ReverseElement<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long timeout) {
private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long waitingtime) {
// returns from the current RWI list the best entry and removes this entry from the list
WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> m;
ReverseElement<WordReferenceVars> rwi;
WeakPriorityBlockingQueue<WordReferenceVars> m;
WeakPriorityBlockingQueue.Element<WordReferenceVars> rwi = null;
try {
//System.out.println("feeders = " + this.feeders);
while ((rwi = stack.poll((this.feedingIsFinished()) ? 0 : timeout)) != null) {
if (!skipDoubleDom) return rwi;
//System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain
long timeout = System.currentTimeMillis() + waitingtime;
while (this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage) {
if (waitingtime <= 0) {
rwi = stack.poll();
} else while (System.currentTimeMillis() < timeout) {
rwi = stack.poll(50);
if (rwi != null) break;
if (feedingIsFinished() && stack.sizeQueue() == 0) break;
}
if (rwi == null) break;
if (!skipDoubleDom) {
//System.out.println("!skipDoubleDom");
return rwi;
}
// check doubledom
final String domhash = new String(rwi.getElement().metadataHash(), 6, 6);
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
m = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
m = new WeakPriorityBlockingQueue<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(domhash, m);
//System.out.println("m == null");
return rwi;
}
@@ -302,13 +320,17 @@ public final class RankingProcess extends Thread {
}
} catch (InterruptedException e1) {
}
if (this.doubleDomCache.size() == 0) {
//System.out.println("this.doubleDomCache.size() == 0");
return null;
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
ReverseElement<WordReferenceVars> bestEntry = null;
ReverseElement<WordReferenceVars> o;
WeakPriorityBlockingQueue.Element<WordReferenceVars> bestEntry = null;
WeakPriorityBlockingQueue.Element<WordReferenceVars> o;
synchronized (this.doubleDomCache) {
final Iterator<WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>> i = this.doubleDomCache.values().iterator();
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) {
try {
m = i.next();
@@ -316,25 +338,39 @@ public final class RankingProcess extends Thread {
Log.logException(e);
break; // not the best solution...
}
if (m == null) continue;
if (m.isEmpty()) continue;
if (m == null) {
//System.out.println("m == null");
continue;
}
if (m.isEmpty()) {
//System.out.println("m.isEmpty()");
continue;
}
if (bestEntry == null) {
bestEntry = m.peek();
//System.out.println("bestEntry = m.peek() = " + bestEntry);
continue;
}
o = m.peek();
if (o == null) continue;
if (o == null) {
//System.out.println("o == null");
continue;
}
if (o.getWeight() < bestEntry.getWeight()) {
bestEntry = o;
}
}
}
if (bestEntry == null) return null;
if (bestEntry == null) {
//System.out.println("bestEntry == null");
return null;
}
// finally remove the best entry from the doubledom cache
m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6));
o = m.poll();
//assert o == null || o.element.metadataHash().equals(bestEntry.element.metadataHash()) : "bestEntry.element.metadataHash() = " + bestEntry.element.metadataHash() + ", o.element.metadataHash() = " + o.element.metadataHash();
//System.out.println("return bestEntry");
return bestEntry;
}
@@ -344,22 +380,19 @@ public final class RankingProcess extends Thread {
* limit is reached then null is returned. The caller may distinguish the timeout case
* from the case where there will be no more also in the future by calling this.feedingIsFinished()
* @param skipDoubleDom should be true if it is wanted that double domain entries are skipped
* @param timeout the time this method may take for a result computation
* @param waitingtime the time this method may take for a result computation
* @return a metadata entry for a url
*/
public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
public URIMetadataRow takeURL(final boolean skipDoubleDom, final long waitingtime) {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
int p = -1;
byte[] urlhash;
long timeleft;
while ((timeleft = timeLimit - System.currentTimeMillis()) > 0) {
final ReverseElement<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
if (obrwi == null) {
if (this.feedingIsFinished()) return null;
try {Thread.sleep(50);} catch (final InterruptedException e1) {}
continue;
}
while ((timeleft = timeout - System.currentTimeMillis()) > 0) {
//System.out.println("timeleft = " + timeleft);
final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element
urlhash = obrwi.getElement().metadataHash();
final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight());
if (page == null) {
@@ -463,9 +496,17 @@ public final class RankingProcess extends Thread {
return null;
}
protected int size() {
public int sizeQueue() {
int c = stack.sizeQueue();
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeQueue();
}
return c;
}
public int sizeAvailable() {
int c = stack.sizeAvailable();
for (WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> s: this.doubleDomCache.values()) {
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeAvailable();
}
return c;
@@ -473,7 +514,7 @@ public final class RankingProcess extends Thread {
public boolean isEmpty() {
if (!stack.isEmpty()) return false;
for (WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> s: this.doubleDomCache.values()) {
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
if (!s.isEmpty()) return false;
}
return true;

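The feeder bookkeeping is the heart of this patch: producers now decrement the counter in a finally block (so a throwing feeder can no longer stall consumers forever), and feedingIsFinished() additionally demands a 50 ms grace period so a just-started process is not mistaken for a drained one. A self-contained sketch of the pattern; an AtomicInteger stands in for the plain field used in the source:

import java.util.concurrent.atomic.AtomicInteger;

public class FeederGuard {
    private final AtomicInteger feeders = new AtomicInteger(1); // the initial local feeder
    private final long startTime = System.currentTimeMillis();

    public void moreFeeders(final int count) {
        feeders.addAndGet(count); // e.g. one per remote search thread
    }

    public void oneFeederTerminated() {
        final int remaining = feeders.decrementAndGet();
        assert remaining >= 0;
    }

    public boolean feedingIsFinished() {
        // the 50 ms grace period keeps early consumers polling instead of
        // concluding that a freshly started, still-empty process is done
        return System.currentTimeMillis() - startTime > 50 && feeders.get() == 0;
    }

    public void feed(final Runnable producer) {
        try {
            producer.run();
        } finally {
            oneFeederTerminated(); // guaranteed even if the producer throws
        }
    }
}
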
@@ -51,15 +51,15 @@ import de.anomic.yacy.graphics.ProfilingGraph;
public class ResultFetcher {
// input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query;
private final yacySeedDB peers;
// result values
protected final LoaderDispatcher loader;
protected Worker[] workerThreads;
protected final WeakPriorityBlockingQueue<ReverseElement<ResultEntry>> result;
protected final WeakPriorityBlockingQueue<ReverseElement<MediaSnippet>> images; // container to sort images by size
protected final WeakPriorityBlockingQueue<ResultEntry> result;
protected final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size
protected final HandleSet failedURLs; // a set of urlhashes that could not been verified during search
protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
long urlRetrievalAllTime;
@@ -74,15 +74,15 @@ public class ResultFetcher {
final int taketimeout) {
this.loader = loader;
this.rankedCache = rankedCache;
this.rankingProcess = rankedCache;
this.query = query;
this.peers = peers;
this.taketimeout = taketimeout;
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.result = new WeakPriorityBlockingQueue<ReverseElement<ResultEntry>>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<ReverseElement<MediaSnippet>>(-1);
this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);
this.failedURLs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // a set of url hashes where a worker thread tried to work on, but failed.
// snippets do not need to match with the complete query hashes,
@@ -107,19 +107,25 @@ public class ResultFetcher {
public void deployWorker(int deployCount, int neededResults) {
if (anyWorkerAlive()) return;
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) return;
this.workerThreads = new Worker[/*(query.snippetCacheStrategy.mustBeOffline()) ? 1 : */deployCount];
for (int i = 0; i < workerThreads.length; i++) {
this.workerThreads[i] = new Worker(i, 10000, query.snippetCacheStrategy, neededResults);
this.workerThreads[i].start();
}
synchronized(this.workerThreads) {
for (int i = 0; i < workerThreads.length; i++) {
Worker worker = new Worker(i, 1000, query.snippetCacheStrategy, neededResults);
worker.start();
this.workerThreads[i] = worker;
}
}
}
boolean anyWorkerAlive() {
if (this.workerThreads == null) return false;
for (int i = 0; i < this.workerThreads.length; i++) {
if ((this.workerThreads[i] != null) &&
(this.workerThreads[i].isAlive()) &&
(this.workerThreads[i].busytime() < 3000)) return true;
synchronized(this.workerThreads) {
for (int i = 0; i < this.workerThreads.length; i++) {
if ((this.workerThreads[i] != null) &&
(this.workerThreads[i].isAlive()) &&
(this.workerThreads[i].busytime() < 1000)) return true;
}
}
return false;
}
@@ -155,20 +161,32 @@ public class ResultFetcher {
//final int fetchAhead = snippetMode == 0 ? 0 : 10;
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
int loops = 0;
while (System.currentTimeMillis() < this.timeout) {
if (result.sizeAvailable() > neededResults) break;
this.lastLifeSign = System.currentTimeMillis();
this.lastLifeSign = System.currentTimeMillis();
// check if we have enough
if ((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() >= query.neededResults() + 50)) break;
if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
if (result.sizeAvailable() >= this.neededResults) {
//System.out.println("result.sizeAvailable() >= this.neededResults");
break;
}
// check if we can succeed if we try to take another url
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) {
break;
}
// get next entry
page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, taketimeout);
if (page == null) break;
page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) {
//System.out.println("page == null");
break; // no more available
}
if (failedURLs.has(page.hash())) continue;
loops++;
final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used
@@ -176,23 +194,25 @@ public class ResultFetcher {
urlRetrievalAllTime += resultEntry.dbRetrievalTime;
snippetComputationAllTime += resultEntry.snippetComputationTime;
//System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring());
// place the result to the result vector
// apply post-ranking
long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
long ranking = Long.valueOf(rankingProcess.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankingProcess.getTopics());
result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (nav_topics) rankedCache.addTopics(resultEntry);
//System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());
if (nav_topics) rankingProcess.addTopics(resultEntry);
}
//System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
} catch (final Exception e) {
Log.logException(e);
}
Log.logInfo("SEARCH", "resultWorker thread " + id + " terminated");
}
/**
* calculate the time since the worker has had the latest activity
* @return time in milliseconds lasted since latest activity
*/
public long busytime() {
return System.currentTimeMillis() - this.lastLifeSign;
}
@@ -274,9 +294,10 @@ public class ResultFetcher {
Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason);
}
public ResultEntry oneResult(final int item) {
public ResultEntry oneResult(final int item, long timeout) {
// check if we already retrieved this item
// (happens if a search pages is accessed a second time)
long finishTime = System.currentTimeMillis() + timeout;
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "started, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
if (this.result.sizeAvailable() > item) {
// we have the wanted result already in the result array .. return that
@@ -289,35 +310,40 @@ public class ResultFetcher {
System.out.println("result.size() = " + this.result.size());
System.out.println("query.neededResults() = " + query.neededResults());
*/
if ((!anyWorkerAlive()) &&
(((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() + 30 < query.neededResults())) ||
(this.result.sizeAvailable() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.sizeAvailable())
) {
if (this.result.sizeAvailable() <= item) {
// start worker threads to fetch urls and snippets
deployWorker(Math.min(10, query.itemsPerPage), query.neededResults());
//System.out.println("item = " + item);
//System.out.println("anyWorkerAlive() = " + anyWorkerAlive());
//System.out.println("rankingProcess.feedingIsFinished() = " + rankingProcess.feedingIsFinished());
//System.out.println("this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue());
//System.out.println("this.result.sizeAvailable() = " + this.result.sizeAvailable());
//System.out.println("this.result.sizeAvailable() + this.rankingProcess.sizeQueue() = " + (this.result.sizeAvailable() + this.rankingProcess.sizeQueue()));
deployWorker(Math.min(20, query.itemsPerPage), ((item + query.itemsPerPage) / query.itemsPerPage) * query.itemsPerPage);
}
// finally wait until enough results are there produced from the
// snippet fetch process
while ((anyWorkerAlive()) && (result.sizeAvailable() <= item)) {
try {Thread.sleep((item % query.itemsPerPage) * 10L);} catch (final InterruptedException e) {}
WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
while (System.currentTimeMillis() < finishTime) {
if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;
try {entry = this.result.element(item, 50);} catch (InterruptedException e) {Log.logException(e);}
if (entry != null) break;
if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break; //
}
// finally, if there is something, return the result
if (this.result.sizeAvailable() <= item) {
if (entry == null) {
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
return null;
}
ResultEntry re = this.result.element(item).getElement();
ResultEntry re = entry.getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
return re;
}
private int resultCounter = 0;
public ResultEntry nextResult() {
final ResultEntry re = oneResult(resultCounter);
final ResultEntry re = oneResult(resultCounter, 1000);
resultCounter++;
return re;
}
@@ -355,7 +381,7 @@ public class ResultFetcher {
return c;
}
public ArrayList<ReverseElement<ResultEntry>> completeResults(final long waitingtime) {
public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) {
final long timeout = System.currentTimeMillis() + waitingtime;
while ((result.sizeAvailable() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) {
try {Thread.sleep(20);} catch (final InterruptedException e) {}

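oneResult() now takes an explicit timeout and waits on the result queue position itself instead of sleeping while any worker is alive. Condensed, the waiting core looks like this (names as in the patch):

WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
final long finishTime = System.currentTimeMillis() + timeout;
while (System.currentTimeMillis() < finishTime) {
    try {
        entry = this.result.element(item, 50); // block at most 50 ms per round
    } catch (InterruptedException e) {
        Log.logException(e);
    }
    if (entry != null) break; // the wanted position is filled
    // give up early once nothing can arrive any more
    if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0
            && this.rankingProcess.feedingIsFinished()) break;
}
return entry == null ? null : entry.getElement();
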
@@ -66,8 +66,8 @@ public final class SearchEvent {
private long eventTime;
private QueryParams query;
private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher results;
private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher resultFetcher;
private final SecondarySearchSuperviser secondarySearchSuperviser;
@@ -112,10 +112,10 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1);
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
// start a local search concurrently
this.rankedCache.start();
this.rankingProcess.start();
// start global searches
final long timer = System.currentTimeMillis();
@@ -133,7 +133,7 @@ public final class SearchEvent {
query.getSegment(),
peers,
crawlResults,
rankedCache,
rankingProcess,
secondarySearchSuperviser,
fetchpeers,
Switchboard.urlBlacklist,
@@ -141,7 +141,7 @@ public final class SearchEvent {
query.constraint,
(query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
if (this.primarySearchThreads != null) {
if (this.primarySearchThreads.length > fetchpeers) this.rankedCache.moreFeeders(this.primarySearchThreads.length - fetchpeers);
this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
// finished searching
Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
@@ -151,20 +151,20 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.results = new ResultFetcher(loader, rankedCache, query, peers, 3000);
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 3000);
} else {
// do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
if (generateAbstracts) {
this.rankedCache.run(); // this is not started concurrently here on purpose!
this.rankingProcess.run(); // this is not started concurrently here on purpose!
// compute index abstracts
final long timer = System.currentTimeMillis();
int maxcount = -1;
long mindhtdistance = Long.MAX_VALUE, l;
byte[] wordhash;
assert this.rankedCache.searchContainerMap() != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankedCache.searchContainerMap().entrySet()) {
assert this.rankingProcess.searchContainerMap() != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankingProcess.searchContainerMap().entrySet()) {
wordhash = entry.getKey();
final ReferenceContainer<WordReference> container = entry.getValue();
assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash);
@@ -181,13 +181,21 @@ public final class SearchEvent {
IACount.put(wordhash, Integer.valueOf(container.size()));
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
}
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
} else {
this.rankedCache.start(); // start concurrently
this.rankingProcess.start(); // start concurrently
// but give process time to accumulate a certain amount of data
// before a reading process wants to get results from it
for (int i = 0; i < 10; i++) {
if (!this.rankingProcess.isAlive()) break;
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
// this will reduce the maximum waiting time until results are available to 100 milliseconds
// while we always get a good set of ranked data
}
// start worker threads to fetch urls and snippets
this.results = new ResultFetcher(loader, rankedCache, query, peers, 300);
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 300);
}
// clean up events
@@ -217,19 +225,23 @@ public final class SearchEvent {
public void setQuery(QueryParams query) {
this.query = query;
this.results.query = query;
this.resultFetcher.query = query;
}
public void cleanup() {
// stop all threads
if (primarySearchThreads != null) {
for (yacySearch search : this.primarySearchThreads) {
if (search.isAlive()) search.interrupt();
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt();
}
}
}
if (secondarySearchThreads != null) {
for (yacySearch search : this.secondarySearchThreads) {
if (search.isAlive()) search.interrupt();
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt();
}
}
}
@@ -241,7 +253,7 @@ public final class SearchEvent {
if (this.heuristics != null) this.heuristics.clear();
// execute deletion of failed words
int rw = this.results.failedURLs.size();
int rw = this.resultFetcher.failedURLs.size();
if (rw > 0) {
long start = System.currentTimeMillis();
final HandleSet removeWords = query.queryHashes;
@@ -254,7 +266,7 @@ public final class SearchEvent {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
this.query.getSegment().termIndex().remove(j.next(), this.results.failedURLs);
this.query.getSegment().termIndex().remove(j.next(), this.resultFetcher.failedURLs);
}
} catch (IOException e) {
Log.logException(e);
@@ -314,25 +326,25 @@ public final class SearchEvent {
}
public RankingProcess getRankingResult() {
return this.rankedCache;
return this.rankingProcess;
}
public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) {
return this.rankedCache.getNamespaceNavigator(maxentries);
return this.rankingProcess.getNamespaceNavigator(maxentries);
}
public List<Navigator.Item> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries);
return this.rankingProcess.getHostNavigator(maxentries);
}
public List<Navigator.Item> getTopicNavigator(final int maxentries) {
// returns a set of words that are computed as toplist
return this.rankedCache.getTopicNavigator(maxentries);
return this.rankingProcess.getTopicNavigator(maxentries);
}
public List<Navigator.Item> getAuthorNavigator(final int maxentries) {
// returns a list of authors so far seen on result set
return this.rankedCache.getAuthorNavigator(maxentries);
return this.rankingProcess.getAuthorNavigator(maxentries);
}
public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) {
@@ -347,7 +359,7 @@ public final class SearchEvent {
}
}
public ResultEntry oneResult(final int item) {
public ResultEntry oneResult(final int item, long timeout) {
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
(query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) {
// this is a search using remote search threads. Also the local
@@ -358,7 +370,7 @@ public final class SearchEvent {
try {localSearchThread.join();} catch (InterruptedException e) {}
}
}
return this.results.oneResult(item);
return this.resultFetcher.oneResult(item, timeout);
}
boolean secondarySearchStartet = false;
@@ -520,10 +532,10 @@ public final class SearchEvent {
if (words.length() == 0) continue; // ???
assert words.length() >= 12 : "words = " + words;
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words);
rankedCache.moreFeeders(1);
rankingProcess.moreFeeders(1);
checkedPeers.add(peer);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, query.getSegment(), peers, crawlResults, rankedCache, peer, Switchboard.urlBlacklist,
words, urls, query.getSegment(), peers, crawlResults, rankingProcess, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes);
}
@@ -532,7 +544,7 @@ public final class SearchEvent {
}
public ResultFetcher result() {
return this.results;
return this.resultFetcher;
}
}

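The local-search branch gains a short warm-up: after starting the concurrent ranking thread, the constructor yields for at most 10 x 10 ms so the first reader already finds ranked entries instead of racing an empty queue; if the thread terminates earlier, the loop exits immediately. The idiom in isolation:

this.rankingProcess.start();                   // rank concurrently
for (int i = 0; i < 10; i++) {                 // wait at most ~100 ms in total
    if (!this.rankingProcess.isAlive()) break; // already finished
    try { Thread.sleep(10); } catch (InterruptedException e) {}
}
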
@@ -830,7 +830,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
// checks for local/global IP range and local IP
public boolean isLocal() {
return this.protocol.equals("file") || this.protocol.equals("smb") || Domains.isLocal(this.host);
return this.isFile() || this.isSMB() || Domains.isLocal(this.host);
}
// language calculation

@@ -480,8 +480,8 @@ public class HTTPClient {
HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest;
HttpEntity entity = hrequest.getEntity();
assert entity != null;
assert !entity.isChunked();
assert entity.getContentLength() >= 0;
//assert !entity.isChunked();
//assert entity.getContentLength() >= 0;
assert !hrequest.expectContinue();
}
httpResponse = httpClient.execute(httpUriRequest, httpContext);

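The two disabled asserts had rejected chunked request entities and entities of unknown length; commenting them out presumably makes room for streamed request bodies, though the commit does not say so. The remaining checks, annotated:

HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest;
HttpEntity entity = hrequest.getEntity();
assert entity != null;
//assert !entity.isChunked();            // chunked bodies are tolerated now
//assert entity.getContentLength() >= 0; // unknown length is tolerated now
assert !hrequest.expectContinue();       // expect-continue is still ruled out
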
@@ -96,8 +96,8 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
// move value from A to B; since it was already removed from A, just put it to B
//System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size());
this.levelB.put((K) s, v);
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
}
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
return v;
}

@@ -38,9 +38,9 @@ import java.util.concurrent.TimeUnit;
public class WeakPriorityBlockingQueue<E> {
private final TreeSet<E> queue; // object within the stack, ordered using a TreeSet
private final TreeSet<Element<E>> queue; // object within the stack, ordered using a TreeSet
private final Semaphore enqueued; // semaphore for elements in the stack
private final ArrayList<E> drained; // objects that had been on the stack but had been removed
private final ArrayList<Element<E>> drained; // objects that had been on the stack but had been removed
protected int maxsize;
/**
@@ -52,8 +52,8 @@ public class WeakPriorityBlockingQueue<E> {
public WeakPriorityBlockingQueue(final int maxsize) {
// the maxsize is the maximum number of entries in the stack
// if this is set to -1, the size is unlimited
this.queue = new TreeSet<E>();
this.drained = new ArrayList<E>();
this.queue = new TreeSet<Element<E>>();
this.drained = new ArrayList<Element<E>>();
this.enqueued = new Semaphore(0);
this.maxsize = maxsize;
}
@@ -110,7 +110,7 @@ public class WeakPriorityBlockingQueue<E> {
* @param weight the weight of the element
* @param remove - the rating of the element that shall be removed in case that the stack has an size overflow
*/
public synchronized void put(final E element) {
public synchronized void put(final Element<E> element) {
// put the element on the stack
if (this.drained.contains(element)) return;
if (this.queue.size() == this.maxsize) {
@@ -127,7 +127,7 @@ public class WeakPriorityBlockingQueue<E> {
* return the element with the smallest weight and remove it from the stack
* @return null if no element is on the queue or the head of the queue
*/
public E poll() {
public Element<E> poll() {
boolean a = this.enqueued.tryAcquire();
if (!a) return null;
synchronized (this) {
@@ -142,7 +142,7 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue
* @throws InterruptedException
*/
public E poll(long timeout) throws InterruptedException {
public Element<E> poll(long timeout) throws InterruptedException {
boolean a = (timeout <= 0) ? this.enqueued.tryAcquire() : this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS);
if (!a) return null;
synchronized (this) {
@@ -155,15 +155,15 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue
* @throws InterruptedException
*/
public E take() throws InterruptedException {
public Element<E> take() throws InterruptedException {
this.enqueued.acquire();
synchronized (this) {
return takeUnsafe();
}
}
private E takeUnsafe() {
final E element = this.queue.first();
private Element<E> takeUnsafe() {
final Element<E> element = this.queue.first();
assert element != null;
this.queue.remove(element);
this.drained.add(element);
@@ -176,7 +176,7 @@ public class WeakPriorityBlockingQueue<E> {
* return the element with the smallest weight, but do not remove it
* @return null if no element is on the queue or the head of the queue
*/
public synchronized E peek() {
public synchronized Element<E> peek() {
if (this.queue.isEmpty()) return null;
return this.queue.first();
}
@@ -192,13 +192,15 @@ public class WeakPriorityBlockingQueue<E> {
* @param position inside the drained queue
* @return the element from the recorded position or null if that position is not available
*/
public synchronized E element(final int position) {
public Element<E> element(final int position) {
if (position < this.drained.size()) {
return this.drained.get(position);
}
if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element
while (position >= this.drained.size()) this.poll();
return this.drained.get(position);
synchronized (this) {
if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element
while (position >= this.drained.size()) this.poll();
return this.drained.get(position);
}
}
/**
@@ -210,12 +212,11 @@ public class WeakPriorityBlockingQueue<E> {
* @return the element from the recorded position or null if that position is not available within the timeout
* @throws InterruptedException
*/
public synchronized E element(final int position, long time) throws InterruptedException {
public Element<E> element(final int position, long time) throws InterruptedException {
long timeout = System.currentTimeMillis() + time;
if (position < this.drained.size()) {
return this.drained.get(position);
}
if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element
while (position >= this.drained.size()) {
long t = timeout - System.currentTimeMillis();
if (t <= 0) break;
@@ -232,7 +233,7 @@ public class WeakPriorityBlockingQueue<E> {
* @param count
* @return a list of elements in the stack
*/
public synchronized ArrayList<E> list(final int count) {
public synchronized ArrayList<Element<E>> list(final int count) {
if (count < 0) {
return list();
}
@@ -245,7 +246,7 @@ public class WeakPriorityBlockingQueue<E> {
* return all entries as they would be retrievable with element()
* @return a list of all elements in the stack
*/
public synchronized ArrayList<E> list() {
public synchronized ArrayList<Element<E>> list() {
// shift all elements
while (!this.queue.isEmpty()) this.poll();
return this.drained;
@@ -255,13 +256,13 @@ public class WeakPriorityBlockingQueue<E> {
* iterate over all elements available. All elements that are still in the queue are drained to recorded positions
* @return an iterator over all drained positions.
*/
public synchronized Iterator<E> iterator() {
public synchronized Iterator<Element<E>> iterator() {
// shift all elements to the offstack
while (!this.queue.isEmpty()) this.poll();
return this.drained.iterator();
}
protected interface Element<E> {
public interface Element<E> {
public long getWeight();
public E getElement();
public boolean equals(Element<E> o);
@@ -269,7 +270,7 @@ public class WeakPriorityBlockingQueue<E> {
public String toString();
}
protected abstract static class AbstractElement<E> {
protected abstract static class AbstractElement<E> implements Element<E> {
public long weight;
public E element;
@@ -299,7 +300,7 @@ public class WeakPriorityBlockingQueue<E> {
* natural ordering elements, can be used as container of objects <E> in the priority queue
* the elements with smallest ordering weights are first in the queue when elements are taken
*/
public static class NaturalElement<E> extends AbstractElement<E> implements Comparable<NaturalElement<E>>, Comparator<NaturalElement<E>> {
public static class NaturalElement<E> extends AbstractElement<E> implements Element<E>, Comparable<NaturalElement<E>>, Comparator<NaturalElement<E>> {
public NaturalElement(final E element, final long weight) {
this.element = element;
@@ -321,13 +322,14 @@ public class WeakPriorityBlockingQueue<E> {
if (o1h < o2h) return -1;
return 0;
}
}
/**
* reverse ordering elements, can be used as container of objects <E> in the priority queue
* the elements with highest ordering weights are first in the queue when elements are taken
*/
public static class ReverseElement<E> extends AbstractElement<E> implements Comparable<ReverseElement<E>>, Comparator<ReverseElement<E>> {
public static class ReverseElement<E> extends AbstractElement<E> implements Element<E>, Comparable<ReverseElement<E>>, Comparator<ReverseElement<E>> {
public ReverseElement(final E element, final long weight) {
this.element = element;
@@ -352,14 +354,26 @@ public class WeakPriorityBlockingQueue<E> {
}
public static void main(String[] args) {
WeakPriorityBlockingQueue<ReverseElement<String>> a = new WeakPriorityBlockingQueue<ReverseElement<String>>(3);
final WeakPriorityBlockingQueue<String> a = new WeakPriorityBlockingQueue<String>(3);
//final Element<String> REVERSE_POISON = new ReverseElement<String>("", Long.MIN_VALUE);
new Thread(){
public void run() {
Element<String> e;
try {
while ((e = a.poll(1000)) != null) System.out.println("> " + e.toString());
} catch (InterruptedException e1) {
e1.printStackTrace();
}
}
}.start();
a.put(new ReverseElement<String>("abc", 1));
//a.poll();
a.put(new ReverseElement<String>("abcx", 2));
a.put(new ReverseElement<String>("6s_7dfZk4xvc", 3));
a.put(new ReverseElement<String>("6s_7dfZk4xvcx", 4));
//a.put((Element<String>) REVERSE_POISON);
//a.poll();
System.out.println("size = " + a.sizeAvailable());
while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString());
//while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString());
}
}

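The queue is now parameterized with the payload type; callers wrap weights in an Element only at the put() boundary, and poll()/peek()/element() return the wrapper so payload and weight stay together. A minimal usage sketch against the API shown above:

final WeakPriorityBlockingQueue<String> q = new WeakPriorityBlockingQueue<String>(3);
// ReverseElement orders highest weights first when elements are taken
q.put(new WeakPriorityBlockingQueue.ReverseElement<String>("abc", 1));
q.put(new WeakPriorityBlockingQueue.ReverseElement<String>("abcx", 2));
final WeakPriorityBlockingQueue.Element<String> head = q.poll(); // the wrapper comes back
if (head != null) {
    System.out.println(head.getElement() + " has weight " + head.getWeight());
}
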
@@ -98,6 +98,11 @@ public interface Parser {
this.url = url;
}
public Failure(final String message, final MultiProtocolURI url, Throwable e) {
super(message + "; url = " + url.toNormalform(true, false), e);
this.url = url;
}
public MultiProtocolURI getURL() {
return this.url;
}

@@ -58,7 +58,7 @@ public class rssParser extends AbstractParser implements Parser {
try {
rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source, RSSReader.Type.none);
} catch (IOException e) {
throw new Parser.Failure("Load error:" + e.getMessage(), url);
throw new Parser.Failure("Load error:" + e.getMessage(), url, e);
}
RSSFeed feed = rssReader.getFeed();

@@ -273,6 +273,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
// checks for local/global IP range and local IP
public final boolean isLocal() {
if (this.isSMB() || this.isFile()) return true;
if (this.hash == null) {
if (super.isLocal()) return true;
synchronized (this) {

@@ -179,6 +179,7 @@ public final class LoaderDispatcher {
private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
// get the protocol of the next URL
final DigestURI url = request.url();
if (url.isFile() || url.isSMB()) cacheStrategy = CrawlProfile.CacheStrategy.NOCACHE; // load just from the file system
final String protocol = url.getProtocol();
final String host = url.getHost();
