diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 90d454b1b..272edfeef 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -519,7 +519,7 @@ public class IndexControlRWIs_p { public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) { final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking()); final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); - final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1); + final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE); ranked.run(); if (ranked.filteredCount() == 0) { diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template index c3a2042e3..f8be7c6ad 100644 --- a/htroot/env/templates/header.template +++ b/htroot/env/templates/header.template @@ -28,7 +28,7 @@

- + diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 415a68b5a..357d83b86 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -39,7 +39,7 @@ import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; +import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceRow; @@ -195,7 +195,7 @@ public final class search { int joincount = 0; QueryParams theQuery = null; SearchEvent theSearch = null; - ArrayList> accu = null; + ArrayList> accu = null; if ((query.length() == 0) && (abstractSet != null)) { // this is _not_ a normal search, only a request for index abstracts Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); @@ -365,7 +365,7 @@ public final class search { final long timer = System.currentTimeMillis(); final StringBuilder links = new StringBuilder(6000); String resource = null; - ReverseElement entry; + WeakPriorityBlockingQueue.Element entry; for (int i = 0; i < accu.size(); i++) { entry = accu.get(i); resource = entry.getElement().resource(); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 8180913b1..bb5a1b5e5 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -100,7 +100,7 @@ public class yacysearchitem { // text search // generate result object - final ResultEntry result = theSearch.oneResult(item); + final ResultEntry result = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000); if (result == null) return prop; // no content @@ -136,7 +136,7 @@ public class yacysearchitem { //prop.put("content_ybr", RankingProcess.ybr(result.hash())); prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename' prop.putHTML("content_sizename", sizename(result.filesize())); - prop.putHTML("content_host", result.url().getHost()); + prop.putHTML("content_host", result.url().getHost() == null ? "" : result.url().getHost()); prop.putHTML("content_file", result.url().getFile()); prop.putHTML("content_path", result.url().getPath()); prop.put("content_nl", (item == 0) ? 0 : 1); @@ -203,7 +203,7 @@ public class yacysearchitem { // any other media content // generate result object - final ResultEntry result = theSearch.oneResult(item); + final ResultEntry result = theSearch.oneResult(item, 500); if (result == null) return prop; // no content prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content @@ -234,7 +234,7 @@ public class yacysearchitem { final int p = s.lastIndexOf('.'); if (p < 0) return s.substring(0, length - 3) + "..."; assert p >= 0; - assert length - (s.length() - p) - 3 >= 0; + assert length - (s.length() - p) - 3 >= 0: "length = " + length + ", s.length() = " + s.length() + ", p = " + p; return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); // TODO check oob } diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index 08c028c3b..1943665b2 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -303,7 +303,7 @@ public class CrawlProfile extends ConcurrentHashMap implements M NOCACHE(0), // never use the cache, all content from fresh internet source IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source. - CACHEONLY(3); // never go online, use all content from cache. If no cache exist, treat content as unavailable + CACHEONLY(3); // never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available public int code; private CacheStrategy(int code) { this.code = code; @@ -320,6 +320,8 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (name.equals("iffresh")) return IFFRESH; if (name.equals("ifexist")) return IFEXIST; if (name.equals("cacheonly")) return CACHEONLY; + if (name.equals("true")) return IFFRESH; + if (name.equals("false")) return CACHEONLY; return null; } public String toName() { diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index 18cdd5850..f149a1d17 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -109,8 +109,6 @@ import de.anomic.yacy.graphics.EncodedImage; public final class HTTPDFileHandler { - private static final boolean safeServletsMode = false; // if true then all servlets are called synchronized - // create a class loader private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); private static serverSwitch switchboard = null; @@ -1177,15 +1175,8 @@ public final class HTTPDFileHandler { return m; } - public static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { - // debug functions: for special servlets call them without reflection to get better stack trace results - Object result; - if (safeServletsMode) synchronized (switchboard) { - result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard}); - } else { - result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard}); - } - return result; + private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { + return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard}); } /** diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java index 46992bd84..99c5e75a0 100644 --- a/source/de/anomic/search/DocumentIndex.java +++ b/source/de/anomic/search/DocumentIndex.java @@ -191,7 +191,7 @@ public class DocumentIndex extends Segment { // make a query and start a search QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault); ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); - RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 1); + RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation); rankedCache.start(); // search is running; retrieve results diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 97e77e144..17de349c7 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -65,7 +65,7 @@ public final class RankingProcess extends Thread { public static BinSearch[] ybrTables = null; // block-rank tables private static final int maxYBR = 3; // the lower this value, the faster the search private static boolean useYBR = true; - private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000; + private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000; private final QueryParams query; private final TreeSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) @@ -76,9 +76,9 @@ public final class RankingProcess extends Thread { private int remote_resourceSize, remote_indexCount, remote_peerCount; private int local_resourceSize, local_indexCount; - private final WeakPriorityBlockingQueue> stack; + private final WeakPriorityBlockingQueue stack; private int feeders; - private final ConcurrentHashMap>> doubleDomCache; // key = domhash (6 bytes); value = like stack + private final ConcurrentHashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack //private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process private final Navigator ref; // reference score computation for the commonSense heuristic @@ -86,14 +86,15 @@ public final class RankingProcess extends Thread { private final Navigator authorNavigator; private final Navigator namespaceNavigator; private final ReferenceOrder order; + private final long startTime; - public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) { + public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime // sortorder: 0 = hash, 1 = url, 2 = ranking this.localSearchInclusion = null; - this.stack = new WeakPriorityBlockingQueue>(maxentries); - this.doubleDomCache = new ConcurrentHashMap>>(); + this.stack = new WeakPriorityBlockingQueue(maxentries); + this.doubleDomCache = new ConcurrentHashMap>(); this.query = query; this.order = order; this.remote_peerCount = 0; @@ -111,8 +112,8 @@ public final class RankingProcess extends Thread { this.authorNavigator = new Navigator(); this.namespaceNavigator = new Navigator(); this.ref = new Navigator(); - this.feeders = concurrency; - assert this.feeders >= 1; + this.feeders = 1; + this.startTime = System.currentTimeMillis(); } public QueryParams getQuery() { @@ -146,8 +147,9 @@ public final class RankingProcess extends Thread { add(index, true, "local index: " + this.query.getSegment().getLocation(), -1); } catch (final Exception e) { Log.logException(e); + } finally { + oneFeederTerminated(); } - oneFeederTerminated(); } public void add(final ReferenceContainer index, final boolean local, String resourceName, final int fullResource) { @@ -226,7 +228,8 @@ public final class RankingProcess extends Thread { // finally make a double-check and insert result to stack if (urlhashes.add(iEntry.metadataHash())) { stack.put(new ReverseElement(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest) - + //System.out.println("stack.put: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue()); + // increase counter for statistics if (local) this.local_indexCount++; else this.remote_indexCount++; } @@ -250,8 +253,9 @@ public final class RankingProcess extends Thread { this.feeders += countMoreFeeders; } - private boolean feedingIsFinished() { - return this.feeders == 0; + public boolean feedingIsFinished() { + //System.out.println("feedingIsFinished: this.feeders == " + this.feeders); + return System.currentTimeMillis() - this.startTime > 50 && this.feeders == 0; } private boolean testFlags(final WordReference ientry) { @@ -277,23 +281,37 @@ public final class RankingProcess extends Thread { return localSearchInclusion; } - private ReverseElement takeRWI(final boolean skipDoubleDom, long timeout) { + private WeakPriorityBlockingQueue.Element takeRWI(final boolean skipDoubleDom, long waitingtime) { // returns from the current RWI list the best entry and removes this entry from the list - WeakPriorityBlockingQueue> m; - ReverseElement rwi; + WeakPriorityBlockingQueue m; + WeakPriorityBlockingQueue.Element rwi = null; try { - //System.out.println("feeders = " + this.feeders); - while ((rwi = stack.poll((this.feedingIsFinished()) ? 0 : timeout)) != null) { - if (!skipDoubleDom) return rwi; + //System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue()); + int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain + long timeout = System.currentTimeMillis() + waitingtime; + while (this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage) { + if (waitingtime <= 0) { + rwi = stack.poll(); + } else while (System.currentTimeMillis() < timeout) { + rwi = stack.poll(50); + if (rwi != null) break; + if (feedingIsFinished() && stack.sizeQueue() == 0) break; + } + if (rwi == null) break; + if (!skipDoubleDom) { + //System.out.println("!skipDoubleDom"); + return rwi; + } // check doubledom final String domhash = new String(rwi.getElement().metadataHash(), 6, 6); m = this.doubleDomCache.get(domhash); if (m == null) { // first appearance of dom - m = new WeakPriorityBlockingQueue>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); + m = new WeakPriorityBlockingQueue((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); this.doubleDomCache.put(domhash, m); + //System.out.println("m == null"); return rwi; } @@ -302,13 +320,17 @@ public final class RankingProcess extends Thread { } } catch (InterruptedException e1) { } + if (this.doubleDomCache.size() == 0) { + //System.out.println("this.doubleDomCache.size() == 0"); + return null; + } // no more entries in sorted RWI entries. Now take Elements from the doubleDomCache // find best entry from all caches - ReverseElement bestEntry = null; - ReverseElement o; + WeakPriorityBlockingQueue.Element bestEntry = null; + WeakPriorityBlockingQueue.Element o; synchronized (this.doubleDomCache) { - final Iterator>> i = this.doubleDomCache.values().iterator(); + final Iterator> i = this.doubleDomCache.values().iterator(); while (i.hasNext()) { try { m = i.next(); @@ -316,25 +338,39 @@ public final class RankingProcess extends Thread { Log.logException(e); break; // not the best solution... } - if (m == null) continue; - if (m.isEmpty()) continue; + if (m == null) { + //System.out.println("m == null"); + continue; + } + if (m.isEmpty()) { + //System.out.println("m.isEmpty()"); + continue; + } if (bestEntry == null) { bestEntry = m.peek(); + //System.out.println("bestEntry = m.peek() = " + bestEntry); continue; } o = m.peek(); - if (o == null) continue; + if (o == null) { + //System.out.println("o == null"); + continue; + } if (o.getWeight() < bestEntry.getWeight()) { bestEntry = o; } } } - if (bestEntry == null) return null; + if (bestEntry == null) { + //System.out.println("bestEntry == null"); + return null; + } // finally remove the best entry from the doubledom cache m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6)); o = m.poll(); //assert o == null || o.element.metadataHash().equals(bestEntry.element.metadataHash()) : "bestEntry.element.metadataHash() = " + bestEntry.element.metadataHash() + ", o.element.metadataHash() = " + o.element.metadataHash(); + //System.out.println("return bestEntry"); return bestEntry; } @@ -344,22 +380,19 @@ public final class RankingProcess extends Thread { * limit is reached then null is returned. The caller may distinguish the timeout case * from the case where there will be no more also in the future by calling this.feedingIsFinished() * @param skipDoubleDom should be true if it is wanted that double domain entries are skipped - * @param timeout the time this method may take for a result computation + * @param waitingtime the time this method may take for a result computation * @return a metadata entry for a url */ - public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) { + public URIMetadataRow takeURL(final boolean skipDoubleDom, final long waitingtime) { // returns from the current RWI list the best URL entry and removes this entry from the list - long timeLimit = System.currentTimeMillis() + Math.max(10, timeout); + long timeout = System.currentTimeMillis() + Math.max(10, waitingtime); int p = -1; byte[] urlhash; long timeleft; - while ((timeleft = timeLimit - System.currentTimeMillis()) > 0) { - final ReverseElement obrwi = takeRWI(skipDoubleDom, timeleft); - if (obrwi == null) { - if (this.feedingIsFinished()) return null; - try {Thread.sleep(50);} catch (final InterruptedException e1) {} - continue; - } + while ((timeleft = timeout - System.currentTimeMillis()) > 0) { + //System.out.println("timeleft = " + timeleft); + final WeakPriorityBlockingQueue.Element obrwi = takeRWI(skipDoubleDom, timeleft); + if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element urlhash = obrwi.getElement().metadataHash(); final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight()); if (page == null) { @@ -463,9 +496,17 @@ public final class RankingProcess extends Thread { return null; } - protected int size() { + public int sizeQueue() { + int c = stack.sizeQueue(); + for (WeakPriorityBlockingQueue s: this.doubleDomCache.values()) { + c += s.sizeQueue(); + } + return c; + } + + public int sizeAvailable() { int c = stack.sizeAvailable(); - for (WeakPriorityBlockingQueue> s: this.doubleDomCache.values()) { + for (WeakPriorityBlockingQueue s: this.doubleDomCache.values()) { c += s.sizeAvailable(); } return c; @@ -473,7 +514,7 @@ public final class RankingProcess extends Thread { public boolean isEmpty() { if (!stack.isEmpty()) return false; - for (WeakPriorityBlockingQueue> s: this.doubleDomCache.values()) { + for (WeakPriorityBlockingQueue s: this.doubleDomCache.values()) { if (!s.isEmpty()) return false; } return true; diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index 1024d53e4..8220c8785 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -51,15 +51,15 @@ import de.anomic.yacy.graphics.ProfilingGraph; public class ResultFetcher { // input values - final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container + final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container QueryParams query; private final yacySeedDB peers; // result values protected final LoaderDispatcher loader; protected Worker[] workerThreads; - protected final WeakPriorityBlockingQueue> result; - protected final WeakPriorityBlockingQueue> images; // container to sort images by size + protected final WeakPriorityBlockingQueue result; + protected final WeakPriorityBlockingQueue images; // container to sort images by size protected final HandleSet failedURLs; // a set of urlhashes that could not been verified during search protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets long urlRetrievalAllTime; @@ -74,15 +74,15 @@ public class ResultFetcher { final int taketimeout) { this.loader = loader; - this.rankedCache = rankedCache; + this.rankingProcess = rankedCache; this.query = query; this.peers = peers; this.taketimeout = taketimeout; this.urlRetrievalAllTime = 0; this.snippetComputationAllTime = 0; - this.result = new WeakPriorityBlockingQueue>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking - this.images = new WeakPriorityBlockingQueue>(-1); + this.result = new WeakPriorityBlockingQueue(-1); // this is the result, enriched with snippets, ranked and ordered by ranking + this.images = new WeakPriorityBlockingQueue(-1); this.failedURLs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // a set of url hashes where a worker thread tried to work on, but failed. // snippets do not need to match with the complete query hashes, @@ -107,19 +107,25 @@ public class ResultFetcher { public void deployWorker(int deployCount, int neededResults) { if (anyWorkerAlive()) return; + if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) return; this.workerThreads = new Worker[/*(query.snippetCacheStrategy.mustBeOffline()) ? 1 : */deployCount]; - for (int i = 0; i < workerThreads.length; i++) { - this.workerThreads[i] = new Worker(i, 10000, query.snippetCacheStrategy, neededResults); - this.workerThreads[i].start(); - } + synchronized(this.workerThreads) { + for (int i = 0; i < workerThreads.length; i++) { + Worker worker = new Worker(i, 1000, query.snippetCacheStrategy, neededResults); + worker.start(); + this.workerThreads[i] = worker; + } + } } boolean anyWorkerAlive() { if (this.workerThreads == null) return false; - for (int i = 0; i < this.workerThreads.length; i++) { - if ((this.workerThreads[i] != null) && - (this.workerThreads[i].isAlive()) && - (this.workerThreads[i].busytime() < 3000)) return true; + synchronized(this.workerThreads) { + for (int i = 0; i < this.workerThreads.length; i++) { + if ((this.workerThreads[i] != null) && + (this.workerThreads[i].isAlive()) && + (this.workerThreads[i].busytime() < 1000)) return true; + } } return false; } @@ -155,20 +161,32 @@ public class ResultFetcher { //final int fetchAhead = snippetMode == 0 ? 0 : 10; boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0; try { + //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis())); + int loops = 0; while (System.currentTimeMillis() < this.timeout) { - if (result.sizeAvailable() > neededResults) break; - this.lastLifeSign = System.currentTimeMillis(); - + this.lastLifeSign = System.currentTimeMillis(); + // check if we have enough - if ((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() >= query.neededResults() + 50)) break; - if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break; + if (result.sizeAvailable() >= this.neededResults) { + //System.out.println("result.sizeAvailable() >= this.neededResults"); + break; + } + + // check if we can succeed if we try to take another url + if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) { + break; + } // get next entry - page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis()); - //if (page == null) page = rankedCache.takeURL(false, taketimeout); - if (page == null) break; + page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis()); + //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis()); + if (page == null) { + //System.out.println("page == null"); + break; // no more available + } if (failedURLs.has(page.hash())) continue; - + + loops++; final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0 if (resultEntry == null) continue; // the entry had some problems, cannot be used @@ -176,23 +194,25 @@ public class ResultFetcher { urlRetrievalAllTime += resultEntry.dbRetrievalTime; snippetComputationAllTime += resultEntry.snippetComputationTime; - //System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring()); // place the result to the result vector // apply post-ranking - long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word())); - ranking += postRanking(resultEntry, rankedCache.getTopics()); - //System.out.println("*** resultEntry.hash = " + resultEntry.hash()); + long ranking = Long.valueOf(rankingProcess.getOrder().cardinal(resultEntry.word())); + ranking += postRanking(resultEntry, rankingProcess.getTopics()); result.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow - if (nav_topics) rankedCache.addTopics(resultEntry); - //System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url()); + if (nav_topics) rankingProcess.addTopics(resultEntry); } + //System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops); } catch (final Exception e) { Log.logException(e); } Log.logInfo("SEARCH", "resultWorker thread " + id + " terminated"); } + /** + * calculate the time since the worker has had the latest activity + * @return time in milliseconds lasted since latest activity + */ public long busytime() { return System.currentTimeMillis() - this.lastLifeSign; } @@ -274,9 +294,10 @@ public class ResultFetcher { Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason); } - public ResultEntry oneResult(final int item) { + public ResultEntry oneResult(final int item, long timeout) { // check if we already retrieved this item // (happens if a search pages is accessed a second time) + long finishTime = System.currentTimeMillis() + timeout; EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "started, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false); if (this.result.sizeAvailable() > item) { // we have the wanted result already in the result array .. return that @@ -289,35 +310,40 @@ public class ResultFetcher { System.out.println("result.size() = " + this.result.size()); System.out.println("query.neededResults() = " + query.neededResults()); */ - if ((!anyWorkerAlive()) && - (((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() + 30 < query.neededResults())) || - (this.result.sizeAvailable() < query.neededResults())) && - //(event.query.onlineSnippetFetch) && - (this.rankedCache.size() > this.result.sizeAvailable()) - ) { + if (this.result.sizeAvailable() <= item) { // start worker threads to fetch urls and snippets - deployWorker(Math.min(10, query.itemsPerPage), query.neededResults()); + //System.out.println("item = " + item); + //System.out.println("anyWorkerAlive() = " + anyWorkerAlive()); + //System.out.println("rankingProcess.feedingIsFinished() = " + rankingProcess.feedingIsFinished()); + //System.out.println("this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue()); + //System.out.println("this.result.sizeAvailable() = " + this.result.sizeAvailable()); + //System.out.println("this.result.sizeAvailable() + this.rankingProcess.sizeQueue() = " + (this.result.sizeAvailable() + this.rankingProcess.sizeQueue())); + deployWorker(Math.min(20, query.itemsPerPage), ((item + query.itemsPerPage) / query.itemsPerPage) * query.itemsPerPage); } // finally wait until enough results are there produced from the // snippet fetch process - while ((anyWorkerAlive()) && (result.sizeAvailable() <= item)) { - try {Thread.sleep((item % query.itemsPerPage) * 10L);} catch (final InterruptedException e) {} + WeakPriorityBlockingQueue.Element entry = null; + while (System.currentTimeMillis() < finishTime) { + if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break; + try {entry = this.result.element(item, 50);} catch (InterruptedException e) {Log.logException(e);} + if (entry != null) break; + if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break; // } - + // finally, if there is something, return the result - if (this.result.sizeAvailable() <= item) { + if (entry == null) { EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false); return null; } - ResultEntry re = this.result.element(item).getElement(); + ResultEntry re = entry.getElement(); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false); return re; } private int resultCounter = 0; public ResultEntry nextResult() { - final ResultEntry re = oneResult(resultCounter); + final ResultEntry re = oneResult(resultCounter, 1000); resultCounter++; return re; } @@ -355,7 +381,7 @@ public class ResultFetcher { return c; } - public ArrayList> completeResults(final long waitingtime) { + public ArrayList> completeResults(final long waitingtime) { final long timeout = System.currentTimeMillis() + waitingtime; while ((result.sizeAvailable() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) { try {Thread.sleep(20);} catch (final InterruptedException e) {} diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index a9e4f9a44..1c219a3e2 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -66,8 +66,8 @@ public final class SearchEvent { private long eventTime; private QueryParams query; private final yacySeedDB peers; - private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container - private ResultFetcher results; + private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container + private ResultFetcher resultFetcher; private final SecondarySearchSuperviser secondarySearchSuperviser; @@ -112,10 +112,10 @@ public final class SearchEvent { // initialize a ranking process that is the target for data // that is generated concurrently from local and global search threads - this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1); + this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation); // start a local search concurrently - this.rankedCache.start(); + this.rankingProcess.start(); // start global searches final long timer = System.currentTimeMillis(); @@ -133,7 +133,7 @@ public final class SearchEvent { query.getSegment(), peers, crawlResults, - rankedCache, + rankingProcess, secondarySearchSuperviser, fetchpeers, Switchboard.urlBlacklist, @@ -141,7 +141,7 @@ public final class SearchEvent { query.constraint, (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes); if (this.primarySearchThreads != null) { - if (this.primarySearchThreads.length > fetchpeers) this.rankedCache.moreFeeders(this.primarySearchThreads.length - fetchpeers); + this.rankingProcess.moreFeeders(this.primarySearchThreads.length); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false); // finished searching Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); @@ -151,20 +151,20 @@ public final class SearchEvent { } // start worker threads to fetch urls and snippets - this.results = new ResultFetcher(loader, rankedCache, query, peers, 3000); + this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 3000); } else { // do a local search - this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1); + this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation); if (generateAbstracts) { - this.rankedCache.run(); // this is not started concurrently here on purpose! + this.rankingProcess.run(); // this is not started concurrently here on purpose! // compute index abstracts final long timer = System.currentTimeMillis(); int maxcount = -1; long mindhtdistance = Long.MAX_VALUE, l; byte[] wordhash; - assert this.rankedCache.searchContainerMap() != null; - for (Map.Entry> entry : this.rankedCache.searchContainerMap().entrySet()) { + assert this.rankingProcess.searchContainerMap() != null; + for (Map.Entry> entry : this.rankingProcess.searchContainerMap().entrySet()) { wordhash = entry.getKey(); final ReferenceContainer container = entry.getValue(); assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash); @@ -181,13 +181,21 @@ public final class SearchEvent { IACount.put(wordhash, Integer.valueOf(container.size())); IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString()); } - EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false); + EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false); } else { - this.rankedCache.start(); // start concurrently + this.rankingProcess.start(); // start concurrently + // but give process time to accumulate a certain amount of data + // before a reading process wants to get results from it + for (int i = 0; i < 10; i++) { + if (!this.rankingProcess.isAlive()) break; + try {Thread.sleep(10);} catch (InterruptedException e) {} + } + // this will reduce the maximum waiting time until results are available to 100 milliseconds + // while we always get a good set of ranked data } // start worker threads to fetch urls and snippets - this.results = new ResultFetcher(loader, rankedCache, query, peers, 300); + this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 300); } // clean up events @@ -217,19 +225,23 @@ public final class SearchEvent { public void setQuery(QueryParams query) { this.query = query; - this.results.query = query; + this.resultFetcher.query = query; } public void cleanup() { // stop all threads if (primarySearchThreads != null) { for (yacySearch search : this.primarySearchThreads) { - if (search.isAlive()) search.interrupt(); + if (search != null) synchronized (search) { + if (search.isAlive()) search.interrupt(); + } } } if (secondarySearchThreads != null) { for (yacySearch search : this.secondarySearchThreads) { - if (search.isAlive()) search.interrupt(); + if (search != null) synchronized (search) { + if (search.isAlive()) search.interrupt(); + } } } @@ -241,7 +253,7 @@ public final class SearchEvent { if (this.heuristics != null) this.heuristics.clear(); // execute deletion of failed words - int rw = this.results.failedURLs.size(); + int rw = this.resultFetcher.failedURLs.size(); if (rw > 0) { long start = System.currentTimeMillis(); final HandleSet removeWords = query.queryHashes; @@ -254,7 +266,7 @@ public final class SearchEvent { final Iterator j = removeWords.iterator(); // remove the same url hashes for multiple words while (j.hasNext()) { - this.query.getSegment().termIndex().remove(j.next(), this.results.failedURLs); + this.query.getSegment().termIndex().remove(j.next(), this.resultFetcher.failedURLs); } } catch (IOException e) { Log.logException(e); @@ -314,25 +326,25 @@ public final class SearchEvent { } public RankingProcess getRankingResult() { - return this.rankedCache; + return this.rankingProcess; } public ArrayList getNamespaceNavigator(int maxentries) { - return this.rankedCache.getNamespaceNavigator(maxentries); + return this.rankingProcess.getNamespaceNavigator(maxentries); } public List getHostNavigator(int maxentries) { - return this.rankedCache.getHostNavigator(maxentries); + return this.rankingProcess.getHostNavigator(maxentries); } public List getTopicNavigator(final int maxentries) { // returns a set of words that are computed as toplist - return this.rankedCache.getTopicNavigator(maxentries); + return this.rankingProcess.getTopicNavigator(maxentries); } public List getAuthorNavigator(final int maxentries) { // returns a list of authors so far seen on result set - return this.rankedCache.getAuthorNavigator(maxentries); + return this.rankingProcess.getAuthorNavigator(maxentries); } public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) { @@ -347,7 +359,7 @@ public final class SearchEvent { } } - public ResultEntry oneResult(final int item) { + public ResultEntry oneResult(final int item, long timeout) { if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || (query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) { // this is a search using remote search threads. Also the local @@ -358,7 +370,7 @@ public final class SearchEvent { try {localSearchThread.join();} catch (InterruptedException e) {} } } - return this.results.oneResult(item); + return this.resultFetcher.oneResult(item, timeout); } boolean secondarySearchStartet = false; @@ -520,10 +532,10 @@ public final class SearchEvent { if (words.length() == 0) continue; // ??? assert words.length() >= 12 : "words = " + words; //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words); - rankedCache.moreFeeders(1); + rankingProcess.moreFeeders(1); checkedPeers.add(peer); secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( - words, urls, query.getSegment(), peers, crawlResults, rankedCache, peer, Switchboard.urlBlacklist, + words, urls, query.getSegment(), peers, crawlResults, rankingProcess, peer, Switchboard.urlBlacklist, query.ranking, query.constraint, preselectedPeerHashes); } @@ -532,7 +544,7 @@ public final class SearchEvent { } public ResultFetcher result() { - return this.results; + return this.resultFetcher; } } diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index 10dbdb5de..8ff68abdb 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -830,7 +830,7 @@ public class MultiProtocolURI implements Serializable, Comparable= 0; + //assert !entity.isChunked(); + //assert entity.getContentLength() >= 0; assert !hrequest.expectContinue(); } httpResponse = httpClient.execute(httpUriRequest, httpContext); diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java index 9552d2200..2f8fa3e19 100644 --- a/source/net/yacy/cora/storage/SimpleARC.java +++ b/source/net/yacy/cora/storage/SimpleARC.java @@ -96,8 +96,8 @@ abstract class SimpleARC extends AbstractMap implements Map, I // move value from A to B; since it was already removed from A, just put it to B //System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size()); this.levelB.put((K) s, v); + assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically } - assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically return v; } diff --git a/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java b/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java index acc568454..a6798a869 100644 --- a/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java +++ b/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java @@ -38,9 +38,9 @@ import java.util.concurrent.TimeUnit; public class WeakPriorityBlockingQueue { - private final TreeSet queue; // object within the stack, ordered using a TreeSet + private final TreeSet> queue; // object within the stack, ordered using a TreeSet private final Semaphore enqueued; // semaphore for elements in the stack - private final ArrayList drained; // objects that had been on the stack but had been removed + private final ArrayList> drained; // objects that had been on the stack but had been removed protected int maxsize; /** @@ -52,8 +52,8 @@ public class WeakPriorityBlockingQueue { public WeakPriorityBlockingQueue(final int maxsize) { // the maxsize is the maximum number of entries in the stack // if this is set to -1, the size is unlimited - this.queue = new TreeSet(); - this.drained = new ArrayList(); + this.queue = new TreeSet>(); + this.drained = new ArrayList>(); this.enqueued = new Semaphore(0); this.maxsize = maxsize; } @@ -110,7 +110,7 @@ public class WeakPriorityBlockingQueue { * @param weight the weight of the element * @param remove - the rating of the element that shall be removed in case that the stack has an size overflow */ - public synchronized void put(final E element) { + public synchronized void put(final Element element) { // put the element on the stack if (this.drained.contains(element)) return; if (this.queue.size() == this.maxsize) { @@ -127,7 +127,7 @@ public class WeakPriorityBlockingQueue { * return the element with the smallest weight and remove it from the stack * @return null if no element is on the queue or the head of the queue */ - public E poll() { + public Element poll() { boolean a = this.enqueued.tryAcquire(); if (!a) return null; synchronized (this) { @@ -142,7 +142,7 @@ public class WeakPriorityBlockingQueue { * @return the head element from the queue * @throws InterruptedException */ - public E poll(long timeout) throws InterruptedException { + public Element poll(long timeout) throws InterruptedException { boolean a = (timeout <= 0) ? this.enqueued.tryAcquire() : this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS); if (!a) return null; synchronized (this) { @@ -155,15 +155,15 @@ public class WeakPriorityBlockingQueue { * @return the head element from the queue * @throws InterruptedException */ - public E take() throws InterruptedException { + public Element take() throws InterruptedException { this.enqueued.acquire(); synchronized (this) { return takeUnsafe(); } } - private E takeUnsafe() { - final E element = this.queue.first(); + private Element takeUnsafe() { + final Element element = this.queue.first(); assert element != null; this.queue.remove(element); this.drained.add(element); @@ -176,7 +176,7 @@ public class WeakPriorityBlockingQueue { * return the element with the smallest weight, but do not remove it * @return null if no element is on the queue or the head of the queue */ - public synchronized E peek() { + public synchronized Element peek() { if (this.queue.isEmpty()) return null; return this.queue.first(); } @@ -192,13 +192,15 @@ public class WeakPriorityBlockingQueue { * @param position inside the drained queue * @return the element from the recorded position or null if that position is not available */ - public synchronized E element(final int position) { + public Element element(final int position) { if (position < this.drained.size()) { return this.drained.get(position); } - if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element - while (position >= this.drained.size()) this.poll(); - return this.drained.get(position); + synchronized (this) { + if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element + while (position >= this.drained.size()) this.poll(); + return this.drained.get(position); + } } /** @@ -210,12 +212,11 @@ public class WeakPriorityBlockingQueue { * @return the element from the recorded position or null if that position is not available within the timeout * @throws InterruptedException */ - public synchronized E element(final int position, long time) throws InterruptedException { + public Element element(final int position, long time) throws InterruptedException { long timeout = System.currentTimeMillis() + time; if (position < this.drained.size()) { return this.drained.get(position); } - if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element while (position >= this.drained.size()) { long t = timeout - System.currentTimeMillis(); if (t <= 0) break; @@ -232,7 +233,7 @@ public class WeakPriorityBlockingQueue { * @param count * @return a list of elements in the stack */ - public synchronized ArrayList list(final int count) { + public synchronized ArrayList> list(final int count) { if (count < 0) { return list(); } @@ -245,7 +246,7 @@ public class WeakPriorityBlockingQueue { * return all entries as they would be retrievable with element() * @return a list of all elements in the stack */ - public synchronized ArrayList list() { + public synchronized ArrayList> list() { // shift all elements while (!this.queue.isEmpty()) this.poll(); return this.drained; @@ -255,13 +256,13 @@ public class WeakPriorityBlockingQueue { * iterate over all elements available. All elements that are still in the queue are drained to recorded positions * @return an iterator over all drained positions. */ - public synchronized Iterator iterator() { + public synchronized Iterator> iterator() { // shift all elements to the offstack while (!this.queue.isEmpty()) this.poll(); return this.drained.iterator(); } - protected interface Element { + public interface Element { public long getWeight(); public E getElement(); public boolean equals(Element o); @@ -269,7 +270,7 @@ public class WeakPriorityBlockingQueue { public String toString(); } - protected abstract static class AbstractElement { + protected abstract static class AbstractElement implements Element { public long weight; public E element; @@ -299,7 +300,7 @@ public class WeakPriorityBlockingQueue { * natural ordering elements, can be used as container of objects in the priority queue * the elements with smallest ordering weights are first in the queue when elements are taken */ - public static class NaturalElement extends AbstractElement implements Comparable>, Comparator> { + public static class NaturalElement extends AbstractElement implements Element, Comparable>, Comparator> { public NaturalElement(final E element, final long weight) { this.element = element; @@ -321,13 +322,14 @@ public class WeakPriorityBlockingQueue { if (o1h < o2h) return -1; return 0; } + } /** * reverse ordering elements, can be used as container of objects in the priority queue * the elements with highest ordering weights are first in the queue when elements are taken */ - public static class ReverseElement extends AbstractElement implements Comparable>, Comparator> { + public static class ReverseElement extends AbstractElement implements Element, Comparable>, Comparator> { public ReverseElement(final E element, final long weight) { this.element = element; @@ -352,14 +354,26 @@ public class WeakPriorityBlockingQueue { } public static void main(String[] args) { - WeakPriorityBlockingQueue> a = new WeakPriorityBlockingQueue>(3); + final WeakPriorityBlockingQueue a = new WeakPriorityBlockingQueue(3); + //final Element REVERSE_POISON = new ReverseElement("", Long.MIN_VALUE); + new Thread(){ + public void run() { + Element e; + try { + while ((e = a.poll(1000)) != null) System.out.println("> " + e.toString()); + } catch (InterruptedException e1) { + e1.printStackTrace(); + } + } + }.start(); a.put(new ReverseElement("abc", 1)); //a.poll(); a.put(new ReverseElement("abcx", 2)); a.put(new ReverseElement("6s_7dfZk4xvc", 3)); a.put(new ReverseElement("6s_7dfZk4xvcx", 4)); + //a.put((Element) REVERSE_POISON); //a.poll(); System.out.println("size = " + a.sizeAvailable()); - while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString()); + //while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString()); } } diff --git a/source/net/yacy/document/Parser.java b/source/net/yacy/document/Parser.java index ab6e6aea9..0c5974e76 100644 --- a/source/net/yacy/document/Parser.java +++ b/source/net/yacy/document/Parser.java @@ -98,6 +98,11 @@ public interface Parser { this.url = url; } + public Failure(final String message, final MultiProtocolURI url, Throwable e) { + super(message + "; url = " + url.toNormalform(true, false), e); + this.url = url; + } + public MultiProtocolURI getURL() { return this.url; } diff --git a/source/net/yacy/document/parser/rssParser.java b/source/net/yacy/document/parser/rssParser.java index 719bd28af..72c1841f8 100644 --- a/source/net/yacy/document/parser/rssParser.java +++ b/source/net/yacy/document/parser/rssParser.java @@ -58,7 +58,7 @@ public class rssParser extends AbstractParser implements Parser { try { rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source, RSSReader.Type.none); } catch (IOException e) { - throw new Parser.Failure("Load error:" + e.getMessage(), url); + throw new Parser.Failure("Load error:" + e.getMessage(), url, e); } RSSFeed feed = rssReader.getFeed(); diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index 3a5cef1fc..cfc39a142 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -273,6 +273,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable { // checks for local/global IP range and local IP public final boolean isLocal() { + if (this.isSMB() || this.isFile()) return true; if (this.hash == null) { if (super.isLocal()) return true; synchronized (this) { diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 8fd8e751f..fe2183880 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -179,6 +179,7 @@ public final class LoaderDispatcher { private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException { // get the protocol of the next URL final DigestURI url = request.url(); + if (url.isFile() || url.isSMB()) cacheStrategy = CrawlProfile.CacheStrategy.NOCACHE; // load just from the file system final String protocol = url.getProtocol(); final String host = url.getHost();