- enhancements for search speed

- bug fixes in many classes including basic data structure classes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7217 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent aa6075402a
commit aacf572a26

@ -519,7 +519,7 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) { public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking()); final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1); final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE);
ranked.run(); ranked.run();
if (ranked.filteredCount() == 0) { if (ranked.filteredCount() == 0) {

@ -28,7 +28,7 @@
<div id="yacylivesearch"> <div id="yacylivesearch">
<form id="ysearch" method="get" accept-charset="UTF-8" action="yacysearch.html"><p> <form id="ysearch" method="get" accept-charset="UTF-8" action="yacysearch.html"><p>
<input name="query" id="yquery" class="fancy" type="text" size="20" maxlength="80" value=""/> <input name="query" id="yquery" class="fancy" type="text" size="20" maxlength="80" value=""/>
<input type="hidden" name="verify" value="true" /> <input type="hidden" name="verify" value="ifexist" />
<input type="hidden" name="maximumRecords" value="20" /> <input type="hidden" name="maximumRecords" value="20" />
<input type="hidden" name="resource" value="local" /> <input type="hidden" name="resource" value="local" />
<input type="hidden" name="urlmaskfilter" value=".*" /> <input type="hidden" name="urlmaskfilter" value=".*" />

@ -39,7 +39,7 @@ import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
@ -195,7 +195,7 @@ public final class search {
int joincount = 0; int joincount = 0;
QueryParams theQuery = null; QueryParams theQuery = null;
SearchEvent theSearch = null; SearchEvent theSearch = null;
ArrayList<ReverseElement<ResultEntry>> accu = null; ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
if ((query.length() == 0) && (abstractSet != null)) { if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts // this is _not_ a normal search, only a request for index abstracts
Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
@ -365,7 +365,7 @@ public final class search {
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
final StringBuilder links = new StringBuilder(6000); final StringBuilder links = new StringBuilder(6000);
String resource = null; String resource = null;
ReverseElement<ResultEntry> entry; WeakPriorityBlockingQueue.Element<ResultEntry> entry;
for (int i = 0; i < accu.size(); i++) { for (int i = 0; i < accu.size(); i++) {
entry = accu.get(i); entry = accu.get(i);
resource = entry.getElement().resource(); resource = entry.getElement().resource();

@ -100,7 +100,7 @@ public class yacysearchitem {
// text search // text search
// generate result object // generate result object
final ResultEntry result = theSearch.oneResult(item); final ResultEntry result = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000);
if (result == null) return prop; // no content if (result == null) return prop; // no content
@ -136,7 +136,7 @@ public class yacysearchitem {
//prop.put("content_ybr", RankingProcess.ybr(result.hash())); //prop.put("content_ybr", RankingProcess.ybr(result.hash()));
prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename' prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
prop.putHTML("content_sizename", sizename(result.filesize())); prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_host", result.url().getHost()); prop.putHTML("content_host", result.url().getHost() == null ? "" : result.url().getHost());
prop.putHTML("content_file", result.url().getFile()); prop.putHTML("content_file", result.url().getFile());
prop.putHTML("content_path", result.url().getPath()); prop.putHTML("content_path", result.url().getPath());
prop.put("content_nl", (item == 0) ? 0 : 1); prop.put("content_nl", (item == 0) ? 0 : 1);
@ -203,7 +203,7 @@ public class yacysearchitem {
// any other media content // any other media content
// generate result object // generate result object
final ResultEntry result = theSearch.oneResult(item); final ResultEntry result = theSearch.oneResult(item, 500);
if (result == null) return prop; // no content if (result == null) return prop; // no content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
@ -234,7 +234,7 @@ public class yacysearchitem {
final int p = s.lastIndexOf('.'); final int p = s.lastIndexOf('.');
if (p < 0) return s.substring(0, length - 3) + "..."; if (p < 0) return s.substring(0, length - 3) + "...";
assert p >= 0; assert p >= 0;
assert length - (s.length() - p) - 3 >= 0; assert length - (s.length() - p) - 3 >= 0: "length = " + length + ", s.length() = " + s.length() + ", p = " + p;
return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); // TODO check oob return s.substring(0, length - (s.length() - p) - 3) + "..." + s.substring(p); // TODO check oob
} }

@ -303,7 +303,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
NOCACHE(0), // never use the cache, all content from fresh internet source NOCACHE(0), // never use the cache, all content from fresh internet source
IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules IFFRESH(1), // use the cache if the cache exists and is fresh using the proxy-fresh rules
IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source. IFEXIST(2), // use the cache if the cache exist. Do no check freshness. Otherwise use online source.
CACHEONLY(3); // never go online, use all content from cache. If no cache exist, treat content as unavailable CACHEONLY(3); // never go online, use all content from cache. If no cache entry exist, consider content nevertheless as available
public int code; public int code;
private CacheStrategy(int code) { private CacheStrategy(int code) {
this.code = code; this.code = code;
@ -320,6 +320,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
if (name.equals("iffresh")) return IFFRESH; if (name.equals("iffresh")) return IFFRESH;
if (name.equals("ifexist")) return IFEXIST; if (name.equals("ifexist")) return IFEXIST;
if (name.equals("cacheonly")) return CACHEONLY; if (name.equals("cacheonly")) return CACHEONLY;
if (name.equals("true")) return IFFRESH;
if (name.equals("false")) return CACHEONLY;
return null; return null;
} }
public String toName() { public String toName() {

@ -109,8 +109,6 @@ import de.anomic.yacy.graphics.EncodedImage;
public final class HTTPDFileHandler { public final class HTTPDFileHandler {
private static final boolean safeServletsMode = false; // if true then all servlets are called synchronized
// create a class loader // create a class loader
private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/);
private static serverSwitch switchboard = null; private static serverSwitch switchboard = null;
@ -1177,15 +1175,8 @@ public final class HTTPDFileHandler {
return m; return m;
} }
public static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
// debug functions: for special servlets call them without reflection to get better stack trace results return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
Object result;
if (safeServletsMode) synchronized (switchboard) {
result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
} else {
result = rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
}
return result;
} }
/** /**

@ -191,7 +191,7 @@ public class DocumentIndex extends Segment {
// make a query and start a search // make a query and start a search
QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault); QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault);
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang); ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 1); RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
rankedCache.start(); rankedCache.start();
// search is running; retrieve results // search is running; retrieve results

@ -65,7 +65,7 @@ public final class RankingProcess extends Thread {
public static BinSearch[] ybrTables = null; // block-rank tables public static BinSearch[] ybrTables = null; // block-rank tables
private static final int maxYBR = 3; // the lower this value, the faster the search private static final int maxYBR = 3; // the lower this value, the faster the search
private static boolean useYBR = true; private static boolean useYBR = true;
private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000; private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000;
private final QueryParams query; private final QueryParams query;
private final TreeSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final TreeSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
@ -76,9 +76,9 @@ public final class RankingProcess extends Thread {
private int remote_resourceSize, remote_indexCount, remote_peerCount; private int remote_resourceSize, remote_indexCount, remote_peerCount;
private int local_resourceSize, local_indexCount; private int local_resourceSize, local_indexCount;
private final WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> stack; private final WeakPriorityBlockingQueue<WordReferenceVars> stack;
private int feeders; private int feeders;
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>> doubleDomCache; // key = domhash (6 bytes); value = like stack private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process //private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
private final Navigator ref; // reference score computation for the commonSense heuristic private final Navigator ref; // reference score computation for the commonSense heuristic
@ -86,14 +86,15 @@ public final class RankingProcess extends Thread {
private final Navigator authorNavigator; private final Navigator authorNavigator;
private final Navigator namespaceNavigator; private final Navigator namespaceNavigator;
private final ReferenceOrder order; private final ReferenceOrder order;
private final long startTime;
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) { public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking // sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null; this.localSearchInclusion = null;
this.stack = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>(maxentries); this.stack = new WeakPriorityBlockingQueue<WordReferenceVars>(maxentries);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>>(); this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.query = query; this.query = query;
this.order = order; this.order = order;
this.remote_peerCount = 0; this.remote_peerCount = 0;
@ -111,8 +112,8 @@ public final class RankingProcess extends Thread {
this.authorNavigator = new Navigator(); this.authorNavigator = new Navigator();
this.namespaceNavigator = new Navigator(); this.namespaceNavigator = new Navigator();
this.ref = new Navigator(); this.ref = new Navigator();
this.feeders = concurrency; this.feeders = 1;
assert this.feeders >= 1; this.startTime = System.currentTimeMillis();
} }
public QueryParams getQuery() { public QueryParams getQuery() {
@ -146,9 +147,10 @@ public final class RankingProcess extends Thread {
add(index, true, "local index: " + this.query.getSegment().getLocation(), -1); add(index, true, "local index: " + this.query.getSegment().getLocation(), -1);
} catch (final Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} } finally {
oneFeederTerminated(); oneFeederTerminated();
} }
}
public void add(final ReferenceContainer<WordReference> index, final boolean local, String resourceName, final int fullResource) { public void add(final ReferenceContainer<WordReference> index, final boolean local, String resourceName, final int fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
@ -226,6 +228,7 @@ public final class RankingProcess extends Thread {
// finally make a double-check and insert result to stack // finally make a double-check and insert result to stack
if (urlhashes.add(iEntry.metadataHash())) { if (urlhashes.add(iEntry.metadataHash())) {
stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest) stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
//System.out.println("stack.put: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
// increase counter for statistics // increase counter for statistics
if (local) this.local_indexCount++; else this.remote_indexCount++; if (local) this.local_indexCount++; else this.remote_indexCount++;
@ -250,8 +253,9 @@ public final class RankingProcess extends Thread {
this.feeders += countMoreFeeders; this.feeders += countMoreFeeders;
} }
private boolean feedingIsFinished() { public boolean feedingIsFinished() {
return this.feeders == 0; //System.out.println("feedingIsFinished: this.feeders == " + this.feeders);
return System.currentTimeMillis() - this.startTime > 50 && this.feeders == 0;
} }
private boolean testFlags(final WordReference ientry) { private boolean testFlags(final WordReference ientry) {
@ -277,23 +281,37 @@ public final class RankingProcess extends Thread {
return localSearchInclusion; return localSearchInclusion;
} }
private ReverseElement<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long timeout) { private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long waitingtime) {
// returns from the current RWI list the best entry and removes this entry from the list // returns from the current RWI list the best entry and removes this entry from the list
WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> m; WeakPriorityBlockingQueue<WordReferenceVars> m;
ReverseElement<WordReferenceVars> rwi; WeakPriorityBlockingQueue.Element<WordReferenceVars> rwi = null;
try { try {
//System.out.println("feeders = " + this.feeders); //System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
while ((rwi = stack.poll((this.feedingIsFinished()) ? 0 : timeout)) != null) { int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain
if (!skipDoubleDom) return rwi; long timeout = System.currentTimeMillis() + waitingtime;
while (this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage) {
if (waitingtime <= 0) {
rwi = stack.poll();
} else while (System.currentTimeMillis() < timeout) {
rwi = stack.poll(50);
if (rwi != null) break;
if (feedingIsFinished() && stack.sizeQueue() == 0) break;
}
if (rwi == null) break;
if (!skipDoubleDom) {
//System.out.println("!skipDoubleDom");
return rwi;
}
// check doubledom // check doubledom
final String domhash = new String(rwi.getElement().metadataHash(), 6, 6); final String domhash = new String(rwi.getElement().metadataHash(), 6, 6);
m = this.doubleDomCache.get(domhash); m = this.doubleDomCache.get(domhash);
if (m == null) { if (m == null) {
// first appearance of dom // first appearance of dom
m = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); m = new WeakPriorityBlockingQueue<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(domhash, m); this.doubleDomCache.put(domhash, m);
//System.out.println("m == null");
return rwi; return rwi;
} }
@ -302,13 +320,17 @@ public final class RankingProcess extends Thread {
} }
} catch (InterruptedException e1) { } catch (InterruptedException e1) {
} }
if (this.doubleDomCache.size() == 0) {
//System.out.println("this.doubleDomCache.size() == 0");
return null;
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache // no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches // find best entry from all caches
ReverseElement<WordReferenceVars> bestEntry = null; WeakPriorityBlockingQueue.Element<WordReferenceVars> bestEntry = null;
ReverseElement<WordReferenceVars> o; WeakPriorityBlockingQueue.Element<WordReferenceVars> o;
synchronized (this.doubleDomCache) { synchronized (this.doubleDomCache) {
final Iterator<WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>> i = this.doubleDomCache.values().iterator(); final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) { while (i.hasNext()) {
try { try {
m = i.next(); m = i.next();
@ -316,25 +338,39 @@ public final class RankingProcess extends Thread {
Log.logException(e); Log.logException(e);
break; // not the best solution... break; // not the best solution...
} }
if (m == null) continue; if (m == null) {
if (m.isEmpty()) continue; //System.out.println("m == null");
continue;
}
if (m.isEmpty()) {
//System.out.println("m.isEmpty()");
continue;
}
if (bestEntry == null) { if (bestEntry == null) {
bestEntry = m.peek(); bestEntry = m.peek();
//System.out.println("bestEntry = m.peek() = " + bestEntry);
continue; continue;
} }
o = m.peek(); o = m.peek();
if (o == null) continue; if (o == null) {
//System.out.println("o == null");
continue;
}
if (o.getWeight() < bestEntry.getWeight()) { if (o.getWeight() < bestEntry.getWeight()) {
bestEntry = o; bestEntry = o;
} }
} }
} }
if (bestEntry == null) return null; if (bestEntry == null) {
//System.out.println("bestEntry == null");
return null;
}
// finally remove the best entry from the doubledom cache // finally remove the best entry from the doubledom cache
m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6)); m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6));
o = m.poll(); o = m.poll();
//assert o == null || o.element.metadataHash().equals(bestEntry.element.metadataHash()) : "bestEntry.element.metadataHash() = " + bestEntry.element.metadataHash() + ", o.element.metadataHash() = " + o.element.metadataHash(); //assert o == null || o.element.metadataHash().equals(bestEntry.element.metadataHash()) : "bestEntry.element.metadataHash() = " + bestEntry.element.metadataHash() + ", o.element.metadataHash() = " + o.element.metadataHash();
//System.out.println("return bestEntry");
return bestEntry; return bestEntry;
} }
@ -344,22 +380,19 @@ public final class RankingProcess extends Thread {
* limit is reached then null is returned. The caller may distinguish the timeout case * limit is reached then null is returned. The caller may distinguish the timeout case
* from the case where there will be no more also in the future by calling this.feedingIsFinished() * from the case where there will be no more also in the future by calling this.feedingIsFinished()
* @param skipDoubleDom should be true if it is wanted that double domain entries are skipped * @param skipDoubleDom should be true if it is wanted that double domain entries are skipped
* @param timeout the time this method may take for a result computation * @param waitingtime the time this method may take for a result computation
* @return a metadata entry for a url * @return a metadata entry for a url
*/ */
public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) { public URIMetadataRow takeURL(final boolean skipDoubleDom, final long waitingtime) {
// returns from the current RWI list the best URL entry and removes this entry from the list // returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + Math.max(10, timeout); long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
int p = -1; int p = -1;
byte[] urlhash; byte[] urlhash;
long timeleft; long timeleft;
while ((timeleft = timeLimit - System.currentTimeMillis()) > 0) { while ((timeleft = timeout - System.currentTimeMillis()) > 0) {
final ReverseElement<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft); //System.out.println("timeleft = " + timeleft);
if (obrwi == null) { final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
if (this.feedingIsFinished()) return null; if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element
try {Thread.sleep(50);} catch (final InterruptedException e1) {}
continue;
}
urlhash = obrwi.getElement().metadataHash(); urlhash = obrwi.getElement().metadataHash();
final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight()); final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight());
if (page == null) { if (page == null) {
@ -463,9 +496,17 @@ public final class RankingProcess extends Thread {
return null; return null;
} }
protected int size() { public int sizeQueue() {
int c = stack.sizeQueue();
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeQueue();
}
return c;
}
public int sizeAvailable() {
int c = stack.sizeAvailable(); int c = stack.sizeAvailable();
for (WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> s: this.doubleDomCache.values()) { for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeAvailable(); c += s.sizeAvailable();
} }
return c; return c;
@ -473,7 +514,7 @@ public final class RankingProcess extends Thread {
public boolean isEmpty() { public boolean isEmpty() {
if (!stack.isEmpty()) return false; if (!stack.isEmpty()) return false;
for (WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> s: this.doubleDomCache.values()) { for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
if (!s.isEmpty()) return false; if (!s.isEmpty()) return false;
} }
return true; return true;

@ -51,15 +51,15 @@ import de.anomic.yacy.graphics.ProfilingGraph;
public class ResultFetcher { public class ResultFetcher {
// input values // input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query; QueryParams query;
private final yacySeedDB peers; private final yacySeedDB peers;
// result values // result values
protected final LoaderDispatcher loader; protected final LoaderDispatcher loader;
protected Worker[] workerThreads; protected Worker[] workerThreads;
protected final WeakPriorityBlockingQueue<ReverseElement<ResultEntry>> result; protected final WeakPriorityBlockingQueue<ResultEntry> result;
protected final WeakPriorityBlockingQueue<ReverseElement<MediaSnippet>> images; // container to sort images by size protected final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size
protected final HandleSet failedURLs; // a set of urlhashes that could not been verified during search protected final HandleSet failedURLs; // a set of urlhashes that could not been verified during search
protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
long urlRetrievalAllTime; long urlRetrievalAllTime;
@ -74,15 +74,15 @@ public class ResultFetcher {
final int taketimeout) { final int taketimeout) {
this.loader = loader; this.loader = loader;
this.rankedCache = rankedCache; this.rankingProcess = rankedCache;
this.query = query; this.query = query;
this.peers = peers; this.peers = peers;
this.taketimeout = taketimeout; this.taketimeout = taketimeout;
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.result = new WeakPriorityBlockingQueue<ReverseElement<ResultEntry>>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<ReverseElement<MediaSnippet>>(-1); this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);
this.failedURLs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // a set of url hashes where a worker thread tried to work on, but failed. this.failedURLs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); // a set of url hashes where a worker thread tried to work on, but failed.
// snippets do not need to match with the complete query hashes, // snippets do not need to match with the complete query hashes,
@ -107,19 +107,25 @@ public class ResultFetcher {
public void deployWorker(int deployCount, int neededResults) { public void deployWorker(int deployCount, int neededResults) {
if (anyWorkerAlive()) return; if (anyWorkerAlive()) return;
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) return;
this.workerThreads = new Worker[/*(query.snippetCacheStrategy.mustBeOffline()) ? 1 : */deployCount]; this.workerThreads = new Worker[/*(query.snippetCacheStrategy.mustBeOffline()) ? 1 : */deployCount];
synchronized(this.workerThreads) {
for (int i = 0; i < workerThreads.length; i++) { for (int i = 0; i < workerThreads.length; i++) {
this.workerThreads[i] = new Worker(i, 10000, query.snippetCacheStrategy, neededResults); Worker worker = new Worker(i, 1000, query.snippetCacheStrategy, neededResults);
this.workerThreads[i].start(); worker.start();
this.workerThreads[i] = worker;
}
} }
} }
boolean anyWorkerAlive() { boolean anyWorkerAlive() {
if (this.workerThreads == null) return false; if (this.workerThreads == null) return false;
synchronized(this.workerThreads) {
for (int i = 0; i < this.workerThreads.length; i++) { for (int i = 0; i < this.workerThreads.length; i++) {
if ((this.workerThreads[i] != null) && if ((this.workerThreads[i] != null) &&
(this.workerThreads[i].isAlive()) && (this.workerThreads[i].isAlive()) &&
(this.workerThreads[i].busytime() < 3000)) return true; (this.workerThreads[i].busytime() < 1000)) return true;
}
} }
return false; return false;
} }
@ -155,20 +161,32 @@ public class ResultFetcher {
//final int fetchAhead = snippetMode == 0 ? 0 : 10; //final int fetchAhead = snippetMode == 0 ? 0 : 10;
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0; boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try { try {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
int loops = 0;
while (System.currentTimeMillis() < this.timeout) { while (System.currentTimeMillis() < this.timeout) {
if (result.sizeAvailable() > neededResults) break;
this.lastLifeSign = System.currentTimeMillis(); this.lastLifeSign = System.currentTimeMillis();
// check if we have enough // check if we have enough
if ((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() >= query.neededResults() + 50)) break; if (result.sizeAvailable() >= this.neededResults) {
if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break; //System.out.println("result.sizeAvailable() >= this.neededResults");
break;
}
// check if we can succeed if we try to take another url
if (rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0) {
break;
}
// get next entry // get next entry
page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis()); page = rankingProcess.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, taketimeout); //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) break; if (page == null) {
//System.out.println("page == null");
break; // no more available
}
if (failedURLs.has(page.hash())) continue; if (failedURLs.has(page.hash())) continue;
loops++;
final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0 final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used if (resultEntry == null) continue; // the entry had some problems, cannot be used
@ -176,23 +194,25 @@ public class ResultFetcher {
urlRetrievalAllTime += resultEntry.dbRetrievalTime; urlRetrievalAllTime += resultEntry.dbRetrievalTime;
snippetComputationAllTime += resultEntry.snippetComputationTime; snippetComputationAllTime += resultEntry.snippetComputationTime;
//System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring());
// place the result to the result vector // place the result to the result vector
// apply post-ranking // apply post-ranking
long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word())); long ranking = Long.valueOf(rankingProcess.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics()); ranking += postRanking(resultEntry, rankingProcess.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (nav_topics) rankedCache.addTopics(resultEntry); if (nav_topics) rankingProcess.addTopics(resultEntry);
//System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());
} }
//System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
} catch (final Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
Log.logInfo("SEARCH", "resultWorker thread " + id + " terminated"); Log.logInfo("SEARCH", "resultWorker thread " + id + " terminated");
} }
/**
* calculate the time since the worker has had the latest activity
* @return time in milliseconds lasted since latest activity
*/
public long busytime() { public long busytime() {
return System.currentTimeMillis() - this.lastLifeSign; return System.currentTimeMillis() - this.lastLifeSign;
} }
@ -274,9 +294,10 @@ public class ResultFetcher {
Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason); Log.logInfo("SEARCH", "sorted out urlhash " + new String(urlhash) + " during search: " + reason);
} }
public ResultEntry oneResult(final int item) { public ResultEntry oneResult(final int item, long timeout) {
// check if we already retrieved this item // check if we already retrieved this item
// (happens if a search pages is accessed a second time) // (happens if a search pages is accessed a second time)
long finishTime = System.currentTimeMillis() + timeout;
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "started, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "started, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
if (this.result.sizeAvailable() > item) { if (this.result.sizeAvailable() > item) {
// we have the wanted result already in the result array .. return that // we have the wanted result already in the result array .. return that
@ -289,35 +310,40 @@ public class ResultFetcher {
System.out.println("result.size() = " + this.result.size()); System.out.println("result.size() = " + this.result.size());
System.out.println("query.neededResults() = " + query.neededResults()); System.out.println("query.neededResults() = " + query.neededResults());
*/ */
if ((!anyWorkerAlive()) && if (this.result.sizeAvailable() <= item) {
(((query.contentdom == ContentDomain.IMAGE) && (images.sizeAvailable() + 30 < query.neededResults())) ||
(this.result.sizeAvailable() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.sizeAvailable())
) {
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
deployWorker(Math.min(10, query.itemsPerPage), query.neededResults()); //System.out.println("item = " + item);
//System.out.println("anyWorkerAlive() = " + anyWorkerAlive());
//System.out.println("rankingProcess.feedingIsFinished() = " + rankingProcess.feedingIsFinished());
//System.out.println("this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue());
//System.out.println("this.result.sizeAvailable() = " + this.result.sizeAvailable());
//System.out.println("this.result.sizeAvailable() + this.rankingProcess.sizeQueue() = " + (this.result.sizeAvailable() + this.rankingProcess.sizeQueue()));
deployWorker(Math.min(20, query.itemsPerPage), ((item + query.itemsPerPage) / query.itemsPerPage) * query.itemsPerPage);
} }
// finally wait until enough results are there produced from the // finally wait until enough results are there produced from the
// snippet fetch process // snippet fetch process
while ((anyWorkerAlive()) && (result.sizeAvailable() <= item)) { WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
try {Thread.sleep((item % query.itemsPerPage) * 10L);} catch (final InterruptedException e) {} while (System.currentTimeMillis() < finishTime) {
if (this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && !anyWorkerAlive() && this.rankingProcess.feedingIsFinished()) break;
try {entry = this.result.element(item, 50);} catch (InterruptedException e) {Log.logException(e);}
if (entry != null) break;
if (!anyWorkerAlive() && this.rankingProcess.sizeQueue() == 0 && this.rankingProcess.feedingIsFinished()) break; //
} }
// finally, if there is something, return the result // finally, if there is something, return the result
if (this.result.sizeAvailable() <= item) { if (entry == null) {
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
return null; return null;
} }
ResultEntry re = this.result.element(item).getElement(); ResultEntry re = entry.getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
return re; return re;
} }
private int resultCounter = 0; private int resultCounter = 0;
public ResultEntry nextResult() { public ResultEntry nextResult() {
final ResultEntry re = oneResult(resultCounter); final ResultEntry re = oneResult(resultCounter, 1000);
resultCounter++; resultCounter++;
return re; return re;
} }
@ -355,7 +381,7 @@ public class ResultFetcher {
return c; return c;
} }
public ArrayList<ReverseElement<ResultEntry>> completeResults(final long waitingtime) { public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) {
final long timeout = System.currentTimeMillis() + waitingtime; final long timeout = System.currentTimeMillis() + waitingtime;
while ((result.sizeAvailable() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) { while ((result.sizeAvailable() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) {
try {Thread.sleep(20);} catch (final InterruptedException e) {} try {Thread.sleep(20);} catch (final InterruptedException e) {}

@ -66,8 +66,8 @@ public final class SearchEvent {
private long eventTime; private long eventTime;
private QueryParams query; private QueryParams query;
private final yacySeedDB peers; private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher results; private ResultFetcher resultFetcher;
private final SecondarySearchSuperviser secondarySearchSuperviser; private final SecondarySearchSuperviser secondarySearchSuperviser;
@ -112,10 +112,10 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data // initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads // that is generated concurrently from local and global search threads
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1); this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
// start a local search concurrently // start a local search concurrently
this.rankedCache.start(); this.rankingProcess.start();
// start global searches // start global searches
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
@ -133,7 +133,7 @@ public final class SearchEvent {
query.getSegment(), query.getSegment(),
peers, peers,
crawlResults, crawlResults,
rankedCache, rankingProcess,
secondarySearchSuperviser, secondarySearchSuperviser,
fetchpeers, fetchpeers,
Switchboard.urlBlacklist, Switchboard.urlBlacklist,
@ -141,7 +141,7 @@ public final class SearchEvent {
query.constraint, query.constraint,
(query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes); (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
if (this.primarySearchThreads != null) { if (this.primarySearchThreads != null) {
if (this.primarySearchThreads.length > fetchpeers) this.rankedCache.moreFeeders(this.primarySearchThreads.length - fetchpeers); this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
// finished searching // finished searching
Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); Log.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
@ -151,20 +151,20 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.results = new ResultFetcher(loader, rankedCache, query, peers, 3000); this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 3000);
} else { } else {
// do a local search // do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1); this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
if (generateAbstracts) { if (generateAbstracts) {
this.rankedCache.run(); // this is not started concurrently here on purpose! this.rankingProcess.run(); // this is not started concurrently here on purpose!
// compute index abstracts // compute index abstracts
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
int maxcount = -1; int maxcount = -1;
long mindhtdistance = Long.MAX_VALUE, l; long mindhtdistance = Long.MAX_VALUE, l;
byte[] wordhash; byte[] wordhash;
assert this.rankedCache.searchContainerMap() != null; assert this.rankingProcess.searchContainerMap() != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankedCache.searchContainerMap().entrySet()) { for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankingProcess.searchContainerMap().entrySet()) {
wordhash = entry.getKey(); wordhash = entry.getKey();
final ReferenceContainer<WordReference> container = entry.getValue(); final ReferenceContainer<WordReference> container = entry.getValue();
assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash); assert (Base64Order.enhancedCoder.equal(container.getTermHash(), wordhash)) : "container.getTermHash() = " + new String(container.getTermHash()) + ", wordhash = " + new String(wordhash);
@ -181,13 +181,21 @@ public final class SearchEvent {
IACount.put(wordhash, Integer.valueOf(container.size())); IACount.put(wordhash, Integer.valueOf(container.size()));
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString()); IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
} }
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
} else { } else {
this.rankedCache.start(); // start concurrently this.rankingProcess.start(); // start concurrently
// but give process time to accumulate a certain amount of data
// before a reading process wants to get results from it
for (int i = 0; i < 10; i++) {
if (!this.rankingProcess.isAlive()) break;
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
// this will reduce the maximum waiting time until results are available to 100 milliseconds
// while we always get a good set of ranked data
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.results = new ResultFetcher(loader, rankedCache, query, peers, 300); this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, peers, 300);
} }
// clean up events // clean up events
@ -217,21 +225,25 @@ public final class SearchEvent {
public void setQuery(QueryParams query) { public void setQuery(QueryParams query) {
this.query = query; this.query = query;
this.results.query = query; this.resultFetcher.query = query;
} }
public void cleanup() { public void cleanup() {
// stop all threads // stop all threads
if (primarySearchThreads != null) { if (primarySearchThreads != null) {
for (yacySearch search : this.primarySearchThreads) { for (yacySearch search : this.primarySearchThreads) {
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt(); if (search.isAlive()) search.interrupt();
} }
} }
}
if (secondarySearchThreads != null) { if (secondarySearchThreads != null) {
for (yacySearch search : this.secondarySearchThreads) { for (yacySearch search : this.secondarySearchThreads) {
if (search != null) synchronized (search) {
if (search.isAlive()) search.interrupt(); if (search.isAlive()) search.interrupt();
} }
} }
}
// clear all data structures // clear all data structures
if (this.preselectedPeerHashes != null) this.preselectedPeerHashes.clear(); if (this.preselectedPeerHashes != null) this.preselectedPeerHashes.clear();
@ -241,7 +253,7 @@ public final class SearchEvent {
if (this.heuristics != null) this.heuristics.clear(); if (this.heuristics != null) this.heuristics.clear();
// execute deletion of failed words // execute deletion of failed words
int rw = this.results.failedURLs.size(); int rw = this.resultFetcher.failedURLs.size();
if (rw > 0) { if (rw > 0) {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
final HandleSet removeWords = query.queryHashes; final HandleSet removeWords = query.queryHashes;
@ -254,7 +266,7 @@ public final class SearchEvent {
final Iterator<byte[]> j = removeWords.iterator(); final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words // remove the same url hashes for multiple words
while (j.hasNext()) { while (j.hasNext()) {
this.query.getSegment().termIndex().remove(j.next(), this.results.failedURLs); this.query.getSegment().termIndex().remove(j.next(), this.resultFetcher.failedURLs);
} }
} catch (IOException e) { } catch (IOException e) {
Log.logException(e); Log.logException(e);
@ -314,25 +326,25 @@ public final class SearchEvent {
} }
public RankingProcess getRankingResult() { public RankingProcess getRankingResult() {
return this.rankedCache; return this.rankingProcess;
} }
public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) { public ArrayList<Navigator.Item> getNamespaceNavigator(int maxentries) {
return this.rankedCache.getNamespaceNavigator(maxentries); return this.rankingProcess.getNamespaceNavigator(maxentries);
} }
public List<Navigator.Item> getHostNavigator(int maxentries) { public List<Navigator.Item> getHostNavigator(int maxentries) {
return this.rankedCache.getHostNavigator(maxentries); return this.rankingProcess.getHostNavigator(maxentries);
} }
public List<Navigator.Item> getTopicNavigator(final int maxentries) { public List<Navigator.Item> getTopicNavigator(final int maxentries) {
// returns a set of words that are computed as toplist // returns a set of words that are computed as toplist
return this.rankedCache.getTopicNavigator(maxentries); return this.rankingProcess.getTopicNavigator(maxentries);
} }
public List<Navigator.Item> getAuthorNavigator(final int maxentries) { public List<Navigator.Item> getAuthorNavigator(final int maxentries) {
// returns a list of authors so far seen on result set // returns a list of authors so far seen on result set
return this.rankedCache.getAuthorNavigator(maxentries); return this.rankingProcess.getAuthorNavigator(maxentries);
} }
public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) { public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) {
@ -347,7 +359,7 @@ public final class SearchEvent {
} }
} }
public ResultEntry oneResult(final int item) { public ResultEntry oneResult(final int item, long timeout) {
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
(query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) { (query.domType == QueryParams.SEARCHDOM_CLUSTERALL)) {
// this is a search using remote search threads. Also the local // this is a search using remote search threads. Also the local
@ -358,7 +370,7 @@ public final class SearchEvent {
try {localSearchThread.join();} catch (InterruptedException e) {} try {localSearchThread.join();} catch (InterruptedException e) {}
} }
} }
return this.results.oneResult(item); return this.resultFetcher.oneResult(item, timeout);
} }
boolean secondarySearchStartet = false; boolean secondarySearchStartet = false;
@ -520,10 +532,10 @@ public final class SearchEvent {
if (words.length() == 0) continue; // ??? if (words.length() == 0) continue; // ???
assert words.length() >= 12 : "words = " + words; assert words.length() >= 12 : "words = " + words;
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words); //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls + " from words: " + words);
rankedCache.moreFeeders(1); rankingProcess.moreFeeders(1);
checkedPeers.add(peer); checkedPeers.add(peer);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, query.getSegment(), peers, crawlResults, rankedCache, peer, Switchboard.urlBlacklist, words, urls, query.getSegment(), peers, crawlResults, rankingProcess, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes); query.ranking, query.constraint, preselectedPeerHashes);
} }
@ -532,7 +544,7 @@ public final class SearchEvent {
} }
public ResultFetcher result() { public ResultFetcher result() {
return this.results; return this.resultFetcher;
} }
} }

@ -830,7 +830,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
// checks for local/global IP range and local IP // checks for local/global IP range and local IP
public boolean isLocal() { public boolean isLocal() {
return this.protocol.equals("file") || this.protocol.equals("smb") || Domains.isLocal(this.host); return this.isFile() || this.isSMB() || Domains.isLocal(this.host);
} }
// language calculation // language calculation

@ -480,8 +480,8 @@ public class HTTPClient {
HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest; HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest;
HttpEntity entity = hrequest.getEntity(); HttpEntity entity = hrequest.getEntity();
assert entity != null; assert entity != null;
assert !entity.isChunked(); //assert !entity.isChunked();
assert entity.getContentLength() >= 0; //assert entity.getContentLength() >= 0;
assert !hrequest.expectContinue(); assert !hrequest.expectContinue();
} }
httpResponse = httpClient.execute(httpUriRequest, httpContext); httpResponse = httpClient.execute(httpUriRequest, httpContext);

@ -96,8 +96,8 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
// move value from A to B; since it was already removed from A, just put it to B // move value from A to B; since it was already removed from A, just put it to B
//System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size()); //System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size());
this.levelB.put((K) s, v); this.levelB.put((K) s, v);
}
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
}
return v; return v;
} }

@ -38,9 +38,9 @@ import java.util.concurrent.TimeUnit;
public class WeakPriorityBlockingQueue<E> { public class WeakPriorityBlockingQueue<E> {
private final TreeSet<E> queue; // object within the stack, ordered using a TreeSet private final TreeSet<Element<E>> queue; // object within the stack, ordered using a TreeSet
private final Semaphore enqueued; // semaphore for elements in the stack private final Semaphore enqueued; // semaphore for elements in the stack
private final ArrayList<E> drained; // objects that had been on the stack but had been removed private final ArrayList<Element<E>> drained; // objects that had been on the stack but had been removed
protected int maxsize; protected int maxsize;
/** /**
@ -52,8 +52,8 @@ public class WeakPriorityBlockingQueue<E> {
public WeakPriorityBlockingQueue(final int maxsize) { public WeakPriorityBlockingQueue(final int maxsize) {
// the maxsize is the maximum number of entries in the stack // the maxsize is the maximum number of entries in the stack
// if this is set to -1, the size is unlimited // if this is set to -1, the size is unlimited
this.queue = new TreeSet<E>(); this.queue = new TreeSet<Element<E>>();
this.drained = new ArrayList<E>(); this.drained = new ArrayList<Element<E>>();
this.enqueued = new Semaphore(0); this.enqueued = new Semaphore(0);
this.maxsize = maxsize; this.maxsize = maxsize;
} }
@ -110,7 +110,7 @@ public class WeakPriorityBlockingQueue<E> {
* @param weight the weight of the element * @param weight the weight of the element
* @param remove - the rating of the element that shall be removed in case that the stack has an size overflow * @param remove - the rating of the element that shall be removed in case that the stack has an size overflow
*/ */
public synchronized void put(final E element) { public synchronized void put(final Element<E> element) {
// put the element on the stack // put the element on the stack
if (this.drained.contains(element)) return; if (this.drained.contains(element)) return;
if (this.queue.size() == this.maxsize) { if (this.queue.size() == this.maxsize) {
@ -127,7 +127,7 @@ public class WeakPriorityBlockingQueue<E> {
* return the element with the smallest weight and remove it from the stack * return the element with the smallest weight and remove it from the stack
* @return null if no element is on the queue or the head of the queue * @return null if no element is on the queue or the head of the queue
*/ */
public E poll() { public Element<E> poll() {
boolean a = this.enqueued.tryAcquire(); boolean a = this.enqueued.tryAcquire();
if (!a) return null; if (!a) return null;
synchronized (this) { synchronized (this) {
@ -142,7 +142,7 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue * @return the head element from the queue
* @throws InterruptedException * @throws InterruptedException
*/ */
public E poll(long timeout) throws InterruptedException { public Element<E> poll(long timeout) throws InterruptedException {
boolean a = (timeout <= 0) ? this.enqueued.tryAcquire() : this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS); boolean a = (timeout <= 0) ? this.enqueued.tryAcquire() : this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS);
if (!a) return null; if (!a) return null;
synchronized (this) { synchronized (this) {
@ -155,15 +155,15 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue * @return the head element from the queue
* @throws InterruptedException * @throws InterruptedException
*/ */
public E take() throws InterruptedException { public Element<E> take() throws InterruptedException {
this.enqueued.acquire(); this.enqueued.acquire();
synchronized (this) { synchronized (this) {
return takeUnsafe(); return takeUnsafe();
} }
} }
private E takeUnsafe() { private Element<E> takeUnsafe() {
final E element = this.queue.first(); final Element<E> element = this.queue.first();
assert element != null; assert element != null;
this.queue.remove(element); this.queue.remove(element);
this.drained.add(element); this.drained.add(element);
@ -176,7 +176,7 @@ public class WeakPriorityBlockingQueue<E> {
* return the element with the smallest weight, but do not remove it * return the element with the smallest weight, but do not remove it
* @return null if no element is on the queue or the head of the queue * @return null if no element is on the queue or the head of the queue
*/ */
public synchronized E peek() { public synchronized Element<E> peek() {
if (this.queue.isEmpty()) return null; if (this.queue.isEmpty()) return null;
return this.queue.first(); return this.queue.first();
} }
@ -192,14 +192,16 @@ public class WeakPriorityBlockingQueue<E> {
* @param position inside the drained queue * @param position inside the drained queue
* @return the element from the recorded position or null if that position is not available * @return the element from the recorded position or null if that position is not available
*/ */
public synchronized E element(final int position) { public Element<E> element(final int position) {
if (position < this.drained.size()) { if (position < this.drained.size()) {
return this.drained.get(position); return this.drained.get(position);
} }
synchronized (this) {
if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element
while (position >= this.drained.size()) this.poll(); while (position >= this.drained.size()) this.poll();
return this.drained.get(position); return this.drained.get(position);
} }
}
/** /**
* retrieve an element from the drained queue but wait until a timeout * retrieve an element from the drained queue but wait until a timeout
@ -210,12 +212,11 @@ public class WeakPriorityBlockingQueue<E> {
* @return the element from the recorded position or null if that position is not available within the timeout * @return the element from the recorded position or null if that position is not available within the timeout
* @throws InterruptedException * @throws InterruptedException
*/ */
public synchronized E element(final int position, long time) throws InterruptedException { public Element<E> element(final int position, long time) throws InterruptedException {
long timeout = System.currentTimeMillis() + time; long timeout = System.currentTimeMillis() + time;
if (position < this.drained.size()) { if (position < this.drained.size()) {
return this.drained.get(position); return this.drained.get(position);
} }
if (position >= this.queue.size() + this.drained.size()) return null; // we don't have that element
while (position >= this.drained.size()) { while (position >= this.drained.size()) {
long t = timeout - System.currentTimeMillis(); long t = timeout - System.currentTimeMillis();
if (t <= 0) break; if (t <= 0) break;
@ -232,7 +233,7 @@ public class WeakPriorityBlockingQueue<E> {
* @param count * @param count
* @return a list of elements in the stack * @return a list of elements in the stack
*/ */
public synchronized ArrayList<E> list(final int count) { public synchronized ArrayList<Element<E>> list(final int count) {
if (count < 0) { if (count < 0) {
return list(); return list();
} }
@ -245,7 +246,7 @@ public class WeakPriorityBlockingQueue<E> {
* return all entries as they would be retrievable with element() * return all entries as they would be retrievable with element()
* @return a list of all elements in the stack * @return a list of all elements in the stack
*/ */
public synchronized ArrayList<E> list() { public synchronized ArrayList<Element<E>> list() {
// shift all elements // shift all elements
while (!this.queue.isEmpty()) this.poll(); while (!this.queue.isEmpty()) this.poll();
return this.drained; return this.drained;
@ -255,13 +256,13 @@ public class WeakPriorityBlockingQueue<E> {
* iterate over all elements available. All elements that are still in the queue are drained to recorded positions * iterate over all elements available. All elements that are still in the queue are drained to recorded positions
* @return an iterator over all drained positions. * @return an iterator over all drained positions.
*/ */
public synchronized Iterator<E> iterator() { public synchronized Iterator<Element<E>> iterator() {
// shift all elements to the offstack // shift all elements to the offstack
while (!this.queue.isEmpty()) this.poll(); while (!this.queue.isEmpty()) this.poll();
return this.drained.iterator(); return this.drained.iterator();
} }
protected interface Element<E> { public interface Element<E> {
public long getWeight(); public long getWeight();
public E getElement(); public E getElement();
public boolean equals(Element<E> o); public boolean equals(Element<E> o);
@ -269,7 +270,7 @@ public class WeakPriorityBlockingQueue<E> {
public String toString(); public String toString();
} }
protected abstract static class AbstractElement<E> { protected abstract static class AbstractElement<E> implements Element<E> {
public long weight; public long weight;
public E element; public E element;
@ -299,7 +300,7 @@ public class WeakPriorityBlockingQueue<E> {
* natural ordering elements, can be used as container of objects <E> in the priority queue * natural ordering elements, can be used as container of objects <E> in the priority queue
* the elements with smallest ordering weights are first in the queue when elements are taken * the elements with smallest ordering weights are first in the queue when elements are taken
*/ */
public static class NaturalElement<E> extends AbstractElement<E> implements Comparable<NaturalElement<E>>, Comparator<NaturalElement<E>> { public static class NaturalElement<E> extends AbstractElement<E> implements Element<E>, Comparable<NaturalElement<E>>, Comparator<NaturalElement<E>> {
public NaturalElement(final E element, final long weight) { public NaturalElement(final E element, final long weight) {
this.element = element; this.element = element;
@ -321,13 +322,14 @@ public class WeakPriorityBlockingQueue<E> {
if (o1h < o2h) return -1; if (o1h < o2h) return -1;
return 0; return 0;
} }
} }
/** /**
* reverse ordering elements, can be used as container of objects <E> in the priority queue * reverse ordering elements, can be used as container of objects <E> in the priority queue
* the elements with highest ordering weights are first in the queue when elements are taken * the elements with highest ordering weights are first in the queue when elements are taken
*/ */
public static class ReverseElement<E> extends AbstractElement<E> implements Comparable<ReverseElement<E>>, Comparator<ReverseElement<E>> { public static class ReverseElement<E> extends AbstractElement<E> implements Element<E>, Comparable<ReverseElement<E>>, Comparator<ReverseElement<E>> {
public ReverseElement(final E element, final long weight) { public ReverseElement(final E element, final long weight) {
this.element = element; this.element = element;
@ -352,14 +354,26 @@ public class WeakPriorityBlockingQueue<E> {
} }
public static void main(String[] args) { public static void main(String[] args) {
WeakPriorityBlockingQueue<ReverseElement<String>> a = new WeakPriorityBlockingQueue<ReverseElement<String>>(3); final WeakPriorityBlockingQueue<String> a = new WeakPriorityBlockingQueue<String>(3);
//final Element<String> REVERSE_POISON = new ReverseElement<String>("", Long.MIN_VALUE);
new Thread(){
public void run() {
Element<String> e;
try {
while ((e = a.poll(1000)) != null) System.out.println("> " + e.toString());
} catch (InterruptedException e1) {
e1.printStackTrace();
}
}
}.start();
a.put(new ReverseElement<String>("abc", 1)); a.put(new ReverseElement<String>("abc", 1));
//a.poll(); //a.poll();
a.put(new ReverseElement<String>("abcx", 2)); a.put(new ReverseElement<String>("abcx", 2));
a.put(new ReverseElement<String>("6s_7dfZk4xvc", 3)); a.put(new ReverseElement<String>("6s_7dfZk4xvc", 3));
a.put(new ReverseElement<String>("6s_7dfZk4xvcx", 4)); a.put(new ReverseElement<String>("6s_7dfZk4xvcx", 4));
//a.put((Element<String>) REVERSE_POISON);
//a.poll(); //a.poll();
System.out.println("size = " + a.sizeAvailable()); System.out.println("size = " + a.sizeAvailable());
while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString()); //while (a.sizeQueue() > 0) System.out.println("> " + a.poll().toString());
} }
} }

@ -98,6 +98,11 @@ public interface Parser {
this.url = url; this.url = url;
} }
/**
 * Creates a failure for the given URL that also carries an underlying throwable.
 * The message handed to the superclass is {@code message} followed by
 * {@code "; url = "} and {@code url.toNormalform(true, false)}.
 *
 * @param message description of what went wrong
 * @param url the URL that was being processed; it is dereferenced here, so it must not be null
 * @param e the throwable passed as second argument to the superclass constructor
 *          (presumably recorded as the cause -- the superclass is not visible here)
 */
public Failure(final String message, final MultiProtocolURI url, Throwable e) {
super(message + "; url = " + url.toNormalform(true, false), e);
// keep the URL so that it can be read back through getURL()
this.url = url;
}
public MultiProtocolURI getURL() { public MultiProtocolURI getURL() {
return this.url; return this.url;
} }

@ -58,7 +58,7 @@ public class rssParser extends AbstractParser implements Parser {
try { try {
rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source, RSSReader.Type.none); rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source, RSSReader.Type.none);
} catch (IOException e) { } catch (IOException e) {
throw new Parser.Failure("Load error:" + e.getMessage(), url); throw new Parser.Failure("Load error:" + e.getMessage(), url, e);
} }
RSSFeed feed = rssReader.getFeed(); RSSFeed feed = rssReader.getFeed();

@ -273,6 +273,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
// checks for local/global IP range and local IP // checks for local/global IP range and local IP
public final boolean isLocal() { public final boolean isLocal() {
if (this.isSMB() || this.isFile()) return true;
if (this.hash == null) { if (this.hash == null) {
if (super.isLocal()) return true; if (super.isLocal()) return true;
synchronized (this) { synchronized (this) {

@ -179,6 +179,7 @@ public final class LoaderDispatcher {
private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException { private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
// get the protocol of the next URL // get the protocol of the next URL
final DigestURI url = request.url(); final DigestURI url = request.url();
if (url.isFile() || url.isSMB()) cacheStrategy = CrawlProfile.CacheStrategy.NOCACHE; // load just from the file system
final String protocol = url.getProtocol(); final String protocol = url.getProtocol();
final String host = url.getHost(); final String host = url.getHost();

Loading…
Cancel
Save