Fixed a problem with local search from Solr results: all results from Solr are now shown (again).
pull/1/head
Michael Peter Christen 12 years ago
parent 02957d5982
commit c5f67a5d6d

@ -126,7 +126,7 @@ public class IndexControlRWIs_p {
if ( post.containsKey("keystringsearch") ) {
prop.put("keyhash", keyhash);
final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
if ( ranking.filteredCount() == 0 ) {
if ( ranking.rwiAvailableCount() == 0 ) {
prop.put("searchresult", 1);
prop.putHTML("searchresult_word", keystring);
}
@ -137,7 +137,7 @@ public class IndexControlRWIs_p {
prop.put("keystring", "<" + errmsg + ">");
}
final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
if ( ranking.filteredCount() == 0 ) {
if ( ranking.rwiAvailableCount() == 0 ) {
prop.put("searchresult", 2);
prop.putHTML("searchresult_wordhash", ASCII.String(keyhash));
}
@ -466,7 +466,7 @@ public class IndexControlRWIs_p {
final String keyhashs = ASCII.String(keyhash);
prop.put("genUrlList_keyHash", keyhashs);
if ( ranked.filteredCount() == 0 ) {
if ( ranked.rwiAvailableCount() == 0 ) {
prop.put("genUrlList", 1);
prop.put("genUrlList_count", 0);
prop.put("searchresult", 2);
@ -480,7 +480,7 @@ public class IndexControlRWIs_p {
URIMetadataNode entry;
String us;
long rn = -1;
while ( !ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null ) {
while ( !ranked.rwiIsEmpty() && (entry = ranked.takeURL(false, 1000)) != null ) {
url = entry.url();
if ( url == null ) {
continue;
@ -678,12 +678,12 @@ public class IndexControlRWIs_p {
final RWIProcess ranked = new RWIProcess(query, order, false);
ranked.run();
if ( ranked.filteredCount() == 0 ) {
if ( ranked.rwiAvailableCount() == 0 ) {
prop.put("searchresult", 2);
prop.put("searchresult_wordhash", keyhash);
} else {
prop.put("searchresult", 3);
prop.put("searchresult_allurl", ranked.filteredCount());
prop.put("searchresult_allurl", ranked.rwiAvailableCount());
prop
.put("searchresult_description", ranked.flagCount()[WordReferenceRow.flag_app_dc_description]);
prop.put("searchresult_title", ranked.flagCount()[WordReferenceRow.flag_app_dc_title]);

@ -323,7 +323,7 @@ public final class search {
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
// set statistic details of search result and find best result index set
joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount();
joincount = theSearch.getRankingResult().rwiAvailableCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount();
prop.put("joincount", Integer.toString(joincount));
if (joincount != 0) {
accu = theSearch.result().completeResults(maxtime);

@ -832,7 +832,7 @@ public class yacysearch {
+ theQuery.queryString
+ " - "
+ "local-unfiltered("
+ theSearch.getRankingResult().getLocalIndexCount()
+ theSearch.getRankingResult().rwiAvailableCount()
+ "), "
+ "local_miss("
+ theSearch.getRankingResult().getMissCount()
@ -848,7 +848,7 @@ public class yacysearch {
// prepare search statistics
theQuery.resultcount =
theSearch.getRankingResult().getLocalIndexCount()
theSearch.getRankingResult().rwiAvailableCount()
- theSearch.getRankingResult().getMissCount()
- theSearch.getRankingResult().getSortOutCount()
+ theSearch.getRankingResult().getRemoteIndexCount();
@ -933,7 +933,7 @@ public class yacysearch {
}
final int indexcount =
theSearch.getRankingResult().getLocalIndexCount()
theSearch.getRankingResult().rwiAvailableCount()
- theSearch.getRankingResult().getMissCount()
- theSearch.getRankingResult().getSortOutCount()
+ theSearch.getRankingResult().getRemoteIndexCount();
@ -950,7 +950,7 @@ public class yacysearch {
: "0");
prop.put(
"num-results_globalresults_localResourceSize",
Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
Formatter.number(theSearch.getRankingResult().rwiAvailableCount(), true));
prop.put(
"num-results_globalresults_localMissCount",
Formatter.number(theSearch.getRankingResult().getMissCount(), true));

@ -90,13 +90,13 @@ public class yacysearchitem {
final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int totalcount = theSearch.getRankingResult().rwiAvailableCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int offset = theQuery.neededResults() - theQuery.itemsPerPage() + 1;
prop.put("offset", offset);
prop.put("itemscount", Formatter.number(Math.min((item < 0) ? theQuery.neededResults() : item + 1, totalcount)));
prop.put("itemsperpage", Formatter.number(theQuery.itemsPerPage));
prop.put("totalcount", Formatter.number(totalcount, true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().rwiAvailableCount(), true));
prop.put("localMissCount", Formatter.number(theSearch.getRankingResult().getMissCount(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));

@ -33,13 +33,13 @@ public class yacysearchlatestinfo {
final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int totalcount = theSearch.getRankingResult().rwiAvailableCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int offset = theQuery.neededResults() - theQuery.itemsPerPage() + 1;
prop.put("offset", offset);
prop.put("itemscount",Formatter.number(offset + theSearch.getQuery().itemsPerPage >= totalcount ? offset + totalcount % theSearch.getQuery().itemsPerPage - 1 : offset + theSearch.getQuery().itemsPerPage - 1));
prop.put("itemsperpage", theSearch.getQuery().itemsPerPage);
prop.put("totalcount", Formatter.number(totalcount, true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().rwiAvailableCount(), true));
prop.put("localMissCount", Formatter.number(theSearch.getRankingResult().getMissCount(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));

@ -338,7 +338,7 @@ public class yacysearchtrailer {
final String aboutBody = env.getConfig("about.body", "");
final String aboutHeadline = env.getConfig("about.headline", "");
if ((aboutBody.isEmpty() && aboutHeadline.isEmpty()) ||
theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) {
theSearch.getRankingResult().rwiAvailableCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) {
prop.put("nav-about", 0);
} else {
prop.put("nav-about", 1);
@ -349,7 +349,7 @@ public class yacysearchtrailer {
// category: location search
// show only if there is a location database present and if there had been any search results
if (LibraryProvider.geoLoc.isEmpty() ||
theSearch.getRankingResult().getLocalIndexCount() == 0) {
theSearch.getRankingResult().rwiAvailableCount() == 0) {
prop.put("cat-location", 0);
} else {
prop.put("cat-location", 1);
@ -357,7 +357,7 @@ public class yacysearchtrailer {
prop.put(fileType, "cat-location_queryenc", theQuery.queryString(true).replace(' ', '+'));
}
final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int indexcount = theSearch.getRankingResult().rwiAvailableCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
prop.put("num-results_totalcount", indexcount);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEvent.Type.FINALIZATION, "bottomline", 0, 0), false);

@ -47,7 +47,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
private final TreeSet<Element<E>> queue; // object within the stack, ordered using a TreeSet
private final Semaphore enqueued; // semaphore for elements in the stack
private final ArrayList<Element<E>> drained; // objects that had been on the stack but had been removed
protected int maxsize;
private int maxsize;
/**
* create a new WeakPriorityBlockingQueue
@ -158,18 +158,6 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
}
}
/**
* Retrieves and removes the head of this queue, waiting if no elements are present on this queue.
* @return the head element from the queue
* @throws InterruptedException
*/
public Element<E> take() throws InterruptedException {
this.enqueued.acquire();
synchronized (this) {
return takeUnsafe();
}
}
private Element<E> takeUnsafe() {
final Element<E> element = this.queue.first();
assert element != null;
@ -265,7 +253,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* return all entries as they would be retrievable with element()
* @return a list of all elements in the stack
*/
public synchronized ArrayList<Element<E>> list() {
private synchronized ArrayList<Element<E>> list() {
if (this.drained == null) return null;
// shift all elements
while (!this.queue.isEmpty()) this.poll();
@ -293,7 +281,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
public String toString();
}
protected abstract static class AbstractElement<E> implements Element<E>, Serializable {
private abstract static class AbstractElement<E> implements Element<E>, Serializable {
private static final long serialVersionUID = -7026597258248026566L;

@ -396,10 +396,12 @@ public final class FileUtils {
return mb;
}
private final static Pattern ps = Pattern.compile("\\\\");
private final static Pattern pn = Pattern.compile("\\n");
private final static Pattern pe = Pattern.compile("=");
private final static Pattern backslashbackslash = Pattern.compile("\\\\");
private final static Pattern unescaped_equal = Pattern.compile("=");
private final static Pattern escaped_equal = Pattern.compile("\\=", Pattern.LITERAL);
private final static Pattern escaped_newline = Pattern.compile("\\n", Pattern.LITERAL);
private final static Pattern escaped_backslash = Pattern.compile(Pattern.quote("\\"), Pattern.LITERAL);
public static void saveMap(final File file, final Map<String, String> props, final String comment) {
PrintWriter pw = null;
final File tf = new File(file.toString() + "." + (System.currentTimeMillis() % 1000));
@ -410,16 +412,16 @@ public final class FileUtils {
for ( final Map.Entry<String, String> entry : props.entrySet() ) {
    // Write one "key=value" line per entry. Escaping uses the literal String.replace on purpose:
    // Matcher.replaceAll treats a backslash in the replacement string as an escape character, so
    // replaceAll("\\\\"), replaceAll("\\n") and replaceAll("\\=") emit '\', 'n' and '=' respectively,
    // which means nothing actually gets escaped.
    key = entry.getKey();
    if ( key != null ) {
        key = key.replace("\\", "\\\\"); // backslash first, so the escapes added below are not doubled
        key = key.replace("\n", "\\n");  // a real line break becomes the two characters backslash + n
        key = key.replace("=", "\\=");   // '=' separates key and value, so it must be escaped inside keys
    }
    if ( entry.getValue() == null ) {
        value = "";
    } else {
        value = entry.getValue();
        value = value.replace("\\", "\\\\"); // same order as for keys: backslash before line break
        value = value.replace("\n", "\\n");
    }
    pw.println(key + "=" + value);
}
@ -452,10 +454,6 @@ public final class FileUtils {
return table(new StringsIterator(br));
}
private final static Pattern escaped_equal = Pattern.compile("\\=", Pattern.LITERAL);
private final static Pattern escaped_newline = Pattern.compile("\\n", Pattern.LITERAL);
private final static Pattern escaped_backslash = Pattern.compile(Pattern.quote("\\"), Pattern.LITERAL);
public static ConcurrentHashMap<String, String> table(final Iterator<String> li) {
String line;
final ConcurrentHashMap<String, String> props = new ConcurrentHashMap<String, String>();

@ -870,7 +870,7 @@ public final class Protocol
Network.log.logInfo("remote search: peer " + target.getName() + " sent " + container.get(0).size() + "/" + result.joincount + " references");
}
public static class SearchResult {
private static class SearchResult {
public String version; // version : application version of responder
public String uptime; // uptime : uptime in seconds of responder
public String fwhop; // hops (depth) of forwards that had been performed to construct this result
@ -1060,7 +1060,9 @@ public final class Protocol
// evaluate result
List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
if (docList.size() > 0) {// create containers
if (docList.size() == 0) {
Network.log.logInfo("SEARCH (solr), returned 0 out of " + docList.getNumFound() + " documents from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())) + " query = " + solrQuery.toString()) ;
} else {// create containers
Network.log.logInfo("SEARCH (solr), returned " + docList.size() + " out of " + docList.getNumFound() + " documents from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))) ;
int term = count;
@ -1623,7 +1625,7 @@ public final class Protocol
return false;
}
public static final LinkedHashMap<String, ContentBody> basicRequestParts(
private static final LinkedHashMap<String, ContentBody> basicRequestParts(
final Switchboard sb,
final String targetHash,
final String salt) {
@ -1655,7 +1657,7 @@ public final class Protocol
return parts;
}
public static final LinkedHashMap<String, ContentBody> basicRequestParts(
private static final LinkedHashMap<String, ContentBody> basicRequestParts(
final String myHash,
final String targetHash,
final String networkName) {

@ -68,8 +68,8 @@ import net.yacy.search.Switchboard;
public class WebStructureGraph {
public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 50000; // maximum number of hosts in web structure map
public static int maxref = 200; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 10000; // maximum number of hosts in web structure map
private final static Log log = new Log("WebStructureGraph");
@ -110,7 +110,8 @@ public class WebStructureGraph {
if ( loadedStructureB != null ) {
    this.structure_old.putAll(loadedStructureB);
}
// loadedStructureB may be null (hence the guard above), so the entry count for the log line
// is derived null-safely instead of dereferencing the map unconditionally
log.logInfo("loaded dump of " + (loadedStructureB == null ? 0 : loadedStructureB.size()) + " entries from " + this.structureFile.toString());
// delete out-dated entries in case the structure is too big
if ( this.structure_old.size() > maxhosts ) {
// fill a set with last-modified - dates of the structure
@ -152,6 +153,11 @@ public class WebStructureGraph {
}
}
/**
 * Removes all entries from both structure maps, structure_old and structure_new.
 * NOTE(review): other code in this class synchronizes on structure_old before touching it;
 * this method does not take that lock - confirm whether callers need it.
 */
public void clear() {
this.structure_old.clear();
this.structure_new.clear();
}
public void generateCitationReference(final DigestURI url, final Document document) {
// generate citation reference
final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
@ -527,6 +533,7 @@ public class WebStructureGraph {
if (hosthash == null || hosthash.length() != 6) return 0;
SortedMap<String, byte[]> tailMap;
int c = 0;
try {
synchronized ( this.structure_old ) {
tailMap = this.structure_old.tailMap(hosthash);
if ( !tailMap.isEmpty() ) {
@ -545,6 +552,9 @@ public class WebStructureGraph {
}
}
}
} catch (Throwable t) {
this.clear();
}
return c;
}
@ -803,6 +813,7 @@ public class WebStructureGraph {
+ " entries");
final long time = System.currentTimeMillis();
joinOldNew();
log.logInfo("dumping " + structure_old.size() + " entries to " + structureFile.toString());
if ( !this.structure_old.isEmpty() ) {
synchronized ( this.structure_old ) {
if ( !this.structure_old.isEmpty() ) {

@ -74,13 +74,6 @@ public final class QueryParams {
public enum Searchdom {
LOCAL, CLUSTER, GLOBAL;
public static Searchdom contentdomParser(final String dom) {
if ("local".equals(dom)) return LOCAL;
else if ("global".equals(dom)) return GLOBAL;
else if ("cluster".equals(dom)) return CLUSTER;
return LOCAL;
}
@Override
public String toString() {
if (this == LOCAL) return "local";
@ -92,15 +85,9 @@ public final class QueryParams {
private static final String ampersand = "&amp;";
public static enum FetchMode {
NO_FETCH_NO_VERIFY,
FETCH_BUT_ACCEPT_OFFLINE_OR_USE_CACHE,
FETCH_AND_VERIFY_ONLINE;
}
public static class Modifier {
String s;
public Modifier(final String modifier) {
private String s;
private Modifier(final String modifier) {
this.s = modifier;
}
public String getModifier() {
@ -111,44 +98,47 @@ public final class QueryParams {
public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA");
public static final Pattern catchall_pattern = Pattern.compile(".*");
public static final Pattern matchnothing_pattern = Pattern.compile("");
private static final Pattern matchnothing_pattern = Pattern.compile("");
public final String queryString;
public final HandleSet query_include_hashes, query_exclude_hashes, query_all_hashes;
public final Collection<String> query_include_words, query_exclude_words, query_all_words;
private final Collection<String> query_include_words, query_exclude_words, query_all_words;
public final int itemsPerPage;
public int offset;
public final Pattern urlMask, prefer;
public final boolean urlMask_isCatchall, prefer_isMatchnothing;
final boolean urlMask_isCatchall;
private final boolean prefer_isMatchnothing;
public final Classification.ContentDomain contentdom;
public final String targetlang;
public final Collection<Tagging.Metatag> metatags;
protected final Collection<Tagging.Metatag> metatags;
public final String navigators;
public final Searchdom domType;
public final int zonecode;
public final int domMaxTargets;
private final int zonecode;
private final int domMaxTargets;
public final int maxDistance;
public final Bitfield constraint;
public final boolean allofconstraint;
public CacheStrategy snippetCacheStrategy;
final boolean allofconstraint;
protected CacheStrategy snippetCacheStrategy;
public final RankingProfile ranking;
private final Segment indexSegment;
public final String host; // this is the client host that starts the query, not a site operator
public final String sitehash; // this is a domain hash, 6 bytes long or null
public final Set<String> siteexcludes; // set of domain hashes that are excluded if not included by sitehash
protected final Set<String> siteexcludes; // set of domain hashes that are excluded if not included by sitehash
public final String authorhash;
public final Modifier modifier;
public Seed remotepeer;
public final long starttime, maxtime, timeout; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds)
public final long starttime; // the time when the query started (milliseconds)
protected final long maxtime; // how long the query should take (milliseconds)
protected final long timeout; // the time when the timeout is reached (milliseconds)
// values that are set after a search:
public int resultcount; // number of found results
public int transmitcount; // number of results that had been shown to the user
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options
private boolean specialRights; // is true if the user has a special authorization and may use more database-extensive options
public final String userAgent;
public boolean filterfailurls;
public double lat, lon, radius;
public String solrQueryString = null;
protected boolean filterfailurls;
protected double lat, lon, radius;
public QueryParams(
final String queryString,
@ -303,7 +293,7 @@ public final class QueryParams {
this.radius = Math.floor(radius * this.kmNormal + 1) / this.kmNormal;
}
double kmNormal = 100.d; // 100 =ca 40000.d / 360.d == 111.11 - if lat/lon is multiplied with this, rounded and diveded by this, the location is normalized to a 1km grid
private double kmNormal = 100.d; // 100 =ca 40000.d / 360.d == 111.11 - if lat/lon is multiplied with this, rounded and divided by this, the location is normalized to a 1km grid
public Segment getSegment() {
return this.indexSegment;
@ -397,7 +387,7 @@ public final class QueryParams {
* @param text
* @return true if the query matches with the given text
*/
public final boolean matchesText(final String text) {
private final boolean matchesText(final String text) {
boolean ret = false;
final HandleSet wordhashes = Word.words2hashesHandles(Condenser.getWords(text, null).keySet());
if (!SetTools.anymatch(wordhashes, this.query_exclude_hashes)) {
@ -406,7 +396,7 @@ public final class QueryParams {
return ret;
}
public static final boolean anymatch(final String text, final HandleSet keyhashes) {
protected static final boolean anymatch(final String text, final HandleSet keyhashes) {
// returns true if any of the word hashes in keyhashes appear in the String text
// to do this, all words in the string must be recognized and transcoded to word hashes
if (keyhashes == null || keyhashes.isEmpty()) return false;
@ -469,12 +459,12 @@ public final class QueryParams {
return ret;
}
final static YaCySchema[] fields = new YaCySchema[]{
private final static YaCySchema[] fields = new YaCySchema[]{
YaCySchema.sku,YaCySchema.title,YaCySchema.h1_txt,YaCySchema.h2_txt,
YaCySchema.author,YaCySchema.description,YaCySchema.keywords,YaCySchema.text_t,YaCySchema.synonyms_sxt
};
final static Map<YaCySchema,Float> boosts = new LinkedHashMap<YaCySchema,Float>();
private final static Map<YaCySchema,Float> boosts = new LinkedHashMap<YaCySchema,Float>();
static {
boosts.put(YaCySchema.sku, 20.0f);
boosts.put(YaCySchema.title, 15.0f);
@ -485,19 +475,6 @@ public final class QueryParams {
boosts.put(YaCySchema.keywords, 2.0f);
boosts.put(YaCySchema.text_t, 1.0f);
}
/*
public static final String QT ="qt";
public static final String WT ="wt";
public static final String Q ="q";
public static final String START ="start";
public static final String ROWS ="rows";
public static final String XSL ="xsl";
public static final String VERSION ="version";
public static final String FL = "fl";
public static final String DF = "df";
*/
public SolrQuery solrQuery() {
if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
@ -567,6 +544,7 @@ public final class QueryParams {
w.append(s);
wc++;
}
if (wc > 1) {w.insert(0, '('); w.append(')');}
// combine these queries for all relevant fields
wc = 0;

@ -100,8 +100,8 @@ public final class RWIProcess extends Thread
private int remote_peerCount;
private int local_indexCount;
private final AtomicInteger maxExpectedRemoteReferences, expectedRemoteReferences, receivedRemoteReferences;
private final WeakPriorityBlockingQueue<WordReferenceVars> stack;
private final WeakPriorityBlockingQueue<URIMetadataNode> nodeStack;
private final WeakPriorityBlockingQueue<WordReferenceVars> rwiStack;
protected final WeakPriorityBlockingQueue<URIMetadataNode> nodeStack;
private final AtomicInteger feedersAlive, feedersTerminated;
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
//private final HandleSet handover; // key = urlhash; used for double-check of urls that had been handed over to search process
@ -129,7 +129,7 @@ public final class RWIProcess extends Thread
this.addRunning = true;
this.localSearchInclusion = null;
int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation;
this.stack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
this.nodeStack = new WeakPriorityBlockingQueue<URIMetadataNode>(stackMaxsize, false);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.query = query;
@ -176,11 +176,11 @@ public final class RWIProcess extends Thread
this.expectedRemoteReferences.addAndGet(x);
}
public boolean expectMoreRemoteReferences() {
protected boolean expectMoreRemoteReferences() {
return this.expectedRemoteReferences.get() > 0;
}
public long waitTimeRecommendation() {
protected long waitTimeRecommendation() {
return
this.maxExpectedRemoteReferences.get() == 0 ? 0 :
Math.min(maxWaitPerResult,
@ -528,7 +528,7 @@ public final class RWIProcess extends Thread
this.urlhashes.putUnique(iEntry.urlhash());
rankingtryloop: while (true) {
try {
this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@ -566,7 +566,7 @@ public final class RWIProcess extends Thread
this.feedersAlive.addAndGet(1);
}
public boolean feedingIsFinished() {
protected boolean feedingIsFinished() {
return
this.feedersTerminated.intValue() > (this.remote ? 1 : 0) &&
this.feedersAlive.get() == 0;// &&
@ -590,7 +590,7 @@ public final class RWIProcess extends Thread
return false;
}
public Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
protected Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
// direct access to the result maps is needed for abstract generation
// this is only available if execQuery() was called before
return this.localSearchInclusion;
@ -615,19 +615,20 @@ public final class RWIProcess extends Thread
}
// loop as long as we can expect that we should get more results
final long timeout = System.currentTimeMillis() + waitingtime;
while (((!feedingIsFinished() && this.addRunning) || this.stack.sizeQueue() > 0) &&
while (((!feedingIsFinished() && this.addRunning) || this.nodeStack.sizeQueue() > 0 || this.rwiStack.sizeQueue() > 0) &&
(this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage || (loops > 1000 && !this.doubleDomCache.isEmpty()))) {
page = null;
rwi = null;
if ( waitingtime <= 0 ) {
page = this.addRunning ? this.nodeStack.poll(waitingtime) : this.nodeStack.poll();
if (page == null) rwi = this.addRunning ? this.stack.poll(waitingtime) : this.stack.poll();
if (page == null) rwi = this.addRunning ? this.rwiStack.poll(waitingtime) : this.rwiStack.poll();
} else {
timeoutloop: while ( System.currentTimeMillis() < timeout ) {
if (feedingIsFinished() && this.stack.sizeQueue() == 0) break timeoutloop;
//System.out.println("### RWIProcess feedingIsFinished() = " + feedingIsFinished() + ", this.nodeStack.sizeQueue() = " + this.nodeStack.sizeQueue());
if (feedingIsFinished() && this.rwiStack.sizeQueue() == 0 && this.nodeStack.sizeQueue() == 0) break timeoutloop;
page = this.nodeStack.poll(50);
if (page != null) break timeoutloop;
rwi = this.stack.poll(50);
rwi = this.rwiStack.poll(50);
if (rwi != null) break timeoutloop;
}
}
@ -658,7 +659,13 @@ public final class RWIProcess extends Thread
}
} catch ( final InterruptedException e1 ) {
}
if ( this.doubleDomCache.isEmpty() ) {
//Log.logWarning("RWIProcess", "feedingIsFinished() = " + feedingIsFinished());
//Log.logWarning("RWIProcess", "this.addRunning = " + this.addRunning);
//Log.logWarning("RWIProcess", "this.nodeStack.sizeQueue() = " + this.nodeStack.sizeQueue());
//Log.logWarning("RWIProcess", "this.stack.sizeQueue() = " + this.rwiStack.sizeQueue());
//Log.logWarning("RWIProcess", "this.doubleDomCachee.size() = " + this.doubleDomCache.size());
if (this.doubleDomCache.isEmpty()) {
Log.logWarning("RWIProcess", "doubleDomCache.isEmpty");
return null;
}
@ -684,7 +691,10 @@ public final class RWIProcess extends Thread
if (o == null) continue;
if (o.getWeight() < bestEntry.getWeight()) bestEntry = o;
}
if (bestEntry == null) return null;
if (bestEntry == null) {
Log.logWarning("RWIProcess", "bestEntry == null (1)");
return null;
}
// finally remove the best entry from the doubledom cache
m = this.doubleDomCache.get(bestEntry.getElement().hosthash());
@ -698,7 +708,10 @@ public final class RWIProcess extends Thread
}
}
}
if (bestEntry == null) return null;
if (bestEntry == null) {
Log.logWarning("RWIProcess", "bestEntry == null (2)");
return null;
}
return this.query.getSegment().fulltext().getMetadata(bestEntry.getElement(), bestEntry.getWeight());
}
@ -720,7 +733,10 @@ public final class RWIProcess extends Thread
while ( (timeleft = timeout - System.currentTimeMillis()) > 0 ) {
//System.out.println("timeleft = " + timeleft);
final URIMetadataNode page = takeRWI(skipDoubleDom, timeleft);
if (page == null) return null; // all time was already wasted in takeRWI to get another element
if (page == null) {
Log.logWarning("RWIProcess", "takeRWI returned null");
return null; // all time was already wasted in takeRWI to get another element
}
if ( !this.query.urlMask_isCatchall ) {
// check url mask
@ -751,8 +767,7 @@ public final class RWIProcess extends Thread
// content control
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false) == true) {
// check global network filter from bookmark list
if (!Switchboard.getSwitchboard()
@ -766,7 +781,6 @@ public final class RWIProcess extends Thread
continue;
}
}
}
}
@ -873,29 +887,16 @@ public final class RWIProcess extends Thread
// accept url
return page;
}
Log.logWarning("RWIProcess", "loop terminated");
return null;
}
final static Pattern SPACE_PATTERN = Pattern.compile(" ");
public int sizeQueue() {
int c = this.stack.sizeQueue();
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
c += s.sizeQueue();
}
return c;
}
public int sizeAvailable() {
int c = this.stack.sizeAvailable();
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
c += s.sizeAvailable();
}
return c;
public int[] flagCount() {
return this.flagcount;
}
public boolean isEmpty() {
if ( !this.stack.isEmpty() ) {
public boolean rwiIsEmpty() {
if ( !this.rwiStack.isEmpty() ) {
return false;
}
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
@ -906,20 +907,17 @@ public final class RWIProcess extends Thread
return true;
}
public int[] flagCount() {
return this.flagcount;
}
// "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
public int filteredCount() {
// the number of index entries that are considered as result set
return this.stack.sizeAvailable();
/**
 * Adds up the queue sizes of the RWI stack and of every queue stored in
 * {@code doubleDomCache}.
 *
 * @return {@code rwiStack.sizeQueue()} plus the {@code sizeQueue()} of each
 *         queue found in {@code doubleDomCache.values()}
 */
protected int rwiQueueSize() {
    // start with the RWI stack, then add each cached queue on top
    int total = this.rwiStack.sizeQueue();
    for (final WeakPriorityBlockingQueue<WordReferenceVars> cachedQueue : this.doubleDomCache.values()) {
        total = total + cachedQueue.sizeQueue();
    }
    return total;
}
public int getLocalIndexCount() {
public int rwiAvailableCount() {
// the number of results in the local peer after filtering
return this.local_indexCount;
return this.rwiStack.sizeAvailable();
}
public int getRemoteIndexCount() {

@ -103,6 +103,7 @@ public final class SearchEvent {
private final SortedMap<byte[], HeuristicResult> heuristics;
private byte[] IAmaxcounthash, IAneardhthash;
private final ReferenceOrder order;
private final Thread localsearch;
protected SearchEvent(
final QueryParams query,
@ -150,7 +151,7 @@ public final class SearchEvent {
this.rankingProcess = new RWIProcess(this.query, this.order, remote);
// start a local solr search
RemoteSearch.solrRemoteSearch(this, 1000, null /*this peer*/, Switchboard.urlBlacklist);
this.localsearch = RemoteSearch.solrRemoteSearch(this, 100, null /*this peer*/, Switchboard.urlBlacklist);
// start a local RWI search concurrently
this.rankingProcess.start();
@ -476,6 +477,7 @@ public final class SearchEvent {
}
/**
 * Returns the result entry for position {@code item} by delegating to the
 * result fetcher.
 * <p>
 * If the local search thread exists and is still alive, this method first
 * waits for it to terminate. When that wait is interrupted, the lookup is
 * still performed (as before), but the interrupt status of the calling
 * thread is restored afterwards instead of being silently discarded.
 * <p>
 * NOTE(review): the {@code join()} is unbounded and is not limited by
 * {@code timeout} -- confirm that the local search thread always
 * terminates promptly.
 *
 * @param item    passed through unchanged to {@code resultFetcher.oneResult}
 * @param timeout passed through unchanged to {@code resultFetcher.oneResult}
 * @return whatever {@code resultFetcher.oneResult(item, timeout)} returns
 */
public ResultEntry oneResult(final int item, final long timeout) {
    boolean interrupted = false;
    if (this.localsearch != null && this.localsearch.isAlive()) {
        try {
            this.localsearch.join();
        } catch (final InterruptedException e) {
            // remember the interruption; it is re-asserted once the lookup is done
            interrupted = true;
        }
    }
    try {
        return this.resultFetcher.oneResult(item, timeout);
    } finally {
        if (interrupted) {
            // give the interrupt back to the caller instead of swallowing it
            Thread.currentThread().interrupt();
        }
    }
}

@ -158,7 +158,7 @@ public class SnippetProcess {
int thisRankingQueueSize, lastRankingQueueSize = 0;
if (item < 10) {
while (
((thisRankingQueueSize = this.rankingProcess.sizeQueue()) > 0 || !this.rankingProcess.feedingIsFinished()) &&
((thisRankingQueueSize = this.rankingProcess.rwiQueueSize()) > 0 || !this.rankingProcess.feedingIsFinished()) &&
(thisRankingQueueSize > lastRankingQueueSize || this.result.sizeAvailable() < item + 1) &&
System.currentTimeMillis() < waittimeout &&
anyWorkerAlive()
@ -180,9 +180,9 @@ public class SnippetProcess {
WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
while (System.currentTimeMillis() < finishTime) {
//Log.logInfo("SnippetProcess", "item = " + item + "; anyWorkerAlive=" + anyWorkerAlive() + "; this.rankingProcess.isAlive() = " + this.rankingProcess.isAlive() + "; this.rankingProcess.feedingIsFinished() = " + this.rankingProcess.feedingIsFinished() + "; this.result.sizeAvailable() = " + this.result.sizeAvailable() + ", this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue());
Log.logInfo("SnippetProcess", "item = " + item + "; anyWorkerAlive=" + anyWorkerAlive() + "; this.rankingProcess.isAlive() = " + this.rankingProcess.isAlive() + "; this.rankingProcess.feedingIsFinished() = " + this.rankingProcess.feedingIsFinished() + "; this.result.sizeAvailable() = " + this.result.sizeAvailable() + ", this.rankingProcess.sizeQueue() = " + this.rankingProcess.rwiQueueSize() + ", this.rankingProcess.nodeStack.sizeAvailable() = " + this.rankingProcess.nodeStack.sizeAvailable());
if (!anyWorkerAlive() && !this.rankingProcess.isAlive() && this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && this.rankingProcess.feedingIsFinished()) {
if (!anyWorkerAlive() && !this.rankingProcess.isAlive() && this.result.sizeAvailable() + this.rankingProcess.rwiQueueSize() + this.rankingProcess.nodeStack.sizeAvailable() <= item && this.rankingProcess.feedingIsFinished()) {
//Log.logInfo("SnippetProcess", "interrupted result fetching; item = " + item + "; this.result.sizeAvailable() = " + this.result.sizeAvailable() + ", this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue() + "; this.rankingProcess.feedingIsFinished() = " + this.rankingProcess.feedingIsFinished());
break; // the fail case
}
@ -194,9 +194,7 @@ public class SnippetProcess {
}
try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}
if (entry != null) {
break;
}
if (entry != null) { break; }
}
// finally, if there is something, return the result
@ -346,7 +344,7 @@ public class SnippetProcess {
private void deployWorker(int deployCount, final int neededResults) {
if (this.cleanupState ||
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) ||
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.rwiQueueSize() == 0 && this.rankingProcess.nodeStack.sizeAvailable() == 0) ||
this.result.sizeAvailable() >= neededResults) {
return;
}
@ -356,7 +354,7 @@ public class SnippetProcess {
synchronized(this.workerThreads) {try {
for (int i = 0; i < this.workerThreads.length; i++) {
if (this.result.sizeAvailable() >= neededResults ||
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) {
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.rwiQueueSize() == 0) && this.rankingProcess.nodeStack.sizeAvailable() == 0) {
break;
}
worker = new Worker(this.query.maxtime, this.query.snippetCacheStrategy, neededResults);
@ -377,7 +375,7 @@ public class SnippetProcess {
for (int i = 0; i < this.workerThreads.length; i++) {
if (deployCount <= 0 ||
this.result.sizeAvailable() >= neededResults ||
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) {
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.rwiQueueSize() == 0) && this.rankingProcess.nodeStack.sizeAvailable() == 0) {
break;
}
if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
@ -450,33 +448,32 @@ public class SnippetProcess {
//final int fetchAhead = snippetMode == 0 ? 0 : 10;
final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics",0) >= 0;
try {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
while (this.shallrun && System.currentTimeMillis() < this.timeout) {
//Log.logInfo("SnippetProcess", "***** timeleft = " + (this.timeout - System.currentTimeMillis()));
this.lastLifeSign = System.currentTimeMillis();
if (MemoryControl.shortStatus()) {
Log.logWarning("SnippetProcess", "shortStatus");
break;
}
// check if we have enough; we stop only if we can fetch online; otherwise its better to run this to get better navigation
if ((this.cacheStrategy == null || this.cacheStrategy.isAllowedToFetchOnline()) && SnippetProcess.this.result.sizeAvailable() >= this.neededResults) {
//Log.logWarning("ResultFetcher", SnippetProcess.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
Log.logWarning("SnippetProcess", SnippetProcess.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
break;
}
// check if we can succeed if we try to take another url
if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) {
//Log.logWarning("ResultFetcher", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.rwiQueueSize() == 0 && SnippetProcess.this.rankingProcess.nodeStack.sizeAvailable() == 0) {
Log.logWarning("SnippetProcess", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
break;
}
// get next entry
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(500, Math.max(20, this.timeout - System.currentTimeMillis())));
//if (page != null) Log.logInfo("ResultFetcher", "got one page: " + page.metadata().url().toNormalform(true, false));
//if (page != null) Log.logInfo("SnippetProcess", "got one page: " + page.metadata().url().toNormalform(true, false));
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) {
//Log.logWarning("ResultFetcher", "page == null");
Log.logWarning("SnippetProcess", "page == null");
break; // no more available
}
@ -489,12 +486,8 @@ public class SnippetProcess {
String solrContent = page.getText();
resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
if (resultEntry == null)
{
if (resultEntry == null) {
continue; // the entry had some problems, cannot be used
//final String rawLine = resultEntry.textSnippet() == null ? null : resultEntry.textSnippet().getLineRaw();
//System.out.println("***SNIPPET*** raw='" + rawLine + "', pattern='" + this.snippetPattern.toString() + "'");
//if (rawLine != null && !this.snippetPattern.matcher(rawLine).matches()) continue;
}
//if (result.contains(resultEntry)) continue;

Loading…
Cancel
Save