search process enhancements

pull/1/head
Michael Peter Christen 12 years ago
parent 02020b590b
commit 84f82541e8

@ -107,7 +107,7 @@ public class yacysearchtrailer {
}
// host navigators
final ScoreMap<String> hostNavigator = theSearch.rankingProcess.getHostNavigator();
final ScoreMap<String> hostNavigator = theSearch.hostNavigator;
if (hostNavigator == null || hostNavigator.isEmpty()) {
prop.put("nav-domains", 0);
} else {

@ -1970,6 +1970,7 @@ public class FTPClient {
// protocol socket commands
private void send(final String buf) throws IOException {
if (this.clientOutput == null) return;
byte[] b = buf.getBytes("UTF-8");
this.clientOutput.write(b, 0, b.length);
this.clientOutput.write('\r');

@ -230,7 +230,7 @@ public final class LoaderDispatcher {
}
// now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test
assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
//assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
if (response.isFreshForProxy()) {
final byte[] content = Cache.getContent(url.hash());
if (content != null) {

@ -463,7 +463,7 @@ public final class QueryParams {
while ((p = urlMaskPattern.indexOf(':')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
while ((p = urlMaskPattern.indexOf('/')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
while ((p = urlMaskPattern.indexOf('\\')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 2);
//fq.append(" AND ").append(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/");
fq.append(" AND ").append(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/");
}
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {

@ -82,7 +82,6 @@ public final class RankingProcess extends Thread {
protected final AtomicInteger receivedRemoteReferences;
protected final ReferenceOrder order;
protected final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
protected final ScoreMap<String> hostNavigator = new ConcurrentScoreMap<String>(); // a counter for the appearance of host names
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
private boolean remote;
@ -361,11 +360,7 @@ public final class RankingProcess extends Thread {
// this is only available if execQuery() was called before
return this.localSearchInclusion;
}
/**
 * Returns the host navigator held by this ranking process.
 *
 * @return the score map stored in {@code this.hostNavigator}
 */
public ScoreMap<String> getHostNavigator() {
return this.hostNavigator;
}
public ScoreMap<String> getTopicNavigator(final int count) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
@ -420,14 +415,10 @@ public final class RankingProcess extends Thread {
/**
 * Passes the words of a result entry's title to {@code addTopic}.
 * The title is lowercased and split with {@code MultiProtocolURI.splitpattern};
 * an entry whose url or title is null is ignored.
 *
 * @param resultEntry the result entry whose title words are handed to {@code addTopic}
 */
protected void addTopics(final ResultEntry resultEntry) {
// take out relevant information for reference computation
if ( (resultEntry.url() == null) || (resultEntry.title() == null) ) {
return;
}
//final String[] urlcomps = htmlFilterContentScraper.urlComps(resultEntry.url().toNormalform(true, true)); // word components of the url
// NOTE(review): same condition as the guard above; this hunk appears to list both the
// old and the new form of the check without +/- markers -- confirm which one the file keeps
if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;
final String[] descrcomps = MultiProtocolURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description
// add references
//addTopic(urlcomps);
addTopic(descrcomps);
}

@ -62,7 +62,6 @@ import net.yacy.document.LargeNumberCache;
import net.yacy.document.LibraryProvider;
import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceVars;
@ -113,6 +112,7 @@ public final class SearchEvent {
private byte[] IAmaxcounthash, IAneardhthash;
private final Thread localsearch;
private final AtomicInteger expectedRemoteReferences, maxExpectedRemoteReferences; // counter for referenced that had been sorted out for other reasons
public final ScoreMap<String> hostNavigator; // a counter for the appearance of host names
public final ScoreMap<String> authorNavigator; // a counter for the appearances of authors
public final ScoreMap<String> namespaceNavigator; // a counter for name spaces
public final ScoreMap<String> protocolNavigator; // a counter for protocol types
@ -164,6 +164,7 @@ public final class SearchEvent {
} else {
this.namespaceNavigator = null;
}
this.hostNavigator = new ConcurrentScoreMap<String>();
this.protocolNavigator = new ConcurrentScoreMap<String>();
this.filetypeNavigator = new ConcurrentScoreMap<String>();
this.vocabularyNavigator = new ConcurrentHashMap<String, ScoreMap<String>>();
@ -478,7 +479,7 @@ public final class SearchEvent {
// collect navigation information
ReversibleScoreMap<String> fcts = facets.get(YaCySchema.host_s.getSolrFieldName());
if (fcts != null) this.rankingProcess.hostNavigator.inc(fcts);
if (fcts != null) this.hostNavigator.inc(fcts);
if (this.filetypeNavigator != null) {
fcts = facets.get(YaCySchema.url_file_ext_s.getSolrFieldName());
@ -564,38 +565,6 @@ public final class SearchEvent {
if (!hosthash.equals(this.query.nav_sitehash)) continue pollloop;
}
// check vocabulary constraint
/*
String subject = YaCyMetadata.hashURI(iEntry.hash());
Resource resource = JenaTripleStore.getResource(subject);
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
// all metatags must appear in the tags list
for (Tagging.Metatag metatag: this.query.metatags) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, metatag.getPredicate());
if (!ni.hasNext()) continue pollloop;
String tags = ni.next().toString();
if (tags.indexOf(metatag.getObject()) < 0) continue pollloop;
}
}
*/
// add navigators using the triplestore
/*
for (Map.Entry<String, String> v: this.rankingProcess.taggingPredicates.entrySet()) {
Iterator<RDFNode> ni = JenaTripleStore.getObjects(resource, v.getValue());
while (ni.hasNext()) {
String[] tags = CommonPattern.COMMA.split(ni.next().toString());
for (String tag: tags) {
ScoreMap<String> voc = this.rankingProcess.vocabularyNavigator.get(v.getKey());
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
this.rankingProcess.vocabularyNavigator.put(v.getKey(), voc);
}
voc.inc(tag);
}
}
}
*/
// finally extend the double-check and insert result to stack
this.rankingProcess.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
@ -894,7 +863,12 @@ public final class SearchEvent {
deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), this.query.neededResults());
}
// wait until local data is there
while (this.localsearch != null && this.localsearch.isAlive() && this.result.sizeAvailable() < item) try {this.localsearch.join(10);} catch (InterruptedException e) {}
while (this.localsearch != null && this.localsearch.isAlive() && this.result.sizeAvailable() < item) try {
if (!anyWorkerAlive()) {
deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), this.query.neededResults());
}
this.localsearch.join(10);
} catch (InterruptedException e) {}
// check if we already retrieved this item
// (happens if a search page is accessed a second time)
final long finishTime = System.currentTimeMillis() + timeout;

Loading…
Cancel
Save