modified auto-delete of search failure-words:

- words are no longer deleted automatically from the search index if index receive is switched off
- a flag in the network definition (network.unit.inspection.searchverify) determines whether this feature is enabled at all
- the search filter for not-found word references is switched off for server-side remote searches

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7441 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 58346b9a76
commit efb4ca8fa8

@ -15,6 +15,7 @@ network.unit.dht = true
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 3
network.unit.dht.partitionExponent = 4
network.unit.inspection.searchverify = true
network.unit.remotecrawl.speed = 300
network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt
network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt

@ -14,6 +14,7 @@ network.unit.dht = false
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 1
network.unit.dht.partitionExponent = 0
network.unit.inspection.searchverify = false
network.unit.remotecrawl.speed = 600
# each network may use different yacy distributions.

@ -11,6 +11,7 @@ network.unit.dht = false
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 1
network.unit.dht.partitionExponent = 0
network.unit.inspection.searchverify = false
network.unit.remotecrawl.speed = 1
# each network may use different yacy distributions.

@ -232,7 +232,8 @@ public final class search {
false,
indexSegment,
rankingProfile,
header.get(RequestHeader.USER_AGENT, "")
header.get(RequestHeader.USER_AGENT, ""),
false
);
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
@ -286,7 +287,8 @@ public final class search {
false,
sb.indexSegments.segment(Segments.Process.PUBLIC),
rankingProfile,
header.get(RequestHeader.USER_AGENT, "")
header.get(RequestHeader.USER_AGENT, ""),
false
);
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
yacyChannel.channels(yacyChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));

@ -499,7 +499,8 @@ public class yacysearch {
authenticated,
indexSegment,
ranking,
header.get(RequestHeader.USER_AGENT, ""));
header.get(RequestHeader.USER_AGENT, ""),
sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && sb.peers.mySeed().getFlagAcceptRemoteIndex());
EventTracker.delete(EventTracker.EClass.SEARCH);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.Type.INITIALIZATION, "", 0, 0), false);

@ -103,6 +103,7 @@ public final class QueryParams {
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
public boolean specialRights; // is true if the user has a special authorization and may use more database-extensive options
public final String userAgent;
public boolean filterfailurls;
public QueryParams(final String queryString,
final int itemsPerPage,
@ -154,6 +155,7 @@ public final class QueryParams {
this.indexSegment = indexSegment;
this.userAgent = userAgent;
this.transmitcount = 0;
this.filterfailurls = false;
}
public QueryParams(
@ -175,7 +177,8 @@ public final class QueryParams {
final boolean specialRights,
final Segment indexSegment,
final RankingProfile ranking,
final String userAgent) {
final String userAgent,
final boolean filterfailurls) {
this.queryString = queryString;
this.queryHashes = queryHashes;
@ -209,6 +212,7 @@ public final class QueryParams {
this.indexSegment = indexSegment;
this.userAgent = userAgent;
this.transmitcount = 0;
this.filterfailurls = filterfailurls;
}
public Segment getSegment() {

@ -333,7 +333,7 @@ public class ResultFetcher {
//System.out.println("page == null");
break; // no more available
}
if (workTables.failURLsContains(page.hash())) continue;
if (query.filterfailurls && workTables.failURLsContains(page.hash())) continue;
loops++;
final ResultEntry resultEntry = fetchSnippet(page, cacheStrategy); // does not fetch snippets if snippetMode == 0

@ -1656,14 +1656,16 @@ public final class Switchboard extends serverSwitch {
this.clusterhashes = this.peers.clusterHashes(getConfig("cluster.peers.yacydomain", ""));
// check if we are reachable and try to map port again if not (e.g. when router rebooted)
if(getConfigBool(SwitchboardConstants.UPNP_ENABLED, false) && sb.peers.mySeed().isJunior())
if (getConfigBool(SwitchboardConstants.UPNP_ENABLED, false) && sb.peers.mySeed().isJunior())
UPnP.addPortMapping();
// after all clean up is done, check the resource usage
observer.resourceObserverJob();
// cleanup cached search failures
this.tables.cleanFailURLS(this.getConfigLong("cleanup.failedSearchURLtimeout", -1));
if (getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && peers.mySeed().getFlagAcceptRemoteIndex()) {
this.tables.cleanFailURLS(this.getConfigLong("cleanup.failedSearchURLtimeout", -1));
}
return true;
} catch (final InterruptedException e) {

@ -389,6 +389,8 @@ public final class SwitchboardConstants {
public static final String NETWORK_WHITELIST = "network.unit.access.whitelist";
public static final String NETWORK_BLACKLIST = "network.unit.access.blacklist";
public static final String NETWORK_SEARCHVERIFY = "network.unit.inspection.searchverify";
/**
* appearance
*/

Loading…
Cancel
Save