some refactoring to get the LoaderDispatcher a little bit more independent from the switchboard

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6755 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 36bd843ece
commit bfb518cd47

@@ -278,7 +278,7 @@ public final class search {
RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
// make event
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, null, true);
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, null, true, sb.loader);
// set statistic details of search result and find best result index set
if (theSearch.getRankingResult().getLocalIndexCount() == 0) {

@@ -495,7 +495,7 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
}
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false);
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader);
try {Thread.sleep(100);} catch (InterruptedException e1) {} // wait a little time to get first results in the search
// generate result object

@@ -43,6 +43,7 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.SetTools;
import net.yacy.kelondro.util.SortStack;
import net.yacy.kelondro.util.SortStore;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.search.MediaSnippet;
import de.anomic.yacy.yacySeedDB;
@@ -56,6 +57,7 @@ public class ResultFetcher {
private final yacySeedDB peers;
// result values
protected final LoaderDispatcher loader;
protected Worker[] workerThreads;
protected final SortStore<ResultEntry> result;
protected final SortStore<MediaSnippet> images; // container to sort images by size
@@ -67,11 +69,13 @@ public class ResultFetcher {
@SuppressWarnings("unchecked")
public ResultFetcher(
final LoaderDispatcher loader,
RankingProcess rankedCache,
final QueryParams query,
final yacySeedDB peers,
final int taketimeout) {
this.loader = loader;
this.rankedCache = rankedCache;
this.query = query;
this.peers = peers;
@@ -211,7 +215,15 @@ public class ResultFetcher {
if (query.contentdom == ContentDomain.TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
final TextSnippet snippet = TextSnippet.retrieveTextSnippet(metadata, snippetFetchWordHashes, (snippetMode == 2), ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))), 180, (snippetMode == 2) ? Integer.MAX_VALUE : 30000, query.isGlobal());
final TextSnippet snippet = TextSnippet.retrieveTextSnippet(
this.loader,
metadata,
snippetFetchWordHashes,
(snippetMode == 2),
((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
180,
(snippetMode == 2) ? Integer.MAX_VALUE : 30000,
query.isGlobal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH_EVENT", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));

@@ -42,6 +42,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.SetTools;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.ResultURLs;
import de.anomic.yacy.yacySearch;
@@ -85,7 +86,8 @@ public final class SearchEvent {
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts) {
final boolean generateAbstracts,
final LoaderDispatcher loader) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
this.peers = peers;
this.crawlResults = crawlResults;
@@ -148,7 +150,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.results = new ResultFetcher(rankedCache, query, peers, 10000);
this.results = new ResultFetcher(loader, rankedCache, query, peers, 10000);
} else {
// do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
@@ -183,7 +185,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.results = new ResultFetcher(rankedCache, query, peers, 300);
this.results = new ResultFetcher(loader, rankedCache, query, peers, 300);
}
// clean up events

@@ -30,6 +30,8 @@ import java.util.Iterator;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.ResultURLs;
import de.anomic.yacy.yacySeedDB;
@@ -69,7 +71,8 @@ public class SearchEventCache {
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts) {
final boolean generateAbstracts,
final LoaderDispatcher loader) {
String id = query.id(false);
SearchEvent event = SearchEventCache.lastEvents.get(id);
@@ -89,7 +92,7 @@ public class SearchEventCache {
}
if (event == null) {
// start a new event
event = new SearchEvent(query, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
event = new SearchEvent(query, peers, crawlResults, preselectedPeerHashes, generateAbstracts, loader);
}
return event;

@@ -307,7 +307,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
@SuppressWarnings("unchecked")
public static TextSnippet retrieveTextSnippet(final URIMetadataRow.Components comp, final TreeSet<byte[]> queryhashes, final boolean fetchOnline, final boolean pre, final int snippetMaxLength, final int maxDocLen, final boolean reindexing) {
public static TextSnippet retrieveTextSnippet(final LoaderDispatcher loader, final URIMetadataRow.Components comp, final TreeSet<byte[]> queryhashes, final boolean fetchOnline, final boolean pre, final int snippetMaxLength, final int maxDocLen, final boolean reindexing) {
// heise = "0OQUNU3JSs05"
final DigestURI url = comp.url();
if (queryhashes.isEmpty()) {
@@ -324,6 +324,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return new TextSnippet(url, line, source, null, null, faviconCache.get(url.hash()));
}
/* ===========================================================================
* LOADING RESOURCE DATA
* =========================================================================== */
@@ -346,6 +347,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// try to create the snippet from information given in the subject metadata
return new TextSnippet(url, loc, SOURCE_METADATA, null, null, faviconCache.get(url.hash()));
} else {
// trying to load the resource from the cache
resContent = Cache.getContent(url);
responseHeader = Cache.getResponseHeader(url);
@@ -356,9 +358,9 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// if not found try to download it
// download resource using the crawler and keep resource in memory if possible
final Response entry = Switchboard.getSwitchboard().loader.load(url, true, reindexing);
final Response entry = loader.load(url, true, reindexing);
// getting resource metadata (e.g. the http headers for http resources)
// get resource metadata (e.g. the http headers for http resources)
if (entry != null) {
// place entry on indexing queue
Switchboard.getSwitchboard().toIndexer(entry);

@@ -102,6 +102,15 @@ public final class LoaderDispatcher {
return load(request(url, forText, global), forText);
}
/**
* load a resource from the web, from ftp, from smb or a file
* @param url
* @param forText
* @param global
* @param cacheStratgy strategy according to CACHE_STRATEGY_NOCACHE,CACHE_STRATEGY_IFFRESH,CACHE_STRATEGY_IFEXIST,CACHE_STRATEGY_CACHEONLY
* @return the loaded entity in a Response object
* @throws IOException
*/
public Response load(
final DigestURI url,
final boolean forText,
@@ -112,7 +121,7 @@ public final class LoaderDispatcher {
public void load(final DigestURI url, int cacheStratgy, File targetFile) throws IOException {
byte[] b = load(url, cacheStratgy);
byte[] b = load(request(url, false, true), false, cacheStratgy).getContent();
if (b == null) throw new IOException("load == null");
File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@@ -123,11 +132,6 @@ public final class LoaderDispatcher {
tmp.renameTo(targetFile);
}
public byte[] load(final DigestURI url, int cacheStratgy) throws IOException {
Response response = load(request(url, false, true), false, cacheStratgy);
return response.getContent();
}
/**
* generate a request object
* @param url the target url
@@ -162,7 +166,7 @@ public final class LoaderDispatcher {
public Response load(final Request request, final boolean acceptOnlyParseable) throws IOException {
CrawlProfile.entry crawlProfile = sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle());
int cacheStrategy = CrawlProfile.CACHE_STRATEGY_IFFRESH;
int cacheStrategy = CrawlProfile.CACHE_STRATEGY_IFEXIST;
if (crawlProfile != null) cacheStrategy = crawlProfile.cacheStrategy();
return load(request, acceptOnlyParseable, cacheStrategy);
}

Loading…
Cancel
Save