fixed several search bugs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7180 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 937dd956d3
commit 84a023cbc8

@ -92,7 +92,7 @@ public class ViewImage {
if (url != null) try {
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST);
} catch (IOException e) {
Log.logWarning("ViewImage", "cannot load: " + e.getMessage());
Log.logFine("ViewImage", "cannot load: " + e.getMessage());
}
byte[] imgb = null;
if (resourceb == null) {

@ -134,7 +134,7 @@ document.getElementById("Enter").value = "search again";
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
#(/excluded)#
<!-- type the number of results -->
<!-- type the number of results and navigation bar -->
#(num-results)#
::
<p>No Results.</p>

@ -97,7 +97,7 @@ public class yacysearch {
// get query
String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
String querystring = originalquerystring.replace('+', ' ');
CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
final serverObjects prop = new serverObjects();
@ -237,7 +237,7 @@ public class yacysearch {
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
} else if (Domains.matchesList(client, sb.networkWhitelist)) {
Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
} else if (global || snippetFetchStrategy.isAllowedToFetchOnline()) {
} else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
synchronized (trackerHandles) {
int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();

@ -222,10 +222,6 @@ public final class QueryParams {
return this.contentdom.toString();
}
/**
 * Reports whether this query's domain type is something other than
 * {@code SEARCHDOM_LOCAL}.
 *
 * @return {@code true} if {@code domType} differs from {@code SEARCHDOM_LOCAL},
 *         {@code false} if the two are equal
 */
public boolean isGlobal() {
    final boolean localOnly = (SEARCHDOM_LOCAL == this.domType);
    return !localOnly;
}
/**
 * Reports whether this query's domain type equals {@code SEARCHDOM_LOCAL}.
 *
 * @return {@code true} if {@code domType} equals {@code SEARCHDOM_LOCAL},
 *         {@code false} otherwise
 */
public boolean isLocal() {
    final boolean localOnly = (SEARCHDOM_LOCAL == this.domType);
    return localOnly;
}
@ -418,8 +414,8 @@ public final class QueryParams {
context.append('-');
context.append(hashSet2hashString(this.excludeHashes));
}
context.append(asterisk);
context.append(this.domType);
//context.append(asterisk);
//context.append(this.domType);
context.append(asterisk);
context.append(this.contentdom);
context.append(asterisk);

@ -347,9 +347,9 @@ public final class RankingProcess extends Thread {
* @param timeout the time this method may take for a result computation
* @return a metadata entry for a url
*/
public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout;
long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
int p = -1;
byte[] urlhash;
long timeleft;

@ -164,12 +164,12 @@ public class ResultFetcher {
if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
// get next entry
page = rankedCache.takeURL(true, taketimeout);
page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, taketimeout);
if (page == null) break;
if (failedURLs.has(page.hash())) continue;
final ResultEntry resultEntry = fetchSnippet(page, query.host == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used
//if (result.contains(resultEntry)) continue;
@ -228,7 +228,7 @@ public class ResultFetcher {
((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
180,
Integer.MAX_VALUE,
query.isGlobal());
!query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
@ -247,7 +247,7 @@ public class ResultFetcher {
} else {
// attach media information
startTime = System.currentTimeMillis();
final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, query.isGlobal());
final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);

@ -155,9 +155,9 @@ public final class SearchEvent {
} else {
// do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
this.rankedCache.run(); // this is not started concurrently here on purpose!
if (generateAbstracts) {
this.rankedCache.run(); // this is not started concurrently here on purpose!
// compute index abstracts
final long timer = System.currentTimeMillis();
int maxcount = -1;
@ -182,6 +182,8 @@ public final class SearchEvent {
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
}
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
} else {
this.rankedCache.start(); // start concurrently
}
// start worker threads to fetch urls and snippets

@ -475,11 +475,11 @@ public final class yacyClient {
continue; // db-error
}
if (urlEntry.snippet() != null) {
if (urlEntry.snippet() != null && urlEntry.snippet().length() > 0 && !urlEntry.snippet().equals("null")) {
// we don't store the snippets along the url entry,
// because they are search-specific.
// instead, they are placed in a snippet-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
// System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
}

@ -76,10 +76,11 @@ public class YaCySearchClient {
}
public static class RSSEntry {
String title, link;
String title, link, snippet;
public RSSEntry(Element element) {
title = val(element, "title", "");
link = val(element, "link", "");
snippet = val(element, "description", "");
}
private String val(Element parent, String label, String dflt) {
Element e = (Element) parent.getElementsByTagName(label).item(0);
@ -88,7 +89,7 @@ public class YaCySearchClient {
((CharacterData) child).getData() : dflt;
}
public String toString() {
return "Title : " + title + "\nLink : " + link + "\n";
return "Title : " + title + "\nLink : " + link + "\nDescription: " + snippet + "\n";
}
}

@ -147,7 +147,7 @@ public class genericImageParser extends AbstractParser implements Parser {
props.put(tag.getTagName(), tag.getDescription());
ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
} catch (MetadataException e) {
Log.logException(e);
//Log.logException(e);
}
}
title = props.get("Image Description");

Loading…
Cancel
Save