fixed several search bugs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7180 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 937dd956d3
commit 84a023cbc8

@ -92,7 +92,7 @@ public class ViewImage {
if (url != null) try { if (url != null) try {
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST); resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST);
} catch (IOException e) { } catch (IOException e) {
Log.logWarning("ViewImage", "cannot load: " + e.getMessage()); Log.logFine("ViewImage", "cannot load: " + e.getMessage());
} }
byte[] imgb = null; byte[] imgb = null;
if (resourceb == null) { if (resourceb == null) {

@ -134,7 +134,7 @@ document.getElementById("Enter").value = "search again";
<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p> <p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
#(/excluded)# #(/excluded)#
<!-- type the number of results --> <!-- type the number of results and navigation bar -->
#(num-results)# #(num-results)#
:: ::
<p>No Results.</p> <p>No Results.</p>

@ -97,7 +97,7 @@ public class yacysearch {
// get query // get query
String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim(); String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
String querystring = originalquerystring.replace('+', ' '); String querystring = originalquerystring.replace('+', ' ');
CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly")); CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY; if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
@ -237,7 +237,7 @@ public class yacysearch {
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search"); Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
} else if (Domains.matchesList(client, sb.networkWhitelist)) { } else if (Domains.matchesList(client, sb.networkWhitelist)) {
Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions"); Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
} else if (global || snippetFetchStrategy.isAllowedToFetchOnline()) { } else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases // in case that we do a global search or we want to fetch snippets, we check for DoS cases
synchronized (trackerHandles) { synchronized (trackerHandles) {
int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size(); int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();

@ -222,10 +222,6 @@ public final class QueryParams {
return this.contentdom.toString(); return this.contentdom.toString();
} }
public boolean isGlobal() {
return this.domType != SEARCHDOM_LOCAL;
}
public boolean isLocal() { public boolean isLocal() {
return this.domType == SEARCHDOM_LOCAL; return this.domType == SEARCHDOM_LOCAL;
} }
@ -418,8 +414,8 @@ public final class QueryParams {
context.append('-'); context.append('-');
context.append(hashSet2hashString(this.excludeHashes)); context.append(hashSet2hashString(this.excludeHashes));
} }
context.append(asterisk); //context.append(asterisk);
context.append(this.domType); //context.append(this.domType);
context.append(asterisk); context.append(asterisk);
context.append(this.contentdom); context.append(this.contentdom);
context.append(asterisk); context.append(asterisk);

@ -347,9 +347,9 @@ public final class RankingProcess extends Thread {
* @param timeout the time this method may take for a result computation * @param timeout the time this method may take for a result computation
* @return a metadata entry for a url * @return a metadata entry for a url
*/ */
public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) { public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
// returns from the current RWI list the best URL entry and removes this entry from the list // returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout; long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
int p = -1; int p = -1;
byte[] urlhash; byte[] urlhash;
long timeleft; long timeleft;

@ -164,12 +164,12 @@ public class ResultFetcher {
if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break; if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
// get next entry // get next entry
page = rankedCache.takeURL(true, taketimeout); page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
//if (page == null) page = rankedCache.takeURL(false, taketimeout); //if (page == null) page = rankedCache.takeURL(false, taketimeout);
if (page == null) break; if (page == null) break;
if (failedURLs.has(page.hash())) continue; if (failedURLs.has(page.hash())) continue;
final ResultEntry resultEntry = fetchSnippet(page, query.host == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0 final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used if (resultEntry == null) continue; // the entry had some problems, cannot be used
//if (result.contains(resultEntry)) continue; //if (result.contains(resultEntry)) continue;
@ -228,7 +228,7 @@ public class ResultFetcher {
((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))), ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
180, 180,
Integer.MAX_VALUE, Integer.MAX_VALUE,
query.isGlobal()); !query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime; final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
@ -247,7 +247,7 @@ public class ResultFetcher {
} else { } else {
// attach media information // attach media information
startTime = System.currentTimeMillis(); startTime = System.currentTimeMillis();
final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, query.isGlobal()); final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime; final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime); Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);

@ -155,9 +155,9 @@ public final class SearchEvent {
} else { } else {
// do a local search // do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1); this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
this.rankedCache.run(); // this is not started concurrently here on purpose!
if (generateAbstracts) { if (generateAbstracts) {
this.rankedCache.run(); // this is not started concurrently here on purpose!
// compute index abstracts // compute index abstracts
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
int maxcount = -1; int maxcount = -1;
@ -182,6 +182,8 @@ public final class SearchEvent {
IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString()); IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
} }
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
} else {
this.rankedCache.start(); // start concurrently
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets

@ -475,11 +475,11 @@ public final class yacyClient {
continue; // db-error continue; // db-error
} }
if (urlEntry.snippet() != null) { if (urlEntry.snippet() != null && urlEntry.snippet().length() > 0 && !urlEntry.snippet().equals("null")) {
// we don't store the snippets along the url entry, // we don't store the snippets along the url entry,
// because they are search-specific. // because they are search-specific.
// instead, they are placed in a snipped-search cache. // instead, they are placed in a snipped-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'"); // System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet()); TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
} }

@ -76,10 +76,11 @@ public class YaCySearchClient {
} }
public static class RSSEntry { public static class RSSEntry {
String title, link; String title, link, snippet;
public RSSEntry(Element element) { public RSSEntry(Element element) {
title = val(element, "title", ""); title = val(element, "title", "");
link = val(element, "link", ""); link = val(element, "link", "");
snippet = val(element, "description", "");
} }
private String val(Element parent, String label, String dflt) { private String val(Element parent, String label, String dflt) {
Element e = (Element) parent.getElementsByTagName(label).item(0); Element e = (Element) parent.getElementsByTagName(label).item(0);
@ -88,7 +89,7 @@ public class YaCySearchClient {
((CharacterData) child).getData() : dflt; ((CharacterData) child).getData() : dflt;
} }
public String toString() { public String toString() {
return "Title : " + title + "\nLink : " + link + "\n"; return "Title : " + title + "\nLink : " + link + "\nDescription: " + snippet + "\n";
} }
} }

@ -147,7 +147,7 @@ public class genericImageParser extends AbstractParser implements Parser {
props.put(tag.getTagName(), tag.getDescription()); props.put(tag.getTagName(), tag.getDescription());
ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n"); ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
} catch (MetadataException e) { } catch (MetadataException e) {
Log.logException(e); //Log.logException(e);
} }
} }
title = props.get("Image Description"); title = props.get("Image Description");

Loading…
Cancel
Save