Changed the search process for images: the media snippet load process is no longer used;

instead, only links from the index are shown, i.e. those that previously appeared on the text search page.
This makes the image search process super fast!
pull/1/head
Michael Peter Christen 13 years ago
parent f5efdb21fd
commit a3badd3205

@ -160,7 +160,7 @@ public class ViewFile {
} }
// loading the resource content as byte array // loading the resource content as byte array
prop.put("error_incache", Cache.has(url) ? 1 : 0); prop.put("error_incache", Cache.has(url.hash()) ? 1 : 0);
Response response = null; Response response = null;
try { try {

@ -215,29 +215,30 @@ public class yacysearchitem {
// image search; shows thumbnails // image search; shows thumbnails
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
final MediaSnippet ms = theSearch.result().oneImage(item); //final MediaSnippet ms = theSearch.result().oneImage(item);
final ResultEntry ms = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000);
if (ms == null) { if (ms == null) {
prop.put("content_item", "0"); prop.put("content_item", "0");
} else { } else {
final String resultUrlstring = ms.href.toNormalform(true, false); final String resultUrlstring = ms.url().toNormalform(true, false);
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self"); final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
final String license = sb.licensedURLs.aquireLicense(ms.href); final String license = sb.licensedURLs.aquireLicense(ms.url());
sb.loader.loadIfNotExistBackground(ms.href, 1024 * 1024 * 10); sb.loader.loadIfNotExistBackground(ms.url(), 1024 * 1024 * 10);
prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + resultUrlstring : resultUrlstring); prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + resultUrlstring : resultUrlstring);
prop.putHTML("content_item_href", resultUrlstring); prop.putHTML("content_item_href", resultUrlstring);
prop.putHTML("content_item_target", target); prop.putHTML("content_item_target", target);
prop.put("content_item_code", license); prop.put("content_item_code", license);
prop.putHTML("content_item_name", shorten(ms.name, MAX_NAME_LENGTH)); prop.putHTML("content_item_name", shorten(ms.title(), MAX_NAME_LENGTH));
prop.put("content_item_mimetype", ms.mime); prop.put("content_item_mimetype", "");
prop.put("content_item_fileSize", ms.fileSize); prop.put("content_item_fileSize", 0);
prop.put("content_item_width", ms.width); prop.put("content_item_width", 0);
prop.put("content_item_height", ms.height); prop.put("content_item_height", 0);
prop.put("content_item_attr", (ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"); // attributes, here: original size of image prop.put("content_item_attr", ""/*(ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"*/); // attributes, here: original size of image
prop.put("content_item_urlhash", ASCII.String(ms.source.hash())); prop.put("content_item_urlhash", ASCII.String(ms.url().hash()));
prop.put("content_item_source", ms.source.toNormalform(true, false)); prop.put("content_item_source", ms.url().toNormalform(true, false));
prop.putXML("content_item_source-xml", ms.source.toNormalform(true, false)); prop.putXML("content_item_source-xml", ms.url().toNormalform(true, false));
prop.put("content_item_sourcedom", ms.source.getHost()); prop.put("content_item_sourcedom", ms.url().getHost());
prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1); prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1);
prop.put("content_item", 1); prop.put("content_item", 1);
} }

@ -612,27 +612,6 @@ public class SnippetProcess {
Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason); Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
return null; return null;
} }
} else if (page.url().getContentDomain() == Classification.ContentDomain.IMAGE) {
// attach media information
startTime = System.currentTimeMillis();
final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(page.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal());
final long snippetComputationTime = System.currentTimeMillis() - startTime;
Log.logInfo("SEARCH", "media snippet load time for " + page.url() + ": " + snippetComputationTime);
if (mediaSnippets != null && !mediaSnippets.isEmpty()) {
// found media snippets, return entry
return new ResultEntry(page, this.query.getSegment(), this.peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime);
} else if (cacheStrategy.mustBeOffline()) {
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime);
} else {
// problems with snippet fetch
final String reason = "no media snippet";
if (this.deleteIfSnippetFail) {
this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason);
}
Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
return null;
}
} else { } else {
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); // result without snippet return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); // result without snippet
} }

@ -266,7 +266,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
// first try to get the snippet from metadata // first try to get the snippet from metadata
String loc; String loc;
final Request request = loader.request(url, true, reindexing); final Request request = loader.request(url, true, reindexing);
final boolean inCache = de.anomic.crawler.Cache.has(row.url()); final boolean inCache = de.anomic.crawler.Cache.has(row.hash());
final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null; final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null;
if (containsAllHashes(loc = row.dc_title(), queryhashes) || if (containsAllHashes(loc = row.dc_title(), queryhashes) ||
containsAllHashes(loc = row.dc_creator(), queryhashes) || containsAllHashes(loc = row.dc_creator(), queryhashes) ||

Loading…
Cancel
Save