From 7ab207d93aa044de969243e5c53372a241ad573f Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 4 May 2010 20:57:09 +0000 Subject: [PATCH] better presentation of search result metadata and fixes to htcache loading git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6851 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ViewFile.java | 11 +++++++--- htroot/api/yacydoc.html | 2 +- htroot/api/yacydoc.java | 21 ++++++++++--------- htroot/yacysearchitem.html | 2 +- htroot/yacysearchitem.java | 3 +-- .../de/anomic/crawler/CrawlSwitchboard.java | 2 +- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 8327cb98f..cf7b70952 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -46,7 +46,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.repository.LoaderDispatcher; import de.anomic.crawler.retrieval.Response; -import de.anomic.http.client.Client; import de.anomic.http.client.Cache; import de.anomic.http.server.RequestHeader; import de.anomic.http.server.ResponseHeader; @@ -226,7 +225,13 @@ public class ViewFile { return prop; } - responseHeader = Client.whead(url.toString()); + try { + Response response = sb.loader.load(url, true, false); + responseHeader = response.getResponseHeader(); + resource = response.getContent(); + } catch (IOException e) { + Log.logException(e); + } if (responseHeader == null) { prop.put("error", "4"); prop.put("error_errorText", "Unable to load resource metadata."); @@ -272,7 +277,7 @@ public class ViewFile { // parsing the resource content Document document = null; try { - document = LoaderDispatcher.parseDocument(url, resource.length, new ByteArrayInputStream(resource), null); + document = LoaderDispatcher.parseDocument(url, resource.length, new ByteArrayInputStream(resource), responseHeader); if (document == null) { prop.put("error", "5"); prop.put("error_errorText", "Unknown error"); diff --git a/htroot/api/yacydoc.html b/htroot/api/yacydoc.html index d3a6c08ab..46f3ae45d 100644 --- a/htroot/api/yacydoc.html +++ b/htroot/api/yacydoc.html @@ -29,7 +29,7 @@ you can validate it with http://validator.w3.org/
Contributor
#[dc_contributor]#
Date
#[dc_date]#
Type
yacy:doctype:#[dc_type]#
-
Identifier
yacy:urlhash:#[dc_identifier]#
+
Identifier
#[dc_identifier]#
Language
#[dc_language]#
Load Date
#[yacy_loaddate]#
diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 9efeaeda8..82bb372d8 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -46,14 +46,14 @@ public class yacydoc { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); - Segment segment = null; - if (post == null || !post.containsKey("html")) { - if (post.containsKey("segment") && sb.verifyAuthentication(header, false)) { - segment = sb.indexSegments.segment(post.get("segment")); - } + final Segment segment; + boolean html = post != null && post.containsKey("html"); + prop.setLocalized(html); + if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header, false)) { + segment = sb.indexSegments.segment(post.get("segment")); + } else { + segment = sb.indexSegments.segment(Segments.Process.PUBLIC); } - if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC); - prop.put("dc_title", ""); prop.put("dc_creator", ""); @@ -74,7 +74,8 @@ public class yacydoc { if (urlstring.length() > 0 && urlhash.length() == 0) { try { - urlhash = new String((new DigestURI(urlstring, null)).hash()); + DigestURI url = new DigestURI(urlstring, null); + urlhash = new String(url.hash()); } catch (MalformedURLException e) { Log.logException(e); } @@ -94,11 +95,11 @@ public class yacydoc { prop.putXML("dc_creator", metadata.dc_creator()); prop.putXML("dc_description", ""); prop.putXML("dc_subject", metadata.dc_subject()); - prop.putXML("dc_publisher", metadata.url().toNormalform(false, true)); + prop.putXML("dc_publisher", ""); prop.putXML("dc_contributor", ""); prop.putXML("dc_date", entry.moddate().toString()); prop.putXML("dc_type", String.valueOf(entry.doctype())); - prop.putXML("dc_identifier", urlhash); + prop.putXML("dc_identifier", metadata.url().toNormalform(false, true)); prop.putXML("dc_language", entry.language()); prop.putXML("yacy_loaddate", entry.loaddate().toString()); diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index a823f3c93..8f5fde508 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -17,7 +17,7 @@ #(/authorized)#

#[description]#

#[urlname]#

-

#[date]# | #[sizename]# | YBR-#[ybr]# | Info | Pictures

+

#[date]# | #[sizename]# | Metadata | Parser | Pictures

:: #(item)#::
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 0ab8ae923..94a184624 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -40,7 +40,6 @@ import de.anomic.search.ContentDomain; import de.anomic.search.MediaSnippet; import de.anomic.search.QueryParams; import de.anomic.search.SearchEvent; -import de.anomic.search.RankingProcess; import de.anomic.search.ResultEntry; import de.anomic.search.SearchEventCache; import de.anomic.search.Switchboard; @@ -131,7 +130,7 @@ public class yacysearchitem { prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), urllength)); prop.put("content_date", Switchboard.dateString(result.modified())); prop.put("content_date822", Switchboard.dateString822(result.modified())); - prop.put("content_ybr", RankingProcess.ybr(result.hash())); + //prop.put("content_ybr", RankingProcess.ybr(result.hash())); prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename' prop.putHTML("content_sizename", sizename(result.filesize())); prop.putHTML("content_host", result.url().getHost()); diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java index 397ce0181..01b85460f 100644 --- a/source/de/anomic/crawler/CrawlSwitchboard.java +++ b/source/de/anomic/crawler/CrawlSwitchboard.java @@ -178,7 +178,7 @@ public final class CrawlSwitchboard { if (this.defaultTextSnippetLocalProfile == null) { // generate new default entry for snippet fetch and optional crawling defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, - this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, false, false, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH); + this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, true, true, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH); } if (this.defaultTextSnippetGlobalProfile == null) { // generate new default entry for snippet fetch and optional crawling