Improve presentation of search result metadata and fix htcache loading

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6851 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 5fbf866cae
commit 7ab207d93a

@ -46,7 +46,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.retrieval.Response;
import de.anomic.http.client.Client;
import de.anomic.http.client.Cache;
import de.anomic.http.server.RequestHeader;
import de.anomic.http.server.ResponseHeader;
@ -226,7 +225,13 @@ public class ViewFile {
return prop;
}
responseHeader = Client.whead(url.toString());
try {
Response response = sb.loader.load(url, true, false);
responseHeader = response.getResponseHeader();
resource = response.getContent();
} catch (IOException e) {
Log.logException(e);
}
if (responseHeader == null) {
prop.put("error", "4");
prop.put("error_errorText", "Unable to load resource metadata.");
@ -272,7 +277,7 @@ public class ViewFile {
// parsing the resource content
Document document = null;
try {
document = LoaderDispatcher.parseDocument(url, resource.length, new ByteArrayInputStream(resource), null);
document = LoaderDispatcher.parseDocument(url, resource.length, new ByteArrayInputStream(resource), responseHeader);
if (document == null) {
prop.put("error", "5");
prop.put("error_errorText", "Unknown error");

@ -29,7 +29,7 @@ you can validate it with http://validator.w3.org/
<dt>Contributor</dt><dd property="dc:Contributor">#[dc_contributor]#</dd>
<dt>Date</dt><dd property="dc:Date">#[dc_date]#</dd>
<dt>Type</dt><dd property="dc:Type">yacy:doctype:#[dc_type]#</dd>
<dt>Identifier</dt><dd property="dc:Identifier">yacy:urlhash:#[dc_identifier]#</dd>
<dt>Identifier</dt><dd property="dc:Identifier">#[dc_identifier]#</dd>
<dt>Language</dt><dd property="dc:Language">#[dc_language]#</dd>
<dt>Load Date</dt><dd property="yacy:loaddate">#[yacy_loaddate]#</dd>

@ -46,14 +46,14 @@ public class yacydoc {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
Segment segment = null;
if (post == null || !post.containsKey("html")) {
if (post.containsKey("segment") && sb.verifyAuthentication(header, false)) {
segment = sb.indexSegments.segment(post.get("segment"));
}
final Segment segment;
boolean html = post != null && post.containsKey("html");
prop.setLocalized(html);
if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header, false)) {
segment = sb.indexSegments.segment(post.get("segment"));
} else {
segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
prop.put("dc_title", "");
prop.put("dc_creator", "");
@ -74,7 +74,8 @@ public class yacydoc {
if (urlstring.length() > 0 && urlhash.length() == 0) {
try {
urlhash = new String((new DigestURI(urlstring, null)).hash());
DigestURI url = new DigestURI(urlstring, null);
urlhash = new String(url.hash());
} catch (MalformedURLException e) {
Log.logException(e);
}
@ -94,11 +95,11 @@ public class yacydoc {
prop.putXML("dc_creator", metadata.dc_creator());
prop.putXML("dc_description", "");
prop.putXML("dc_subject", metadata.dc_subject());
prop.putXML("dc_publisher", metadata.url().toNormalform(false, true));
prop.putXML("dc_publisher", "");
prop.putXML("dc_contributor", "");
prop.putXML("dc_date", entry.moddate().toString());
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", urlhash);
prop.putXML("dc_identifier", metadata.url().toNormalform(false, true));
prop.putXML("dc_language", entry.language());
prop.putXML("yacy_loaddate", entry.loaddate().toString());

@ -17,7 +17,7 @@
#(/authorized)#
<p class="snippet"><span class="snippetLoaded" id="h#[urlhash]#">#[description]#</span></p>
<p class="url"><a href="#[link]#" id="url#[urlhash]#">#[urlname]#</a></p>
<p class="urlinfo">#[date]# | #[sizename]# | YBR-#[ybr]# | <a href="ViewFile.html?urlHash=#[urlhash]#&amp;words=#[words]#&amp;display=#[display]#">Info</a> | <a href="yacysearch.html?cat=image&amp;url=#[link]#&amp;search=#[former]#&amp;display=#[display]#">Pictures</a></p>
<p class="urlinfo">#[date]# | #[sizename]# | <a href="api/yacydoc.html?urlhash=#[urlhash]#" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a> | <a href="ViewFile.html?urlHash=#[urlhash]#&amp;words=#[words]#&amp;display=#[display]#">Parser</a> | <a href="yacysearch.html?cat=image&amp;url=#[link]#&amp;search=#[former]#&amp;display=#[display]#">Pictures</a></p>
</div>
::
#(item)#::<div class="thumbcontainer">

@ -40,7 +40,6 @@ import de.anomic.search.ContentDomain;
import de.anomic.search.MediaSnippet;
import de.anomic.search.QueryParams;
import de.anomic.search.SearchEvent;
import de.anomic.search.RankingProcess;
import de.anomic.search.ResultEntry;
import de.anomic.search.SearchEventCache;
import de.anomic.search.Switchboard;
@ -131,7 +130,7 @@ public class yacysearchitem {
prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), urllength));
prop.put("content_date", Switchboard.dateString(result.modified()));
prop.put("content_date822", Switchboard.dateString822(result.modified()));
prop.put("content_ybr", RankingProcess.ybr(result.hash()));
//prop.put("content_ybr", RankingProcess.ybr(result.hash()));
prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_host", result.url().getHost());

@ -178,7 +178,7 @@ public final class CrawlSwitchboard {
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, false, false, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH);
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, true, true, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling

Loading…
Cancel
Save