*) Snippet fetching:

Snippets are now fetched synchronously if the query parameter "fetchSnippet="
   is appended to the query string on the YaCy search page. This is required
   for the RSS feed.
   See: http://www.yacy-forum.de/viewtopic.php?t=4051
*) Small changes in the XSLT stylesheet that is used to generate an HTML page from
   the RSS feed.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3787 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent e1a5babff1
commit 051a65f7af

@ -25,8 +25,8 @@ public class snippet {
//get the timeout for snippet-fetching //get the timeout for snippet-fetching
int mediasnippet_timeout = 15000; int mediasnippet_timeout = 15000;
int textsnippet_timeout = 10000; int textsnippet_timeout = 10000;
mediasnippet_timeout = Integer.parseInt((env.getConfig("timeout_text", "15000"))); mediasnippet_timeout = Integer.parseInt(env.getConfig("timeout_text", "15000"));
textsnippet_timeout = Integer.parseInt((env.getConfig("timeout_media", "10000"))); textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000"));
// getting url // getting url
String urlString = post.get("url", ""); String urlString = post.get("url", "");

@ -52,6 +52,7 @@ import java.net.MalformedURLException;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Set;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
import java.util.TreeSet; import java.util.TreeSet;
@ -69,6 +70,7 @@ import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile; import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSearchResults; import de.anomic.plasma.plasmaSearchResults;
@ -266,9 +268,10 @@ public class yacysearch {
final boolean globalsearch = (global) && (yacyonline) && (!samesearch); final boolean globalsearch = (global) && (yacyonline) && (!samesearch);
// do the search // do the search
TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]);
plasmaSearchQuery thisSearch = new plasmaSearchQuery( plasmaSearchQuery thisSearch = new plasmaSearchQuery(
querystring, querystring,
plasmaCondenser.words2hashes(query[0]), queryHashes,
plasmaCondenser.words2hashes(query[1]), plasmaCondenser.words2hashes(query[1]),
maxDistance, maxDistance,
prefermask, prefermask,
@ -338,9 +341,47 @@ public class yacysearch {
if (result.hasSnippet()) { if (result.hasSnippet()) {
prop.put("type_results_" + i + "_snippet", 1); prop.put("type_results_" + i + "_snippet", 1);
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
} else { } else {
prop.put("type_results_" + i + "_snippet", 0); if (post.containsKey("fetchSnippet")) {
prop.put("type_results_" + i + "_snippet_text", ""); /* fetch the snippet now */
try {
// snippet fetch timeout
int textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000"));
// boolean line_end_with_punctuation
boolean pre = post.get("pre", "false").equals("true");
// if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed
boolean remove = post.get("remove", "false").equals("true");
URL resultURL = new URL(result.getUrl());
plasmaSnippetCache.TextSnippet snippet = sb.snippetCache.retrieveTextSnippet(
resultURL,
queryHashes,
true,
pre,
260,
textsnippet_timeout
);
if (snippet.getErrorCode() < 11) {
// no problems occurred
//prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
prop.putASIS("type_results_" + i + "_snippet_text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown");
} else {
// problems with snippet fetch
prop.put("type_results_" + i + "_snippet_text", (remove) ? sb.snippetCache.failConsequences(snippet, queryHashes) : snippet.getError());
}
prop.put("type_results_" + i + "_snippet", 1);
} catch (MalformedURLException e) {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
} else {
/* no snippet available (will be fetched later via ajax) */
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
} }
prop.put("type_results", results.numResults()); prop.put("type_results", results.numResults());
prop.put("references", results.getReferences()); prop.put("references", results.getReferences());

@ -3,6 +3,10 @@
<rss version="2.0" <rss version="2.0"
xmlns:yacyTopwords="http://www.yacy.net/yacy/topwords" xmlns:yacyTopwords="http://www.yacy.net/yacy/topwords"
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"> xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<!--
YACY P2P WEB SEARCH - Results
Hint: append the query-parameter "fetchSnippet=" to embed snippets
-->
<channel> <channel>
<title>YaCy P2P-Search for #[former]#</title> <title>YaCy P2P-Search for #[former]#</title>
<description>Search for #[former]#</description> <description>Search for #[former]#</description>
@ -11,21 +15,21 @@
<title>Search for #[former]#</title> <title>Search for #[former]#</title>
</image> </image>
<opensearch:totalResults>#[type_results]#</opensearch:totalResults> <opensearch:totalResults>#[type_results]#</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex> <opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>#[type_results]#</opensearch:itemsPerPage> <opensearch:itemsPerPage>#[type_results]#</opensearch:itemsPerPage>
<opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/> <opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/>
<opensearch:Query role="request" searchTerms="#[former]#" /> <opensearch:Query role="request" searchTerms="#[former]#" />
#(type)# #(type)#
#{results}# #{results}#<item>
<item>
<title><![CDATA[#[description]#]]></title> <title><![CDATA[#[description]#]]></title>
<link>#[url]#</link> <link>#[url]#</link>
#(snippet)#::<description><![CDATA[#[text]#]]></description>#(/snippet)# #(snippet)#::<description><![CDATA[#[text]#]]></description>#(/snippet)#
<pubDate>#[date]#</pubDate> <pubDate>#[date]#</pubDate>
<guid>#[urlhash]#</guid> <guid>#[urlhash]#</guid>
</item> </item>
#{/results}#
<yacyTopwords:topwords> #{/results}#<yacyTopwords:topwords>
#(combine)# #(combine)#
:: ::
#{words}# #{words}#
@ -34,7 +38,7 @@
</yacyTopwords:item> </yacyTopwords:item>
#{/words}# #{/words}#
#(/combine)# #(/combine)#
</yacyTopwords:topwords> </yacyTopwords:topwords>
:: ::
#(/type)# #(/type)#
</channel> </channel>

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8" ?> <?xml version="1.0" encoding="utf-8" ?>
<?xml-stylesheet type='text/xsl' href='/rss.xsl' version='1.0'?> <?xml-stylesheet type='text/xsl' href='/rss.xsl' version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.0"> <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.0">
<xsl:output method="html"/>
<xsl:template match='/rss'> <xsl:template match='/rss'>
<html> <html>
<head> <head>
@ -9,6 +10,7 @@
<link rel="shortcut icon" href="favicon.ico" /> <link rel="shortcut icon" href="favicon.ico" />
<style type="text/css"> <style type="text/css">
@import "/env/style.css"; @import "/env/style.css";
@import "/env/base.css";
</style> </style>
</head> </head>
<body> <body>
@ -24,11 +26,12 @@
</xsl:template> </xsl:template>
<xsl:template match='item'> <xsl:template match='item'>
<p> <div class="searchresults">
<b><xsl:value-of select='title'/></b><br/> <h4 class="linktitle"><a href="{link}" ><xsl:value-of select='title'/></a></h4>
<a href="{link}" ><xsl:value-of select='link' /></a><br/> <p class="snippet"><span class="snippetLoaded"><xsl:value-of select='description'/></span></p>
<xsl:value-of select='pubDate' /><br/> <p class="url"><a href="{link}" ><xsl:value-of select='link' /></a></p>
</p> <p class="urlinfo"><xsl:value-of select='pubDate' /></p>
</div>
</xsl:template> </xsl:template>
</xsl:stylesheet> </xsl:stylesheet>
Loading…
Cancel
Save