added a snippet test function in ViewFile: you can now search for a

specific word in the document; the servlet returns the snippet in the
same way as it would be shown in a search result.
pull/1/head
Michael Peter Christen 11 years ago
parent c63e93df46
commit 6e1dc444c3

@ -86,9 +86,16 @@ function updatepage(str) {
<dd>
<input type="text" size="60" name="url" id="url" value="#[url]#" />
<input type="submit" name="show" value="Show Metadata" />
<input type="button" value="Browse Host" onClick="location.href='/HostBrowser.html?path=' + document.getElementById('url').value" />
#(moar)#::<input type="button" value="Browse Host" onClick="location.href='/HostBrowser.html?path=' + document.getElementById('url').value" />#(/moar)#
<div id="searchresults"></div>
</dd>
#(moar)#::
<dt>Search in Document:</dt>
<dd>
<input type="text" size="60" name="search" id="search" value="#[search]#" />
<input type="submit" name="show" value="Show Snippet" />
</dd>
#(/moar)#
</dl>
</fieldset>
</form>
@ -142,6 +149,16 @@ function updatepage(str) {
<span class="error">Unsupported protocol.</span>
#(/error)#
</td><td valign="top">
#(showSnippet)#::
<form action="">
<fieldset><legend>Snippet</legend>
<dl>
<dt>Headline</dt><dd>#[headline]#</dd>
<dt>Teaser Text</dt><dd>#[teasertext]#</dd>
</dl>
</fieldset>
</form>
#(/showSnippet)#
#(viewMode)#
:: <!-- 1 -->
<fieldset><legend>Original Content from Web</legend>

@ -33,6 +33,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
@ -54,6 +55,8 @@ import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -77,15 +80,15 @@ public class ViewFile {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard)env;
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
if (post == null) {
prop.putHTML("error_words", "");
prop.put("error_vMode-sentences", "1");
prop.put("error", "1");
prop.put("url", "");
prop.put("viewMode", VIEW_MODE_NO_TEXT);
return prop;
}
prop.put("moar", 0);
prop.put("viewMode", VIEW_MODE_NO_TEXT);
prop.putHTML("error_words", "");
prop.put("error_vMode-sentences", "1");
prop.put("error", "1");
prop.put("url", "");
prop.put("showSnippet", 0);
if (post == null) return prop;
// get segment
Segment indexSegment = sb.index;
@ -97,9 +100,18 @@ public class ViewFile {
prop.putHTML("error_words", "");
}
final String viewMode = post.get("viewMode","parsed");
prop.put("error_vMode-iframeWeb", "0");
prop.put("error_vMode-iframeCache", "0");
prop.put("error_vMode-plain", "0");
prop.put("error_vMode-parsed", "0");
prop.put("error_vMode-sentences", "0");
prop.put("error_vMode-words", "0");
prop.put("error_vMode-links", "0");
prop.put("error_vMode-iframeCitations", "0");
final boolean showSnippet = post.get("show", "").equals("Show Snippet");
final String viewMode = showSnippet ? "sentences" : post.get("viewMode", "sentences");
prop.put("error_vMode-" + viewMode, "1");
DigestURL url = null;
String descr = "";
final int wordCount = 0;
@ -155,6 +167,8 @@ public class ViewFile {
return prop;
}
prop.put("url", url.toNormalform(true));
prop.put("moar", 1);
prop.put("moar_search", post.get("search",""));
// loading the resource content as byte array
prop.put("error_incache", Cache.has(url.hash()) ? 1 : 0);
@ -337,6 +351,27 @@ public class ViewFile {
prop.put("viewMode_links", i);
}
// optional: generate snippet
if (showSnippet) {
QueryGoal goal = new QueryGoal(post.get("search", ""));
TextSnippet snippet = new TextSnippet(
null,
urlEntry,
goal.getIncludeHashes(),
CacheStrategy.CACHEONLY,
false,
180,
false);
String titlestr = urlEntry.dc_title();
// if title is empty use filename as title
if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
titlestr = urlEntry.url() != null ? urlEntry.url().getFileName() : "";
}
final String desc = (snippet == null) ? "" : snippet.isMarked() ? snippet.getLineRaw() : snippet.getLineMarked(goal);
prop.put("showSnippet_headline", titlestr);
prop.put("showSnippet_teasertext", desc);
prop.put("showSnippet", 1);
}
if (document != null) document.close();
}
prop.put("error", "0");

@ -338,7 +338,13 @@ public class RobotsTxt {
return sb.toString();
}
public static DigestURL robotsURL(final String urlHostPort) {
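/**
* Generate a robots.txt url.
* @param urlHostPort a string of the form {@code <host>:<port>} or just {@code <host>};
*        a trailing {@code :80} is stripped, and a trailing {@code :443} selects https instead of http
* @return the full robots.txt url
*/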
public static DigestURL robotsURL(String urlHostPort) {
if (urlHostPort.endsWith(":80")) urlHostPort = urlHostPort.substring(0, urlHostPort.length() - 3);
DigestURL robotsURL = null;
try {
robotsURL = new DigestURL((urlHostPort.endsWith(":443") ? "https://" : "http://") + urlHostPort + "/robots.txt");

@ -75,7 +75,6 @@ import net.yacy.search.SwitchboardConstants;
import net.yacy.server.http.HTTPDFileHandler;
import net.yacy.server.http.TemplateEngine;
import net.yacy.server.serverClassLoader;
import net.yacy.server.serverCore;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.server.servletProperties;

@ -100,7 +100,7 @@ public class QueryGoal {
}
/**
* Creates a QueryGoal from a serach query string
* Creates a QueryGoal from a search query string
* @param query_words search string (the actual search terms, excluding application specific modifier)
*/
public QueryGoal(String query_words) {

@ -51,8 +51,6 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.HostBalancer;
import net.yacy.document.parser.htmlParser;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.search.schema.CollectionConfiguration.Subgraph;

Loading…
Cancel
Save