fixed a bug with snippet-length

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@359 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 7a7099e9dc
commit eb74fa0c82

@ -101,15 +101,8 @@ from 'late' peers to enrich this search result.
#{results}#
<!-- link begin -->
<p><b>
#[description]#
</b><br>
#(snippet)#
::
<i>
#[text]#
</i><br>
#(/snippet)#
<p><b>#[description]#</b><br>
#(snippet)#::<i>#[text]#</i><br>#(/snippet)#
<a href="#[url]#">#[urlname]#</a><br>
#[date]#<br></p>
<!-- link end -->

@ -444,9 +444,9 @@ public class plasmaCondenser {
}
public static Enumeration wordTokenizer(String s) {
public static Enumeration wordTokenizer(String s, int minLength) {
try {
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes()), 3);
return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes()), minLength);
} catch (Exception e) {
return null;
}

@ -107,7 +107,8 @@ public class plasmaSnippetCache {
}
public boolean existsInCache(URL url, Set queryhashes) {
return retrieveFromCache(yacySearch.set2string(queryhashes), plasmaURL.urlHash(url)) != null;
String hashes = yacySearch.set2string(queryhashes);
return retrieveFromCache(hashes, plasmaURL.urlHash(url)) != null;
}
public result retrieve(URL url, Set queryhashes, boolean fetchOnline, int snippetMaxLength) {
@ -237,7 +238,7 @@ public class plasmaSnippetCache {
hs = hashSentence(result);
j = queryhashes.iterator();
Integer pos;
int p, minpos = maxLength, maxpos = -1;
int p, minpos = result.length(), maxpos = -1;
while (j.hasNext()) {
pos = (Integer) hs.get((String) j.next());
if (pos != null) {
@ -252,6 +253,24 @@ public class plasmaSnippetCache {
if (maxpos > result.length()) maxpos = result.length();
if (minpos < 0) minpos = 0;
// we have a result, but is it short enough?
if (maxpos - minpos + 10 > maxLength) {
// the string is too long, even if we cut at both ends
// so cut here in the middle of the string
int lenb = result.length();
result = result.substring(0, (minpos + 20 > result.length()) ? result.length() : minpos + 20).trim() +
" [..] " +
result.substring((maxpos + 26 > result.length()) ? result.length() : maxpos + 26).trim();
maxpos = maxpos + lenb - result.length() + 6;
}
if (maxpos > maxLength) {
// the string is too long, even if we cut it at the end
// so cut it here at both ends at once
int newlen = maxpos - minpos + 10;
int around = (maxLength - newlen) / 2;
result = "[..] " + result.substring(minpos - around, maxpos + around).trim() + " [..]";
minpos = around;
maxpos = result.length() - around - 5;
}
if (result.length() > maxLength) {
// trim result, 1st step (cut at right side)
result = result.substring(0, maxpos).trim() + " [..]";
@ -277,7 +296,7 @@ public class plasmaSnippetCache {
private HashMap hashSentence(String sentence) {
// generates a word-wordPos mapping
HashMap map = new HashMap();
Enumeration words = plasmaCondenser.wordTokenizer(sentence);
Enumeration words = plasmaCondenser.wordTokenizer(sentence, 0);
int pos = 0;
String word;
while (words.hasMoreElements()) {

@ -373,12 +373,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// test routine for snippet fetch
// url = /www.heise.de/mobil/newsticker/meldung/mail/54980
//Set query = new HashSet();
//query.add(plasmaWordIndexEntry.word2hash("Weitergabe"));
//query.add(plasmaWordIndexEntry.word2hash("Zahl"));
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/mobil/newsticker/meldung/mail/54980"), query, true);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260);
}
private static String ppRamString(int bytes) {

@ -676,37 +676,4 @@ public class yacyCore {
}
}
/**
 * One-shot version probe.
 *
 * Reads the probe URL from the "onetimeAction" setting, appends this
 * installation's "version" and "vdate" settings as query parameters, fetches
 * the response and stores it (trimmed) in latestVersion. The response is then
 * compared numerically with the local version and a notice is emitted when
 * the two differ.
 *
 * The probe is best-effort: any failure (bad configuration, network error,
 * non-numeric response) is ignored and must never escape run().
 */
private class vprobe implements Runnable {

    public vprobe() {}

    public final void run() {
        // read the probe URL
        String probeURL = switchboard.getConfig("onetimeAction", null);
        if ((probeURL == null) || (probeURL.length() == 0)) return; // not wanted

        // Read the proxy settings only when a remote proxy is actually enabled.
        // A malformed port value is therefore irrelevant when the proxy is off.
        String proxyHost = null;
        int proxyPort = 0;
        if (switchboard.getConfig("remoteProxyUse", "false").equals("true")) {
            proxyHost = switchboard.getConfig("remoteProxyHost", "");
            try {
                proxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort", "0"));
            } catch (NumberFormatException e) {
                // A proxy is requested but its port is unusable. Skip the probe
                // instead of letting the exception escape from run().
                return;
            }
        }

        // read version and date
        String version = switchboard.getConfig("version", "");
        String date = switchboard.getConfig("vdate", "");
        probeURL = probeURL + "?version=" + version + "&date=" + date;

        // open new connection
        try {
            // NOTE(review): 10000 is presumably a timeout in milliseconds and the
            // two nulls are presumably optional credentials - confirm against httpc.singleGET
            latestVersion = new String(httpc.singleGET(new URL(probeURL), 10000, null, null, proxyHost, proxyPort)).trim();
            float latest = Float.parseFloat(latestVersion);
            float thisver = Float.parseFloat(version);
            if (thisver > latest) System.out.println("THIS SOFTWARE VERSION IS A PRE-RELEASE");
            if (thisver < latest) {
                log.logSystem("****************************************************************");
                log.logSystem("* THIS SOFTWARE VERSION IS OUTDATED.");
                log.logSystem("* PLEASE GO TO ANOMIC.DE AND DOWNLOAD THE LATEST VERSION " + latestVersion);
                log.logSystem("****************************************************************");
            }
        } catch (Exception e) {
            // we do nothing in this case: the probe is optional
        }
    }
}
}

Loading…
Cancel
Save