orbiter 18 years ago
parent ca83ec8a7b
commit 815e3da62f

@ -227,7 +227,7 @@ public class Surftips {
if (url.length() < 12) continue; if (url.length() < 12) continue;
entry = rowdef.newEntry(new byte[][]{ entry = rowdef.newEntry(new byte[][]{
url.getBytes(), url.getBytes(),
((intention.length() == 0) ? record.attribute("startURL", "") : intention).getBytes(), (((intention.length() == 0) || (intention.equals("simple web crawl"))) ? record.attribute("startURL", "") : intention).getBytes(),
("Crawl Start Point").getBytes("UTF-8"), ("Crawl Start Point").getBytes("UTF-8"),
record.id().getBytes() record.id().getBytes()
}); });

@ -209,7 +209,7 @@ public class yacysearch {
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) { if (post.get("cat", "href").equals("href")) {
final TreeSet[] query = plasmaSearchQuery.cleanQuery(querystring); final TreeSet[] query = plasmaSearchQuery.cleanQuery(querystring); // converts also umlaute
// filter out stopwords // filter out stopwords
final TreeSet filtered = kelondroMSetTools.joinConstructive(query[0], plasmaSwitchboard.stopwords); final TreeSet filtered = kelondroMSetTools.joinConstructive(query[0], plasmaSwitchboard.stopwords);
if (filtered.size() > 0) { if (filtered.size() > 0) {

@ -152,10 +152,10 @@ public final class plasmaCondenser {
// phrase 99 is taken from the media Link url and anchor description // phrase 99 is taken from the media Link url and anchor description
// phrase 100 and above are lines from the text // phrase 100 and above are lines from the text
insertTextToWords(document.getTitle(), 1, indexRWIEntry.flag_app_descr, wflags); insertTextToWords(document.getTitle(), 1, indexRWIEntry.flag_app_descr, wflags);
//insertTextToWords(document.getTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags); //insertTextToWords(document.getTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags);
insertTextToWords(document.getAbstract(), 3, indexRWIEntry.flag_app_descr, wflags); insertTextToWords(document.getAbstract(), 3, indexRWIEntry.flag_app_descr, wflags);
insertTextToWords(document.getAuthor(), 4, indexRWIEntry.flag_app_descr, wflags); insertTextToWords(document.getAuthor(), 4, indexRWIEntry.flag_app_descr, wflags);
// missing: tags! // missing: tags!
String[] titles = document.getSectionTitles(); String[] titles = document.getSectionTitles();
for (int i = 0; i < titles.length; i++) { for (int i = 0; i < titles.length; i++) {

@ -627,10 +627,10 @@ javastart_priority=0
# flushed to disc; this may last some minutes. # flushed to disc; this may last some minutes.
wordCacheMaxCount = 20000 wordCacheMaxCount = 20000
wordCacheInitCount = 30000 wordCacheInitCount = 30000
wordFlushSize = 500; wordFlushSize = 500
wordCacheMaxCount__pro = 60000 wordCacheMaxCount__pro = 60000
wordCacheInitCount__pro = 80000 wordCacheInitCount__pro = 80000
wordFlushSize__pro = 1000; wordFlushSize__pro = 1000
# Specifies if yacy can be used as transparent http proxy. # Specifies if yacy can be used as transparent http proxy.
# #

Loading…
Cancel
Save