diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
index 80db10ea7..4422ace8a 100644
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@@ -407,7 +407,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
- prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
+ prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
@@ -502,8 +502,8 @@ public class IndexControlRWIs_p {
}
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
- final QueryParams query = new QueryParams(new String(keyhash), -1, sb.getRanking(), filter);
- final RankingProcess ranked = new RankingProcess(segment, query, Integer.MAX_VALUE, 1);
+ final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
+ final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
ranked.run();
if (ranked.filteredCount() == 0) {
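With this change the Segment and the RankingProfile travel inside QueryParams, so a RankingProcess can be built from the query alone. A minimal sketch of the new call sequence, assuming a Segment `segment` and a Switchboard `sb` are in scope (the integer argument is the result line count as in the hunks above; values are illustrative):

```java
// short QueryParams constructor, argument order as used above:
// query string, result line count, constraint filter, segment, ranking profile
QueryParams query = new QueryParams("yacy", -1, null, segment, sb.getRanking());

// the ranking process no longer needs a Segment of its own
RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
ranked.run(); // synchronous here; start() would feed it concurrently
```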
diff --git a/htroot/env/base.css b/htroot/env/base.css
index 8b79de9bf..de1317e53 100644
--- a/htroot/env/base.css
+++ b/htroot/env/base.css
@@ -371,7 +371,6 @@ div.yacylogo {
float:left;
}
-
/*----------
,
,
,
,
*/
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 58ea503d2..4e2ad6eb4 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -53,6 +53,7 @@ import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent;
import de.anomic.search.SearchEventCache;
+import de.anomic.search.Segment;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.search.ResultEntry;
@@ -189,13 +190,13 @@ public final class search {
SearchEvent theSearch = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
+ Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
theQuery = new QueryParams(
null,
abstractSet,
new TreeSet<byte[]>(Base64Order.enhancedCoder),
null,
null,
- rankingProfile,
maxdist,
prefer,
ContentDomain.contentdomParser(contentdom),
@@ -213,13 +214,16 @@ public final class search {
authorhash,
DigestURI.TLD_any_zone_filter,
client,
- false);
+ false,
+ indexSegment,
+ rankingProfile
+ );
theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
final long timer = System.currentTimeMillis();
//final Map>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
- final HashMap<byte[], ReferenceContainer<WordReference>> incc = sb.indexSegments.termIndex(Segments.Process.PUBLIC).searchConjunction(theQuery.queryHashes, QueryParams.hashes2StringSet(urls));
+ final HashMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.queryHashes, QueryParams.hashes2StringSet(urls));
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.COLLECTION, incc.size(), System.currentTimeMillis() - timer), false);
if (incc != null) {
@@ -247,10 +251,9 @@ public final class search {
excludehashes,
null,
null,
- rankingProfile,
maxdist,
- prefer,
- ContentDomain.contentdomParser(contentdom),
+ prefer,
+ ContentDomain.contentdomParser(contentdom),
language,
"", // no navigation
false,
@@ -265,13 +268,16 @@ public final class search {
authorhash,
DigestURI.TLD_any_zone_filter,
client,
- false);
+ false,
+ sb.indexSegments.segment(Segments.Process.PUBLIC),
+ rankingProfile
+ );
theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
// make event
- theSearch = SearchEventCache.getEvent(theQuery, sb.indexSegments.segment(Segments.Process.PUBLIC), sb.peers, sb.crawlResults, null, true);
+ theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, null, true);
// set statistic details of search result and find best result index set
if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
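Both QueryParams call sites in this servlet move rankingProfile from the middle of the long constructor to the tail, right after the new Segment parameter. A compressed sketch of the new tail; the comment stands in for the many unchanged arguments:

```java
theQuery = new QueryParams(
        // ... unchanged arguments ...
        client,
        false,           // specialRights
        indexSegment,    // new: the Segment this query runs against
        rankingProfile); // moved here from the middle of the argument list
```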
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index bca85169c..c6ca02d94 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -165,6 +165,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
#(resultTable)#::#(/resultTable)#
+#(pageNavBottom)#::
+#[resnav]#
+#(/pageNavBottom)#
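The #(pageNavBottom)# block uses the servlet template switch convention: the integer stored under the block's key selects the branch before (0) or after (1) the `::` separator, and #[resnav]# is filled from the prefixed key. A sketch of the server-side pairing, mirroring the yacysearch.java hunk below:

```java
// 0 selects the empty branch before '::', 1 the branch containing #[resnav]#
prop.put("pageNavBottom", (totalcount - offset > 6) ? 1 : 0);
prop.put("pageNavBottom_resnav", resnav.toString()); // substituted into #[resnav]#
```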
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 2fbf190d3..8c74602b7 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -440,7 +440,6 @@ public class yacysearch {
Word.words2hashes(query[1]),
Word.words2hashes(query[2]),
tenant,
- ranking,
maxDistance,
prefermask,
contentdomCode,
@@ -459,7 +458,9 @@ public class yacysearch {
authorhash,
DigestURI.TLD_any_zone_filter,
client,
- authenticated);
+ authenticated,
+ indexSegment,
+ ranking);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.INITIALIZATION, 0, 0), false);
// tell all threads to do nothing for a specific time
@@ -478,7 +479,7 @@ public class yacysearch {
theQuery.setOffset(0); // in case this is a new search, always start without an offset
offset = 0;
}
- final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, indexSegment, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false);
+ final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false);
// generate result object
//serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + (System.currentTimeMillis() - timestamp) + " ms");
@@ -571,7 +572,7 @@ public class yacysearch {
final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
prop.put("num-results_offset", offset);
- prop.put("num-results_itemscount", "0");
+ prop.put("num-results_itemscount", Formatter.number(0, true));
prop.put("num-results_itemsPerPage", itemsPerPage);
prop.put("num-results_totalcount", Formatter.number(totalcount, true));
prop.put("num-results_globalresults", (globalsearch) ? "1" : "0");
@@ -611,7 +612,10 @@ public class yacysearch {
resnav.append(QueryParams.navurl("html", thispage + 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append("\">");
}
- prop.put("num-results_resnav", resnav.toString());
+ String resnavs = resnav.toString();
+ prop.put("num-results_resnav", resnavs);
+ prop.put("pageNavBottom", (totalcount - offset > 6) ? 1 : 0); // if there are more results than may fit on the page we add a navigation at the bottom
+ prop.put("pageNavBottom_resnav", resnavs);
// generate the search result lines; the content will be produced by another servlet
for (int i = 0; i < theQuery.displayResults(); i++) {
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index 3b278d77e..0afb06ac7 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -86,10 +86,11 @@ public class yacysearchitem {
final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values
+ final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
final int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
prop.put("offset", offset);
- prop.put("itemscount", (item < 0) ? theQuery.neededResults() : item + 1);
- prop.put("totalcount", Formatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), true));
+ prop.put("itemscount", Formatter.number(Math.min((item < 0) ? theQuery.neededResults() : item + 1, totalcount)));
+ prop.put("totalcount", Formatter.number(totalcount, true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalResourceSize(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
@@ -169,7 +170,7 @@ public class yacysearchitem {
prop.putHTML("content_item_href", ms.href.toNormalform(true, false));
prop.put("content_item_code", sb.licensedURLs.aquireLicense(ms.href));
prop.putHTML("content_item_name", shorten(ms.name, namelength));
- prop.put("content_item_mime", ms.mime);
+ prop.put("content_item_mimetype", ms.mime);
prop.put("content_item_fileSize", ms.fileSize);
prop.put("content_item_width", ms.width);
prop.put("content_item_height", ms.height);
diff --git a/htroot/yacysearchitem.xml b/htroot/yacysearchitem.xml
index 863b82342..2280f0844 100644
--- a/htroot/yacysearchitem.xml
+++ b/htroot/yacysearchitem.xml
@@ -9,7 +9,8 @@
#[path]##[file]##[urlhash]#
-::#(item)#::
+::
+#(item)#::#[name]#
#[source]#
@@ -18,32 +19,32 @@
#[sourcedom]#
+ url="#[href]#"
+ fileSize="#[fileSize]#"
+ type="#[mimetype]#"
+ medium="image"
+ isDefault="true"
+ expression="full"
+ height="#[width]#"
+ width="#[height]#" />
+ url="#[hrefCache]#"
+ fileSize="#[fileSize]#"
+ type="#[mimetype]#"
+ medium="image"
+ isDefault="false"
+ expression="full"
+ height="#[width]#"
+ width="#[height]#" />
+ url="/ViewImage.png?maxwidth=96&maxheight=96&code=#[code]#"
+ fileSize="#[fileSize]#"
+ type="#[mimetype]#"
+ medium="image"
+ isDefault="false"
+ expression="sample"
+ height="96"
+ width="96" />
#(/item)#::
#(/content)#
\ No newline at end of file
diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java
index 831d76f81..f2d31d977 100644
--- a/source/de/anomic/search/DocumentIndex.java
+++ b/source/de/anomic/search/DocumentIndex.java
@@ -194,15 +194,13 @@ public class DocumentIndex extends Segment {
public static final ArrayList<URIMetadataRow> findMetadata(
final String querystring,
final Segment indexSegment) {
- QueryParams query = new QueryParams(querystring, 100, textRankingDefault, null);
- return findMetadata(query, indexSegment);
+ QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
+ return findMetadata(query);
}
- public static final ArrayList<URIMetadataRow> findMetadata(
- final QueryParams query,
- final Segment indexSegment) {
+ public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query) {
- RankingProcess rankedCache = new RankingProcess(indexSegment, query, 1000, 2);
+ RankingProcess rankedCache = new RankingProcess(query, 1000, 2);
rankedCache.run();
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();
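DocumentIndex.findMetadata keeps its two-argument convenience form; the Segment is folded into the QueryParams it constructs. A hedged usage sketch (YaCy-internal imports omitted; `segment` is assumed to be an open Segment):

```java
// builds QueryParams(querystring, 100, null, segment, textRankingDefault) internally
ArrayList<URIMetadataRow> hits = DocumentIndex.findMetadata("yacy", segment);
for (URIMetadataRow row : hits) {
    System.out.println(row.metadata().url()); // each row carries the URL metadata
}
```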
diff --git a/source/de/anomic/search/MediaSnippet.java b/source/de/anomic/search/MediaSnippet.java
index 4b482399d..04aa0b388 100644
--- a/source/de/anomic/search/MediaSnippet.java
+++ b/source/de/anomic/search/MediaSnippet.java
@@ -71,6 +71,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -124,6 +127,8 @@ public final class QueryParams {
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false;
this.navigators = "all";
+ this.order = new ReferenceOrder(this.ranking, this.targetlang);
+ this.indexSegment = indexSegment;
}
public QueryParams(
@@ -131,7 +136,6 @@ public final class QueryParams {
final TreeSet excludeHashes,
final TreeSet fullqueryHashes,
final String tenant,
- final RankingProfile ranking,
final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language,
final String navigators,
@@ -143,7 +147,9 @@ public final class QueryParams {
final String authorhash,
final int domainzone,
final String host,
- final boolean specialRights) {
+ final boolean specialRights,
+ final Segment indexSegment,
+ final RankingProfile ranking) {
this.queryString = queryString;
this.queryHashes = queryHashes;
this.excludeHashes = excludeHashes;
@@ -171,6 +177,16 @@ public final class QueryParams {
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = specialRights;
+ this.order = new ReferenceOrder(this.ranking, this.targetlang);
+ this.indexSegment = indexSegment;
+ }
+
+ public ReferenceOrder getOrder() {
+ return this.order;
+ }
+
+ public Segment getSegment() {
+ return this.indexSegment;
}
public int neededResults() {
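With order and segment bound at construction time, consumers reach both through the query object. A minimal sketch, assuming a QueryParams `query` and a WordReferenceVars `entry` are in scope:

```java
ReferenceOrder order = query.getOrder(); // built once from ranking profile and target language
long rank = order.cardinal(entry);       // ranking key for a single word reference
Segment segment = query.getSegment();    // the index segment this query runs against
```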
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index acdfc415d..8d7bb944a 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -65,10 +65,8 @@ public final class RankingProcess extends Thread {
private static boolean useYBR = true;
private static final int maxDoubleDomAll = 20, maxDoubleDomSpecial = 10000;
- private final Segment indexSegment;
private final QueryParams query;
private final int maxentries;
- private final ReferenceOrder order;
private final ConcurrentHashMap<String, Long> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not be found in the LURL-DB
@@ -86,11 +84,7 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap authorNavigator;
- public RankingProcess(
- final Segment indexSegment,
- final QueryParams query,
- final int maxentries,
- final int concurrency) {
+ public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
@@ -98,7 +92,6 @@ public final class RankingProcess extends Thread {
this.stack = new SortStack<WordReferenceVars>(maxentries);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>();
- this.order = (query == null) ? null : new ReferenceOrder(query.ranking, query.targetlang);
this.query = query;
this.maxentries = maxentries;
this.remote_peerCount = 0;
@@ -107,7 +100,6 @@ public final class RankingProcess extends Thread {
this.local_resourceSize = 0;
this.urlhashes = new ConcurrentHashMap<String, Long>(0, 0.75f, concurrency);
this.misses = new TreeSet<String>();
- this.indexSegment = indexSegment;
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new ConcurrentHashMap();
@@ -119,6 +111,10 @@ public final class RankingProcess extends Thread {
assert this.feeders >= 1;
}
+ public QueryParams getQuery() {
+ return this.query;
+ }
+
public void run() {
// do a search
@@ -126,7 +122,7 @@ public final class RankingProcess extends Thread {
// so following sortings together with the global results will be fast
try {
long timer = System.currentTimeMillis();
- final TermSearch<WordReference> search = this.indexSegment.termIndex().query(
+ final TermSearch<WordReference> search = this.query.getSegment().termIndex().query(
query.queryHashes,
query.excludeHashes,
null,
@@ -146,14 +142,6 @@ public final class RankingProcess extends Thread {
oneFeederTerminated();
}
- public long ranking(final WordReferenceVars word) {
- return order.cardinal(word);
- }
-
- public int[] zones() {
- return this.domZones;
- }
-
public void add(final ReferenceContainer<WordReference> index, final boolean local, final int fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@@ -170,7 +158,7 @@ public final class RankingProcess extends Thread {
long timer = System.currentTimeMillis();
// normalize entries
- final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index);
+ final BlockingQueue<WordReferenceVars> decodedEntries = this.query.getOrder().normalizeWith(index);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
// iterate over normalized entries and select some that are better than currently stored
@@ -244,7 +232,7 @@ public final class RankingProcess extends Thread {
for (WordReferenceVars fEntry: filteredEntries) {
// kick out entries that are too bad according to current findings
- r = Long.valueOf(order.cardinal(fEntry));
+ r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
assert maxentries != 0;
if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue;
@@ -367,6 +355,15 @@ public final class RankingProcess extends Thread {
return bestEntry;
}
+ /**
+ * Get one metadata entry from the ranked results. This will be the 'best' entry so far
+ * according to the applied ranking. If there are no more entries left or the timeout
+ * is reached, null is returned. The caller can distinguish the timeout case from true
+ * exhaustion of the result queue by calling this.feedingIsFinished().
+ * @param skipDoubleDom true if entries from domains that already delivered a result shall be skipped
+ * @param timeout the maximum time in milliseconds this method may spend computing a result
+ * @return a metadata entry for a URL, or null
+ */
public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout;
@@ -377,7 +374,7 @@ public final class RankingProcess extends Thread {
try {Thread.sleep(50);} catch (final InterruptedException e1) {}
continue;
}
- final URIMetadataRow page = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
+ final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
if (page == null) {
misses.add(obrwi.element.metadataHash());
continue;
@@ -412,7 +409,7 @@ public final class RankingProcess extends Thread {
(query.constraint.get(Condenser.flag_cat_indexof)) &&
(!(pagetitle.startsWith("index of")))) {
final Iterator<byte[]> wi = query.queryHashes.iterator();
- while (wi.hasNext()) try { indexSegment.termIndex().remove(wi.next(), page.hash()); } catch (IOException e) {}
+ while (wi.hasNext()) try { this.query.getSegment().termIndex().remove(wi.next(), page.hash()); } catch (IOException e) {}
continue;
}
@@ -564,7 +561,7 @@ public final class RankingProcess extends Thread {
DigestURI url;
String hostname;
for (int i = 0; i < rc; i++) {
- mr = indexSegment.urlMetadata().load(hsa[i].hashsample, null, 0);
+ mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
if (mr == null) continue;
url = mr.metadata().url();
if (url == null) continue;
@@ -655,10 +652,6 @@ public final class RankingProcess extends Thread {
return result;
}
- public ReferenceOrder getOrder() {
- return this.order;
- }
-
public static void loadYBR(final File rankingPath, final int count) {
// load ranking tables
if (rankingPath.exists()) {
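The reworked takeURL contract (javadoc above) makes a null return ambiguous between timeout and exhaustion; feedingIsFinished() disambiguates. A hedged consumption loop, where process() stands for any hypothetical consumer:

```java
URIMetadataRow page;
while (true) {
    page = ranked.takeURL(true, 1000);         // skip double domains, wait up to 1s
    if (page == null) {
        if (ranked.feedingIsFinished()) break; // exhausted: nothing more will arrive
        continue;                              // timeout only: poll again
    }
    process(page);                             // hypothetical consumer
}
```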
diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java
index 0a5f3dab1..cad90cd7c 100644
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@@ -54,7 +54,6 @@ public class ResultFetcher {
// input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query;
- private final Segment indexSegment;
private final yacySeedDB peers;
// result values
@@ -71,13 +70,11 @@ public class ResultFetcher {
public ResultFetcher(
RankingProcess rankedCache,
final QueryParams query,
- final Segment indexSegment,
final yacySeedDB peers,
final int taketimeout) {
this.rankedCache = rankedCache;
this.query = query;
- this.indexSegment = indexSegment;
this.peers = peers;
this.taketimeout = taketimeout;
@@ -121,7 +118,6 @@ public class ResultFetcher {
return false;
}
-
public long getURLRetrievalTime() {
return this.urlRetrievalAllTime;
}
@@ -166,7 +162,7 @@ public class ResultFetcher {
if (page == null) break;
if (failedURLs.get(page.hash()) != null) continue;
- final ResultEntry resultEntry = fetchSnippet(page, snippetMode);
+ final ResultEntry resultEntry = fetchSnippet(page, snippetMode); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used
if (result.exists(resultEntry)) continue;
@@ -177,7 +173,7 @@ public class ResultFetcher {
// place the result to the result vector
// apply post-ranking
- long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
+ long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
result.push(resultEntry, ranking);
@@ -209,7 +205,7 @@ public class ResultFetcher {
final long dbRetrievalTime = System.currentTimeMillis() - startTime;
if (snippetMode == 0) {
- return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, 0); // result without snippet
+ return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, 0); // result without snippet
}
// load snippet
@@ -222,17 +218,17 @@ public class ResultFetcher {
if (snippet.getErrorCode() < 11) {
// we loaded the file and found the snippet
- return new ResultEntry(page, indexSegment, peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached
+ return new ResultEntry(page, query.getSegment(), peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached
} else if (snippetMode == 1) {
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster
- return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
+ return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
} else {
// problems with snippet fetch
registerFailure(page.hash(), "no text snippet for URL " + metadata.url());
if (!peers.mySeed().isVirgin())
try {
- TextSnippet.failConsequences(this.indexSegment, page.word(), snippet, query.id(false));
+ TextSnippet.failConsequences(query.getSegment(), page.word(), snippet, query.id(false));
} catch (IOException e) {
Log.logException(e);
}
@@ -247,9 +243,9 @@ public class ResultFetcher {
if ((mediaSnippets != null) && (mediaSnippets.size() > 0)) {
// found media snippets, return entry
- return new ResultEntry(page, indexSegment, peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime);
+ return new ResultEntry(page, query.getSegment(), peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime);
} else if (snippetMode == 1) {
- return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, snippetComputationTime);
+ return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime);
} else {
// problems with snippet fetch
registerFailure(page.hash(), "no media snippet for URL " + metadata.url());
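fetchSnippet branches on snippetMode as shown above; the semantics, inferred from this diff's comments and sketched for reference:

```java
// snippetMode semantics as used in ResultFetcher (inferred from the branches above):
//   0 -> compute no snippet, return the bare ResultEntry immediately
//   1 -> try a snippet, but keep the result even if none could be loaded
//   else -> a snippet failure rejects the result and registers the URL as failed
final ResultEntry entry = fetchSnippet(page, snippetMode);
if (entry == null) { /* rejected: strict mode and the snippet fetch failed */ }
```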
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index d85f4291a..eb1c86e88 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -64,7 +64,6 @@ public final class SearchEvent {
// class variables that may be implemented with an abstract class
private long eventTime;
private QueryParams query;
- private final Segment indexSegment;
private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher results;
@@ -82,13 +81,11 @@ public final class SearchEvent {
private byte[] IAmaxcounthash, IAneardhthash;
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
- final Segment indexSegment,
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap preselectedPeerHashes,
final boolean generateAbstracts) {
this.eventTime = System.currentTimeMillis(); // for lifetime check
- this.indexSegment = indexSegment;
this.peers = peers;
this.crawlResults = crawlResults;
this.query = query;
@@ -109,7 +106,7 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads
- this.rankedCache = new RankingProcess(indexSegment, query, max_results_preparation, fetchpeers + 1);
+ this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1);
// start a local search concurrently
this.rankedCache.start();
@@ -128,7 +125,7 @@ public final class SearchEvent {
query.authorhash == null ? "" : query.authorhash,
query.displayResults(),
query.maxDistance,
- indexSegment,
+ query.getSegment(),
peers,
crawlResults,
rankedCache,
@@ -149,10 +146,10 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
- this.results = new ResultFetcher(rankedCache, query, indexSegment, peers, 10000);
+ this.results = new ResultFetcher(rankedCache, query, peers, 10000);
} else {
// do a local search
- this.rankedCache = new RankingProcess(indexSegment, query, max_results_preparation, 2);
+ this.rankedCache = new RankingProcess(query, max_results_preparation, 2);
this.rankedCache.run();
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
@@ -184,7 +181,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
- this.results = new ResultFetcher(rankedCache, query, indexSegment, peers, 10);
+ this.results = new ResultFetcher(rankedCache, query, peers, 10);
}
// clean up events
@@ -223,7 +220,7 @@ public final class SearchEvent {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
- this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet());
+ this.query.getSegment().termIndex().remove(j.next(), this.results.failedURLs.keySet());
}
} catch (IOException e) {
Log.logException(e);
@@ -376,7 +373,7 @@ public final class SearchEvent {
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
- words, "", urls, indexSegment, peers, crawlResults, this.rankedCache, peer, Switchboard.urlBlacklist,
+ words, "", urls, this.query.getSegment(), peers, crawlResults, this.rankedCache, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes);
}
diff --git a/source/de/anomic/search/SearchEventCache.java b/source/de/anomic/search/SearchEventCache.java
index 6b7e9f77a..e3ae76287 100644
--- a/source/de/anomic/search/SearchEventCache.java
+++ b/source/de/anomic/search/SearchEventCache.java
@@ -66,7 +66,6 @@ public class SearchEventCache {
public static SearchEvent getEvent(
final QueryParams query,
- final Segment indexSegment,
final yacySeedDB peers,
final ResultURLs crawlResults,
final TreeMap preselectedPeerHashes,
@@ -90,7 +89,7 @@ public class SearchEventCache {
}
if (event == null) {
// start a new event
- event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
+ event = new SearchEvent(query, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
}
return event;
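SearchEventCache.getEvent drops its Segment parameter as well; the event reaches the segment through the query. A sketch of the new call, matching the yacysearch.java hunk above:

```java
SearchEvent theSearch = SearchEventCache.getEvent(
        theQuery,        // QueryParams, now carrying segment and ranking profile
        sb.peers,        // yacySeedDB
        sb.crawlResults, // ResultURLs
        null,            // preselectedPeerHashes; none here
        false);          // generateAbstracts
```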
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 42b3ed526..e0432952f 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -603,7 +603,7 @@ public final class Switchboard extends serverSwitch {
int indexerThreads = Math.max(1, WorkflowProcessor.useCPU / 2);
this.indexingStorageProcessor = new WorkflowProcessor(
"storeDocumentIndex",
- "This is the sequencing step of the indexing queue: no concurrency is wanted here, because the access of the indexer works better if it is not concurrent. Files are written as streams, councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.",
+ "This is the sequencing step of the indexing queue. Files are written as streams, too much councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.",
new String[]{"RWI/Cache/Collections"},
this, "storeDocumentIndex", WorkflowProcessor.useCPU + 40, null, indexerThreads);
this.indexingAnalysisProcessor = new WorkflowProcessor(
diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java
index 1666f1a4c..a59c73552 100644
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@@ -61,6 +61,7 @@ import net.yacy.kelondro.util.SetTools;
public final class Condenser {
// this is the page analysis class
+ final static boolean pseudostemming = false; // switch for removal of words that appear in shortened form
// category flags that show how the page can be distinguished in different interest groups
public static final int flag_cat_indexof = 0; // a directory listing page (i.e. containing 'index of')
@@ -110,7 +111,7 @@ public final class Condenser {
) throws UnsupportedEncodingException {
// if addMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag
- this.wordminsize = 3;
+ this.wordminsize = 2;
this.wordcut = 2;
this.words = new HashMap<String, Word>();
this.RESULT_FLAGS = new Bitfield(4);
@@ -408,39 +409,41 @@ public final class Condenser {
}
}
- Map.Entry<String, Word> entry;
- // we search for similar words and reorganize the corresponding sentences
- // a word is similar, if a shortened version is equal
- final Iterator<Map.Entry<String, Word>> wi = words.entrySet().iterator(); // enumerates the keys in descending order
- wordsearch: while (wi.hasNext()) {
- entry = wi.next();
- word = entry.getKey();
- wordlen = word.length();
- wsp = entry.getValue();
- for (int i = wordcut; i > 0; i--) {
- if (wordlen > i) {
- k = word.substring(0, wordlen - i);
- if (words.containsKey(k)) {
- // we will delete the word 'word' and repoint the
- // corresponding links
- // in sentences that use this word
- wsp1 = words.get(k);
- final Iterator<Integer> it1 = wsp.phrases(); // we iterate over all sentences that refer to this word
- while (it1.hasNext()) {
- idx = it1.next().intValue(); // number of a sentence
- s = (String[]) orderedSentences[idx];
- for (int j = 2; j < s.length; j++) {
- if (s[j].equals(intString(wsp.posInText, numlength)))
- s[j] = intString(wsp1.posInText, numlength);
+ if (pseudostemming) {
+ Map.Entry<String, Word> entry;
+ // we search for similar words and reorganize the corresponding sentences
+ // a word is similar, if a shortened version is equal
+ final Iterator<Map.Entry<String, Word>> wi = words.entrySet().iterator(); // enumerates the keys in descending order
+ wordsearch: while (wi.hasNext()) {
+ entry = wi.next();
+ word = entry.getKey();
+ wordlen = word.length();
+ wsp = entry.getValue();
+ for (int i = wordcut; i > 0; i--) {
+ if (wordlen > i) {
+ k = word.substring(0, wordlen - i);
+ if (words.containsKey(k)) {
+ // we will delete the word 'word' and repoint the
+ // corresponding links
+ // in sentences that use this word
+ wsp1 = words.get(k);
+ final Iterator<Integer> it1 = wsp.phrases(); // we iterate over all sentences that refer to this word
+ while (it1.hasNext()) {
+ idx = it1.next().intValue(); // number of a sentence
+ s = (String[]) orderedSentences[idx];
+ for (int j = 2; j < s.length; j++) {
+ if (s[j].equals(intString(wsp.posInText, numlength)))
+ s[j] = intString(wsp1.posInText, numlength);
+ }
+ orderedSentences[idx] = s;
}
- orderedSentences[idx] = s;
+ // update word counter
+ wsp1.count = wsp1.count + wsp.count;
+ words.put(k, wsp1);
+ // remove current word
+ wi.remove();
+ continue wordsearch;
}
- // update word counter
- wsp1.count = wsp1.count + wsp.count;
- words.put(k, wsp1);
- // remove current word
- wi.remove();
- continue wordsearch;
}
}
}
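The folding that the now-disabled pseudostemming block performs can be shown standalone: a word is merged into any existing entry that equals one of its shortened forms, cut by up to wordcut characters. A toy, self-contained illustration with hypothetical words and plain integer counts:

```java
import java.util.HashMap;
import java.util.Map;

public class PseudostemDemo {
    public static void main(String[] args) {
        Map<String, Integer> words = new HashMap<String, Integer>();
        words.put("house", 3);
        words.put("houses", 2);

        String word = "houses";
        int wordcut = 2;                     // same default as in Condenser
        for (int i = wordcut; i > 0; i--) {  // try the longest cut first
            if (word.length() > i) {
                String k = word.substring(0, word.length() - i);
                Integer base = words.get(k); // "house" matches at i == 1
                if (base != null) {
                    words.put(k, base + words.remove(word)); // merge counts: house -> 5
                    break;
                }
            }
        }
        System.out.println(words); // prints {house=5}
    }
}
```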
diff --git a/source/net/yacy/kelondro/util/SortStack.java b/source/net/yacy/kelondro/util/SortStack.java
index 259d756de..2428db8f4 100644
--- a/source/net/yacy/kelondro/util/SortStack.java
+++ b/source/net/yacy/kelondro/util/SortStack.java
@@ -44,6 +44,10 @@ public class SortStack<E> {
private ConcurrentHashMap instack; // keeps track which element has been on the stack
protected int maxsize;
+ public SortStack() {
+ this(-1);
+ }
+
public SortStack(final int maxsize) {
// the maxsize is the maximum number of entries in the stack
// if this is set to -1, the size is unlimited
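The new no-argument constructor simply delegates to the unbounded case. A short sketch, using the element type that appears elsewhere in this patch:

```java
SortStack<WordReferenceVars> unbounded = new SortStack<WordReferenceVars>();    // same as passing -1
SortStack<WordReferenceVars> bounded   = new SortStack<WordReferenceVars>(200); // keeps at most 200 entries
```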
diff --git a/source/net/yacy/kelondro/workflow/BlockingThread.java b/source/net/yacy/kelondro/workflow/BlockingThread.java
index b86835549..beb4fa000 100644
--- a/source/net/yacy/kelondro/workflow/BlockingThread.java
+++ b/source/net/yacy/kelondro/workflow/BlockingThread.java
@@ -28,6 +28,8 @@ package net.yacy.kelondro.workflow;
public interface BlockingThread<J extends WorkflowJob> extends WorkflowThread {
public void setManager(WorkflowProcessor<J> queue);
+
+
public WorkflowProcessor<J> getManager();
public J job(J next) throws Exception;
diff --git a/source/net/yacy/kelondro/workflow/InstantBlockingThread.java b/source/net/yacy/kelondro/workflow/InstantBlockingThread.java
index e72e2a096..a17969ef5 100644
--- a/source/net/yacy/kelondro/workflow/InstantBlockingThread.java
+++ b/source/net/yacy/kelondro/workflow/InstantBlockingThread.java
@@ -55,6 +55,20 @@ public class InstantBlockingThread<J extends WorkflowJob> extends AbstractBlockingThread<J> implements BlockingThread<J> {
this.handle = Long.valueOf(System.currentTimeMillis() + this.getName().hashCode());
}
+ public InstantBlockingThread(final Object env, final Method jobExecMethod, final WorkflowProcessor manager) {
+ // jobExec is the name of a method of the object 'env' that executes the one-step-run
+ // jobCount is the name of a method that returns the size of the job
+
+ // set the manager of blocking queues for input and output
+ this.setManager(manager);
+
+ // define execution class
+ this.jobExecMethod = jobExecMethod;
+ this.environment = (env instanceof Class<?>) ? null : env;
+ this.setName(jobExecMethod.getDeclaringClass().getName() + "." + jobExecMethod.getName() + "." + handleCounter++);
+ this.handle = Long.valueOf(System.currentTimeMillis() + this.getName().hashCode());
+ }
+
protected static Method execMethod(final Object env, final String jobExec) {
final Class<?> theClass = (env instanceof Class<?>) ? (Class<?>) env : env.getClass();
try {
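The added constructor accepts a pre-resolved Method instead of a method name, so the reflective lookup happens once rather than per thread; execMethod() stays the package-level helper that resolves the name, as WorkflowProcessor does in the hunk below. A hedged sketch, where MyJob stands for any WorkflowJob subtype and "process" is an illustrative method name:

```java
// resolve the job method once, then hand it to every worker thread
Method m = InstantBlockingThread.execMethod(env, "process");
InstantBlockingThread<MyJob> worker =
        new InstantBlockingThread<MyJob>(env, m, manager);
```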
diff --git a/source/net/yacy/kelondro/workflow/WorkflowJob.java b/source/net/yacy/kelondro/workflow/WorkflowJob.java
index 366089a99..6d2b7cf58 100644
--- a/source/net/yacy/kelondro/workflow/WorkflowJob.java
+++ b/source/net/yacy/kelondro/workflow/WorkflowJob.java
@@ -1,4 +1,4 @@
-// serverProcessor.java
+// WorkflowJob.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 29.02.2008 on http://yacy.net
//
diff --git a/source/net/yacy/kelondro/workflow/WorkflowProcessor.java b/source/net/yacy/kelondro/workflow/WorkflowProcessor.java
index 12efd3769..67d66e2ee 100644
--- a/source/net/yacy/kelondro/workflow/WorkflowProcessor.java
+++ b/source/net/yacy/kelondro/workflow/WorkflowProcessor.java
@@ -56,7 +56,8 @@ public class WorkflowProcessor<J extends WorkflowJob> {
public WorkflowProcessor(
String name, String description, String[] childnames,
- final Object env, final String jobExecMethod, final int inputQueueSize, final WorkflowProcessor output, final int poolsize) {
+ final Object env, final String jobExecMethod,
+ final int inputQueueSize, final WorkflowProcessor output, final int poolsize) {
// start a fixed number of executors that handle entries in the process queue
this.environment = env;
this.processName = name;
@@ -135,7 +136,7 @@ public class WorkflowProcessor {
Log.logWarning("PROCESSOR", "executing job " + environment.getClass().getName() + "." + methodName + " serialized");
try {
final J out = (J) InstantBlockingThread.execMethod(this.environment, this.methodName).invoke(environment, new Object[]{in});
- if ((out != null) && (output != null)) output.enQueue(out);
+ if (out != null && this.output != null) this.output.enQueue(out);
} catch (final IllegalArgumentException e) {
Log.logException(e);
} catch (final IllegalAccessException e) {