- some refactoring in workflow

- some refactoring in search process
- fixed image search for JSON and RSS output
- added search navigation at the bottom of the search result page in cases where there are more than 6 results on the page
- fixed the number of displayed documents
- disabled pseudostemming

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6504 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 969123385b
commit 491ba6a1ba

@ -47,9 +47,9 @@
<dt><label for="adminuser">Peer User:</label></dt> <dt><label for="adminuser">Peer User:</label></dt>
<dd><input type="text" name="adminuser" id="adminuser" value="#[defaultUser]#" size="16" maxlength="32" /></dd> <dd><input type="text" name="adminuser" id="adminuser" value="#[defaultUser]#" size="16" maxlength="32" /></dd>
<dt><label for="adminpw1">New Peer Password:</label></dt> <dt><label for="adminpw1">New Peer Password:</label></dt>
<dd><input type="password" name="adminpw1" id="adminpw1" value="" size="16" maxlength="32" /></dd> <dd><input type="password" name="adminpw1" id="adminpw1" value="" size="16" maxlength="1024" /></dd>
<dt><label for="adminpw2">Repeat Peer Password:</label></dt> <dt><label for="adminpw2">Repeat Peer Password:</label></dt>
<dd><input type="password" name="adminpw2" id="adminpw2" value="" size="16" maxlength="32" /></dd> <dd><input type="password" name="adminpw2" id="adminpw2" value="" size="16" maxlength="1024" /></dd>
</dl> </dl>
</fieldset> </fieldset>
<input type="submit" name="setAdmin" value="Define Administrator" /> <input type="submit" name="setAdmin" value="Define Administrator" />

@ -407,7 +407,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified()))); prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
@ -502,8 +502,8 @@ public class IndexControlRWIs_p {
} }
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) { public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, sb.getRanking(), filter); final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
final RankingProcess ranked = new RankingProcess(segment, query, Integer.MAX_VALUE, 1); final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
ranked.run(); ranked.run();
if (ranked.filteredCount() == 0) { if (ranked.filteredCount() == 0) {

@ -371,7 +371,6 @@ div.yacylogo {
float:left; float:left;
} }
/*---------- /*----------
<h1>, <h2>, <h3>, <h4>, <h5> <h1>, <h2>, <h3>, <h4>, <h5>
*/ */

@ -53,6 +53,7 @@ import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile; import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent; import de.anomic.search.SearchEvent;
import de.anomic.search.SearchEventCache; import de.anomic.search.SearchEventCache;
import de.anomic.search.Segment;
import de.anomic.search.Segments; import de.anomic.search.Segments;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
import de.anomic.search.ResultEntry; import de.anomic.search.ResultEntry;
@ -189,13 +190,13 @@ public final class search {
SearchEvent theSearch = null; SearchEvent theSearch = null;
if ((query.length() == 0) && (abstractSet != null)) { if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts // this is _not_ a normal search, only a request for index abstracts
Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
theQuery = new QueryParams( theQuery = new QueryParams(
null, null,
abstractSet, abstractSet,
new TreeSet<byte[]>(Base64Order.enhancedCoder), new TreeSet<byte[]>(Base64Order.enhancedCoder),
null, null,
null, null,
rankingProfile,
maxdist, maxdist,
prefer, prefer,
ContentDomain.contentdomParser(contentdom), ContentDomain.contentdomParser(contentdom),
@ -213,13 +214,16 @@ public final class search {
authorhash, authorhash,
DigestURI.TLD_any_zone_filter, DigestURI.TLD_any_zone_filter,
client, client,
false); false,
indexSegment,
rankingProfile
);
theQuery.domType = QueryParams.SEARCHDOM_LOCAL; theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls)); //final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
final HashMap<byte[], ReferenceContainer<WordReference>> incc = sb.indexSegments.termIndex(Segments.Process.PUBLIC).searchConjunction(theQuery.queryHashes, QueryParams.hashes2StringSet(urls)); final HashMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.queryHashes, QueryParams.hashes2StringSet(urls));
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.COLLECTION, incc.size(), System.currentTimeMillis() - timer), false); MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.COLLECTION, incc.size(), System.currentTimeMillis() - timer), false);
if (incc != null) { if (incc != null) {
@ -247,10 +251,9 @@ public final class search {
excludehashes, excludehashes,
null, null,
null, null,
rankingProfile,
maxdist, maxdist,
prefer, prefer,
ContentDomain.contentdomParser(contentdom), ContentDomain.contentdomParser(contentdom),
language, language,
"", // no navigation "", // no navigation
false, false,
@ -265,13 +268,16 @@ public final class search {
authorhash, authorhash,
DigestURI.TLD_any_zone_filter, DigestURI.TLD_any_zone_filter,
client, client,
false); false,
sb.indexSegments.segment(Segments.Process.PUBLIC),
rankingProfile
);
theQuery.domType = QueryParams.SEARCHDOM_LOCAL; theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), "")); RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
// make event // make event
theSearch = SearchEventCache.getEvent(theQuery, sb.indexSegments.segment(Segments.Process.PUBLIC), sb.peers, sb.crawlResults, null, true); theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, null, true);
// set statistic details of search result and find best result index set // set statistic details of search result and find best result index set
if (theSearch.getRankingResult().getLocalResourceSize() == 0) { if (theSearch.getRankingResult().getLocalResourceSize() == 0) {

@ -165,6 +165,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
#(resultTable)#::</table>#(/resultTable)# #(resultTable)#::</table>#(/resultTable)#
<!-- linklist end --> <!-- linklist end -->
<!-- attach the bottomline --> <!-- attach the bottomline -->
#(pageNavBottom)#::<div id="pageNavBottom" align="center">#[resnav]#</div>#(/pageNavBottom)#
</div> </div>
<div style="width=220px;"> <div style="width=220px;">
<!--#include virtual="yacysearchtrailer.html?eventID=#[eventID]#&display=#[display]#" --> <!--#include virtual="yacysearchtrailer.html?eventID=#[eventID]#&display=#[display]#" -->

@ -440,7 +440,6 @@ public class yacysearch {
Word.words2hashes(query[1]), Word.words2hashes(query[1]),
Word.words2hashes(query[2]), Word.words2hashes(query[2]),
tenant, tenant,
ranking,
maxDistance, maxDistance,
prefermask, prefermask,
contentdomCode, contentdomCode,
@ -459,7 +458,9 @@ public class yacysearch {
authorhash, authorhash,
DigestURI.TLD_any_zone_filter, DigestURI.TLD_any_zone_filter,
client, client,
authenticated); authenticated,
indexSegment,
ranking);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.INITIALIZATION, 0, 0), false); MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.INITIALIZATION, 0, 0), false);
// tell all threads to do nothing for a specific time // tell all threads to do nothing for a specific time
@ -478,7 +479,7 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0; offset = 0;
} }
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, indexSegment, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false); final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.crawlResults, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false);
// generate result object // generate result object
//serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + (System.currentTimeMillis() - timestamp) + " ms"); //serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + (System.currentTimeMillis() - timestamp) + " ms");
@ -571,7 +572,7 @@ public class yacysearch {
final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
prop.put("num-results_offset", offset); prop.put("num-results_offset", offset);
prop.put("num-results_itemscount", "0"); prop.put("num-results_itemscount", Formatter.number(0, true));
prop.put("num-results_itemsPerPage", itemsPerPage); prop.put("num-results_itemsPerPage", itemsPerPage);
prop.put("num-results_totalcount", Formatter.number(totalcount, true)); prop.put("num-results_totalcount", Formatter.number(totalcount, true));
prop.put("num-results_globalresults", (globalsearch) ? "1" : "0"); prop.put("num-results_globalresults", (globalsearch) ? "1" : "0");
@ -611,7 +612,10 @@ public class yacysearch {
resnav.append(QueryParams.navurl("html", thispage + 1, display, theQuery, originalUrlMask, null, navigation)); resnav.append(QueryParams.navurl("html", thispage + 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append("\"><img src=\"env/grafics/navdr.gif\" width=\"16\" height=\"16\"></a>"); resnav.append("\"><img src=\"env/grafics/navdr.gif\" width=\"16\" height=\"16\"></a>");
} }
prop.put("num-results_resnav", resnav.toString()); String resnavs = resnav.toString();
prop.put("num-results_resnav", resnavs);
prop.put("pageNavBottom", (totalcount - offset > 6) ? 1 : 0); // if there are more results than may fit on the page we add a navigation at the bottom
prop.put("pageNavBottom_resnav", resnavs);
// generate the search result lines; the content will be produced by another servlet // generate the search result lines; the content will be produced by another servlet
for (int i = 0; i < theQuery.displayResults(); i++) { for (int i = 0; i < theQuery.displayResults(); i++) {

@ -86,10 +86,11 @@ public class yacysearchitem {
final QueryParams theQuery = theSearch.getQuery(); final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values // dynamically update count values
final int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
final int offset = theQuery.neededResults() - theQuery.displayResults() + 1; final int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
prop.put("offset", offset); prop.put("offset", offset);
prop.put("itemscount", (item < 0) ? theQuery.neededResults() : item + 1); prop.put("itemscount", Formatter.number(Math.min((item < 0) ? theQuery.neededResults() : item + 1, totalcount)));
prop.put("totalcount", Formatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), true)); prop.put("totalcount", Formatter.number(totalcount, true));
prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalResourceSize(), true)); prop.put("localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalResourceSize(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true)); prop.put("remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true)); prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
@ -169,7 +170,7 @@ public class yacysearchitem {
prop.putHTML("content_item_href", ms.href.toNormalform(true, false)); prop.putHTML("content_item_href", ms.href.toNormalform(true, false));
prop.put("content_item_code", sb.licensedURLs.aquireLicense(ms.href)); prop.put("content_item_code", sb.licensedURLs.aquireLicense(ms.href));
prop.putHTML("content_item_name", shorten(ms.name, namelength)); prop.putHTML("content_item_name", shorten(ms.name, namelength));
prop.put("content_item_mime", ms.mime); prop.put("content_item_mimetype", ms.mime);
prop.put("content_item_fileSize", ms.fileSize); prop.put("content_item_fileSize", ms.fileSize);
prop.put("content_item_width", ms.width); prop.put("content_item_width", ms.width);
prop.put("content_item_height", ms.height); prop.put("content_item_height", ms.height);

@ -9,7 +9,8 @@
<yacy:path>#[path]#</yacy:path> <yacy:path>#[path]#</yacy:path>
<yacy:file>#[file]#</yacy:file> <yacy:file>#[file]#</yacy:file>
<guid isPermaLink="false">#[urlhash]#</guid> <guid isPermaLink="false">#[urlhash]#</guid>
</item>::#(item)#::<item> </item>::
#(item)#::<item>
<title>#[name]#</title> <title>#[name]#</title>
<link>#[source]#</link> <link>#[source]#</link>
<description></description> <description></description>
@ -18,32 +19,32 @@
<yacy:host>#[sourcedom]#</yacy:host> <yacy:host>#[sourcedom]#</yacy:host>
<media:group> <media:group>
<media:content <media:content
url="#[href]#" url="#[href]#"
fileSize="#[fileSize]#" fileSize="#[fileSize]#"
type="#[mime]#" type="#[mimetype]#"
medium="image" medium="image"
isDefault="true" isDefault="true"
expression="full" expression="full"
height="#[width]#" height="#[width]#"
width="#[height]#" /> width="#[height]#" />
<media:content <media:content
url="#[hrefCache]#" url="#[hrefCache]#"
fileSize="#[fileSize]#" fileSize="#[fileSize]#"
type="#[mime]#" type="#[mimetype]#"
medium="image" medium="image"
isDefault="false" isDefault="false"
expression="full" expression="full"
height="#[width]#" height="#[width]#"
width="#[height]#" /> width="#[height]#" />
<media:content <media:content
url="/ViewImage.png?maxwidth=96&amp;maxheight=96&amp;code=#[code]#" url="/ViewImage.png?maxwidth=96&amp;maxheight=96&amp;code=#[code]#"
fileSize="#[fileSize]#" fileSize="#[fileSize]#"
type="#[mime]#" type="#[mimetype]#"
medium="image" medium="image"
isDefault="false" isDefault="false"
expression="sample" expression="sample"
height="96" height="96"
width="96" /> width="96" />
</media:group> </media:group>
</item>#(/item)#:: </item>#(/item)#::
#(/content)# #(/content)#

@ -194,15 +194,13 @@ public class DocumentIndex extends Segment {
public static final ArrayList<URIMetadataRow> findMetadata( public static final ArrayList<URIMetadataRow> findMetadata(
final String querystring, final String querystring,
final Segment indexSegment) { final Segment indexSegment) {
QueryParams query = new QueryParams(querystring, 100, textRankingDefault, null); QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
return findMetadata(query, indexSegment); return findMetadata(query);
} }
public static final ArrayList<URIMetadataRow> findMetadata( public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query) {
final QueryParams query,
final Segment indexSegment) {
RankingProcess rankedCache = new RankingProcess(indexSegment, query, 1000, 2); RankingProcess rankedCache = new RankingProcess(query, 1000, 2);
rankedCache.run(); rankedCache.run();
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>(); ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();

@ -71,6 +71,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
public MediaSnippet(final ContentDomain type, final DigestURI href, final String mime, final String name, final long fileSize, final int width, final int height, final int ranking, final DigestURI source) { public MediaSnippet(final ContentDomain type, final DigestURI href, final String mime, final String name, final long fileSize, final int width, final int height, final int ranking, final DigestURI source) {
this.type = type; this.type = type;
this.href = href; this.href = href;
this.mime = mime;
this.fileSize = fileSize; this.fileSize = fileSize;
this.source = source; // the web page where the media resource appeared this.source = source; // the web page where the media resource appeared
this.name = name; this.name = name;

@ -75,6 +75,8 @@ public final class QueryParams {
public boolean allofconstraint; public boolean allofconstraint;
public boolean onlineSnippetFetch; public boolean onlineSnippetFetch;
public RankingProfile ranking; public RankingProfile ranking;
private Segment indexSegment;
private final ReferenceOrder order;
public String host; // this is the client host that starts the query, not a site operator public String host; // this is the client host that starts the query, not a site operator
public String sitehash; // this is a domain hash, 6 bytes long or null public String sitehash; // this is a domain hash, 6 bytes long or null
public String authorhash; public String authorhash;
@ -88,8 +90,9 @@ public final class QueryParams {
public QueryParams(final String queryString, public QueryParams(final String queryString,
final int itemsPerPage, final int itemsPerPage,
final RankingProfile ranking, final Bitfield constraint,
final Bitfield constraint) { final Segment indexSegment,
final RankingProfile ranking) {
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) { if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
this.queryString = null; this.queryString = null;
this.queryHashes = new TreeSet<byte[]>(Base64Order.enhancedCoder); this.queryHashes = new TreeSet<byte[]>(Base64Order.enhancedCoder);
@ -124,6 +127,8 @@ public final class QueryParams {
this.handle = Long.valueOf(System.currentTimeMillis()); this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false; this.specialRights = false;
this.navigators = "all"; this.navigators = "all";
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment;
} }
public QueryParams( public QueryParams(
@ -131,7 +136,6 @@ public final class QueryParams {
final TreeSet<byte[]> excludeHashes, final TreeSet<byte[]> excludeHashes,
final TreeSet<byte[]> fullqueryHashes, final TreeSet<byte[]> fullqueryHashes,
final String tenant, final String tenant,
final RankingProfile ranking,
final int maxDistance, final String prefer, final ContentDomain contentdom, final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language, final String language,
final String navigators, final String navigators,
@ -143,7 +147,9 @@ public final class QueryParams {
final String authorhash, final String authorhash,
final int domainzone, final int domainzone,
final String host, final String host,
final boolean specialRights) { final boolean specialRights,
final Segment indexSegment,
final RankingProfile ranking) {
this.queryString = queryString; this.queryString = queryString;
this.queryHashes = queryHashes; this.queryHashes = queryHashes;
this.excludeHashes = excludeHashes; this.excludeHashes = excludeHashes;
@ -171,6 +177,16 @@ public final class QueryParams {
this.remotepeer = null; this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis()); this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = specialRights; this.specialRights = specialRights;
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment;
}
public ReferenceOrder getOrder() {
return this.order;
}
public Segment getSegment() {
return this.indexSegment;
} }
public int neededResults() { public int neededResults() {

@ -65,10 +65,8 @@ public final class RankingProcess extends Thread {
private static boolean useYBR = true; private static boolean useYBR = true;
private static final int maxDoubleDomAll = 20, maxDoubleDomSpecial = 10000; private static final int maxDoubleDomAll = 20, maxDoubleDomSpecial = 10000;
private final Segment indexSegment;
private final QueryParams query; private final QueryParams query;
private final int maxentries; private final int maxentries;
private final ReferenceOrder order;
private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
@ -86,11 +84,7 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator; private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
public RankingProcess( public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
final Segment indexSegment,
final QueryParams query,
final int maxentries,
final int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking // sortorder: 0 = hash, 1 = url, 2 = ranking
@ -98,7 +92,6 @@ public final class RankingProcess extends Thread {
this.stack = new SortStack<WordReferenceVars>(maxentries); this.stack = new SortStack<WordReferenceVars>(maxentries);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>(); this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>(); this.handover = new HashSet<String>();
this.order = (query == null) ? null : new ReferenceOrder(query.ranking, query.targetlang);
this.query = query; this.query = query;
this.maxentries = maxentries; this.maxentries = maxentries;
this.remote_peerCount = 0; this.remote_peerCount = 0;
@ -107,7 +100,6 @@ public final class RankingProcess extends Thread {
this.local_resourceSize = 0; this.local_resourceSize = 0;
this.urlhashes = new ConcurrentHashMap<String, Integer>(0, 0.75f, concurrency); this.urlhashes = new ConcurrentHashMap<String, Integer>(0, 0.75f, concurrency);
this.misses = new TreeSet<String>(); this.misses = new TreeSet<String>();
this.indexSegment = indexSegment;
this.flagcount = new int[32]; this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new ConcurrentHashMap<String, HostInfo>(); this.hostNavigator = new ConcurrentHashMap<String, HostInfo>();
@ -119,6 +111,10 @@ public final class RankingProcess extends Thread {
assert this.feeders >= 1; assert this.feeders >= 1;
} }
public QueryParams getQuery() {
return this.query;
}
public void run() { public void run() {
// do a search // do a search
@ -126,7 +122,7 @@ public final class RankingProcess extends Thread {
// so following sortings together with the global results will be fast // so following sortings together with the global results will be fast
try { try {
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
final TermSearch<WordReference> search = this.indexSegment.termIndex().query( final TermSearch<WordReference> search = this.query.getSegment().termIndex().query(
query.queryHashes, query.queryHashes,
query.excludeHashes, query.excludeHashes,
null, null,
@ -146,14 +142,6 @@ public final class RankingProcess extends Thread {
oneFeederTerminated(); oneFeederTerminated();
} }
public long ranking(final WordReferenceVars word) {
return order.cardinal(word);
}
public int[] zones() {
return this.domZones;
}
public void add(final ReferenceContainer<WordReference> index, final boolean local, final int fullResource) { public void add(final ReferenceContainer<WordReference> index, final boolean local, final int fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
@ -170,7 +158,7 @@ public final class RankingProcess extends Thread {
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
// normalize entries // normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index); final BlockingQueue<WordReferenceVars> decodedEntries = this.query.getOrder().normalizeWith(index);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false); MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
// iterate over normalized entries and select some that are better than currently stored // iterate over normalized entries and select some that are better than currently stored
@ -244,7 +232,7 @@ public final class RankingProcess extends Thread {
for (WordReferenceVars fEntry: filteredEntries) { for (WordReferenceVars fEntry: filteredEntries) {
// kick out entries that are too bad according to current findings // kick out entries that are too bad according to current findings
r = Long.valueOf(order.cardinal(fEntry)); r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
assert maxentries != 0; assert maxentries != 0;
if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue; if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue;
@ -367,6 +355,15 @@ public final class RankingProcess extends Thread {
return bestEntry; return bestEntry;
} }
/**
* get one metadata entry from the ranked results. This will be the 'best' entry so far
* according to the applied ranking. If there are no more entries left or the timeout
* limit is reached then null is returned. The caller may distinguish the timeout case
* from the case where there will be no more also in the future by calling this.feedingIsFinished()
* @param skipDoubleDom should be true if it is wanted that double domain entries are skipped
* @param timeout the time this method may take for a result computation
* @return a metadata entry for a url
*/
public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) { public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
// returns from the current RWI list the best URL entry and removes this entry from the list // returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout; long timeLimit = System.currentTimeMillis() + timeout;
@ -377,7 +374,7 @@ public final class RankingProcess extends Thread {
try {Thread.sleep(50);} catch (final InterruptedException e1) {} try {Thread.sleep(50);} catch (final InterruptedException e1) {}
continue; continue;
} }
final URIMetadataRow page = indexSegment.urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue()); final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
if (page == null) { if (page == null) {
misses.add(obrwi.element.metadataHash()); misses.add(obrwi.element.metadataHash());
continue; continue;
@ -412,7 +409,7 @@ public final class RankingProcess extends Thread {
(query.constraint.get(Condenser.flag_cat_indexof)) && (query.constraint.get(Condenser.flag_cat_indexof)) &&
(!(pagetitle.startsWith("index of")))) { (!(pagetitle.startsWith("index of")))) {
final Iterator<byte[]> wi = query.queryHashes.iterator(); final Iterator<byte[]> wi = query.queryHashes.iterator();
while (wi.hasNext()) try { indexSegment.termIndex().remove(wi.next(), page.hash()); } catch (IOException e) {} while (wi.hasNext()) try { this.query.getSegment().termIndex().remove(wi.next(), page.hash()); } catch (IOException e) {}
continue; continue;
} }
@ -564,7 +561,7 @@ public final class RankingProcess extends Thread {
DigestURI url; DigestURI url;
String hostname; String hostname;
for (int i = 0; i < rc; i++) { for (int i = 0; i < rc; i++) {
mr = indexSegment.urlMetadata().load(hsa[i].hashsample, null, 0); mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
if (mr == null) continue; if (mr == null) continue;
url = mr.metadata().url(); url = mr.metadata().url();
if (url == null) continue; if (url == null) continue;
@ -655,10 +652,6 @@ public final class RankingProcess extends Thread {
return result; return result;
} }
public ReferenceOrder getOrder() {
return this.order;
}
public static void loadYBR(final File rankingPath, final int count) { public static void loadYBR(final File rankingPath, final int count) {
// load ranking tables // load ranking tables
if (rankingPath.exists()) { if (rankingPath.exists()) {

@ -54,7 +54,6 @@ public class ResultFetcher {
// input values // input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query; QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers; private final yacySeedDB peers;
// result values // result values
@ -71,13 +70,11 @@ public class ResultFetcher {
public ResultFetcher( public ResultFetcher(
RankingProcess rankedCache, RankingProcess rankedCache,
final QueryParams query, final QueryParams query,
final Segment indexSegment,
final yacySeedDB peers, final yacySeedDB peers,
final int taketimeout) { final int taketimeout) {
this.rankedCache = rankedCache; this.rankedCache = rankedCache;
this.query = query; this.query = query;
this.indexSegment = indexSegment;
this.peers = peers; this.peers = peers;
this.taketimeout = taketimeout; this.taketimeout = taketimeout;
@ -121,7 +118,6 @@ public class ResultFetcher {
return false; return false;
} }
public long getURLRetrievalTime() { public long getURLRetrievalTime() {
return this.urlRetrievalAllTime; return this.urlRetrievalAllTime;
} }
@ -166,7 +162,7 @@ public class ResultFetcher {
if (page == null) break; if (page == null) break;
if (failedURLs.get(page.hash()) != null) continue; if (failedURLs.get(page.hash()) != null) continue;
final ResultEntry resultEntry = fetchSnippet(page, snippetMode); final ResultEntry resultEntry = fetchSnippet(page, snippetMode); // does not fetch snippets if snippetMode == 0
if (resultEntry == null) continue; // the entry had some problems, cannot be used if (resultEntry == null) continue; // the entry had some problems, cannot be used
if (result.exists(resultEntry)) continue; if (result.exists(resultEntry)) continue;
@ -177,7 +173,7 @@ public class ResultFetcher {
// place the result to the result vector // place the result to the result vector
// apply post-ranking // apply post-ranking
long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word())); long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics()); ranking += postRanking(resultEntry, rankedCache.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash()); //System.out.println("*** resultEntry.hash = " + resultEntry.hash());
result.push(resultEntry, ranking); result.push(resultEntry, ranking);
@ -209,7 +205,7 @@ public class ResultFetcher {
final long dbRetrievalTime = System.currentTimeMillis() - startTime; final long dbRetrievalTime = System.currentTimeMillis() - startTime;
if (snippetMode == 0) { if (snippetMode == 0) {
return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, 0); // result without snippet return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, 0); // result without snippet
} }
// load snippet // load snippet
@ -222,17 +218,17 @@ public class ResultFetcher {
if (snippet.getErrorCode() < 11) { if (snippet.getErrorCode() < 11) {
// we loaded the file and found the snippet // we loaded the file and found the snippet
return new ResultEntry(page, indexSegment, peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached return new ResultEntry(page, query.getSegment(), peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached
} else if (snippetMode == 1) { } else if (snippetMode == 1) {
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster // this may happen during a remote search, because snippet loading is omitted to retrieve results faster
return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
} else { } else {
// problems with snippet fetch // problems with snippet fetch
registerFailure(page.hash(), "no text snippet for URL " + metadata.url()); registerFailure(page.hash(), "no text snippet for URL " + metadata.url());
if (!peers.mySeed().isVirgin()) if (!peers.mySeed().isVirgin())
try { try {
TextSnippet.failConsequences(this.indexSegment, page.word(), snippet, query.id(false)); TextSnippet.failConsequences(query.getSegment(), page.word(), snippet, query.id(false));
} catch (IOException e) { } catch (IOException e) {
Log.logException(e); Log.logException(e);
} }
@ -247,9 +243,9 @@ public class ResultFetcher {
if ((mediaSnippets != null) && (mediaSnippets.size() > 0)) { if ((mediaSnippets != null) && (mediaSnippets.size() > 0)) {
// found media snippets, return entry // found media snippets, return entry
return new ResultEntry(page, indexSegment, peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime); return new ResultEntry(page, query.getSegment(), peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime);
} else if (snippetMode == 1) { } else if (snippetMode == 1) {
return new ResultEntry(page, indexSegment, peers, null, null, dbRetrievalTime, snippetComputationTime); return new ResultEntry(page, query.getSegment(), peers, null, null, dbRetrievalTime, snippetComputationTime);
} else { } else {
// problems with snippet fetch // problems with snippet fetch
registerFailure(page.hash(), "no media snippet for URL " + metadata.url()); registerFailure(page.hash(), "no media snippet for URL " + metadata.url());

@ -64,7 +64,6 @@ public final class SearchEvent {
// class variables that may be implemented with an abstract class // class variables that may be implemented with an abstract class
private long eventTime; private long eventTime;
private QueryParams query; private QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers; private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher results; private ResultFetcher results;
@ -82,13 +81,11 @@ public final class SearchEvent {
private byte[] IAmaxcounthash, IAneardhthash; private byte[] IAmaxcounthash, IAneardhthash;
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query, @SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
final Segment indexSegment,
final yacySeedDB peers, final yacySeedDB peers,
final ResultURLs crawlResults, final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes, final TreeMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts) { final boolean generateAbstracts) {
this.eventTime = System.currentTimeMillis(); // for lifetime check this.eventTime = System.currentTimeMillis(); // for lifetime check
this.indexSegment = indexSegment;
this.peers = peers; this.peers = peers;
this.crawlResults = crawlResults; this.crawlResults = crawlResults;
this.query = query; this.query = query;
@ -109,7 +106,7 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data // initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads // that is generated concurrently from local and global search threads
this.rankedCache = new RankingProcess(indexSegment, query, max_results_preparation, fetchpeers + 1); this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1);
// start a local search concurrently // start a local search concurrently
this.rankedCache.start(); this.rankedCache.start();
@ -128,7 +125,7 @@ public final class SearchEvent {
query.authorhash == null ? "" : query.authorhash, query.authorhash == null ? "" : query.authorhash,
query.displayResults(), query.displayResults(),
query.maxDistance, query.maxDistance,
indexSegment, query.getSegment(),
peers, peers,
crawlResults, crawlResults,
rankedCache, rankedCache,
@ -149,10 +146,10 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.results = new ResultFetcher(rankedCache, query, indexSegment, peers, 10000); this.results = new ResultFetcher(rankedCache, query, peers, 10000);
} else { } else {
// do a local search // do a local search
this.rankedCache = new RankingProcess(indexSegment, query, max_results_preparation, 2); this.rankedCache = new RankingProcess(query, max_results_preparation, 2);
this.rankedCache.run(); this.rankedCache.run();
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process); //CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
@ -184,7 +181,7 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.results = new ResultFetcher(rankedCache, query, indexSegment, peers, 10); this.results = new ResultFetcher(rankedCache, query, peers, 10);
} }
// clean up events // clean up events
@ -223,7 +220,7 @@ public final class SearchEvent {
final Iterator<byte[]> j = removeWords.iterator(); final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words // remove the same url hashes for multiple words
while (j.hasNext()) { while (j.hasNext()) {
this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet()); this.query.getSegment().termIndex().remove(j.next(), this.results.failedURLs.keySet());
} }
} catch (IOException e) { } catch (IOException e) {
Log.logException(e); Log.logException(e);
@ -376,7 +373,7 @@ public final class SearchEvent {
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls); //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words); //System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch( secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, "", urls, indexSegment, peers, crawlResults, this.rankedCache, peer, Switchboard.urlBlacklist, words, "", urls, this.query.getSegment(), peers, crawlResults, this.rankedCache, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes); query.ranking, query.constraint, preselectedPeerHashes);
} }

@ -66,7 +66,6 @@ public class SearchEventCache {
public static SearchEvent getEvent( public static SearchEvent getEvent(
final QueryParams query, final QueryParams query,
final Segment indexSegment,
final yacySeedDB peers, final yacySeedDB peers,
final ResultURLs crawlResults, final ResultURLs crawlResults,
final TreeMap<byte[], String> preselectedPeerHashes, final TreeMap<byte[], String> preselectedPeerHashes,
@ -90,7 +89,7 @@ public class SearchEventCache {
} }
if (event == null) { if (event == null) {
// start a new event // start a new event
event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts); event = new SearchEvent(query, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
} }
return event; return event;

@ -603,7 +603,7 @@ public final class Switchboard extends serverSwitch {
int indexerThreads = Math.max(1, WorkflowProcessor.useCPU / 2); int indexerThreads = Math.max(1, WorkflowProcessor.useCPU / 2);
this.indexingStorageProcessor = new WorkflowProcessor<indexingQueueEntry>( this.indexingStorageProcessor = new WorkflowProcessor<indexingQueueEntry>(
"storeDocumentIndex", "storeDocumentIndex",
"This is the sequencing step of the indexing queue: no concurrency is wanted here, because the access of the indexer works better if it is not concurrent. Files are written as streams, councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.", "This is the sequencing step of the indexing queue. Files are written as streams, too much councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.",
new String[]{"RWI/Cache/Collections"}, new String[]{"RWI/Cache/Collections"},
this, "storeDocumentIndex", WorkflowProcessor.useCPU + 40, null, indexerThreads); this, "storeDocumentIndex", WorkflowProcessor.useCPU + 40, null, indexerThreads);
this.indexingAnalysisProcessor = new WorkflowProcessor<indexingQueueEntry>( this.indexingAnalysisProcessor = new WorkflowProcessor<indexingQueueEntry>(

@ -61,6 +61,7 @@ import net.yacy.kelondro.util.SetTools;
public final class Condenser { public final class Condenser {
// this is the page analysis class // this is the page analysis class
final static boolean pseudostemming = false; // switch for removal of words that appear in shortened form
// category flags that show how the page can be distinguished in different interest groups // category flags that show how the page can be distinguished in different interest groups
public static final int flag_cat_indexof = 0; // a directory listing page (i.e. containing 'index of') public static final int flag_cat_indexof = 0; // a directory listing page (i.e. containing 'index of')
@ -110,7 +111,7 @@ public final class Condenser {
) throws UnsupportedEncodingException { ) throws UnsupportedEncodingException {
// if addMedia == true, then all the media links are also parsed and added to the words // if addMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag // added media words are flagged with the appropriate media flag
this.wordminsize = 3; this.wordminsize = 2;
this.wordcut = 2; this.wordcut = 2;
this.words = new HashMap<String, Word>(); this.words = new HashMap<String, Word>();
this.RESULT_FLAGS = new Bitfield(4); this.RESULT_FLAGS = new Bitfield(4);
@ -408,39 +409,41 @@ public final class Condenser {
} }
} }
Map.Entry<String, Word> entry; if (pseudostemming) {
// we search for similar words and reorganize the corresponding sentences Map.Entry<String, Word> entry;
// a word is similar, if a shortened version is equal // we search for similar words and reorganize the corresponding sentences
final Iterator<Map.Entry<String, Word>> wi = words.entrySet().iterator(); // enumerates the keys in descending order // a word is similar, if a shortened version is equal
wordsearch: while (wi.hasNext()) { final Iterator<Map.Entry<String, Word>> wi = words.entrySet().iterator(); // enumerates the keys in descending order
entry = wi.next(); wordsearch: while (wi.hasNext()) {
word = entry.getKey(); entry = wi.next();
wordlen = word.length(); word = entry.getKey();
wsp = entry.getValue(); wordlen = word.length();
for (int i = wordcut; i > 0; i--) { wsp = entry.getValue();
if (wordlen > i) { for (int i = wordcut; i > 0; i--) {
k = word.substring(0, wordlen - i); if (wordlen > i) {
if (words.containsKey(k)) { k = word.substring(0, wordlen - i);
// we will delete the word 'word' and repoint the if (words.containsKey(k)) {
// corresponding links // we will delete the word 'word' and repoint the
// in sentences that use this word // corresponding links
wsp1 = words.get(k); // in sentences that use this word
final Iterator<Integer> it1 = wsp.phrases(); // we iterate over all sentences that refer to this word wsp1 = words.get(k);
while (it1.hasNext()) { final Iterator<Integer> it1 = wsp.phrases(); // we iterate over all sentences that refer to this word
idx = it1.next().intValue(); // number of a sentence while (it1.hasNext()) {
s = (String[]) orderedSentences[idx]; idx = it1.next().intValue(); // number of a sentence
for (int j = 2; j < s.length; j++) { s = (String[]) orderedSentences[idx];
if (s[j].equals(intString(wsp.posInText, numlength))) for (int j = 2; j < s.length; j++) {
s[j] = intString(wsp1.posInText, numlength); if (s[j].equals(intString(wsp.posInText, numlength)))
s[j] = intString(wsp1.posInText, numlength);
}
orderedSentences[idx] = s;
} }
orderedSentences[idx] = s; // update word counter
wsp1.count = wsp1.count + wsp.count;
words.put(k, wsp1);
// remove current word
wi.remove();
continue wordsearch;
} }
// update word counter
wsp1.count = wsp1.count + wsp.count;
words.put(k, wsp1);
// remove current word
wi.remove();
continue wordsearch;
} }
} }
} }

@ -44,6 +44,10 @@ public class SortStack<E> {
private ConcurrentHashMap<E, Object> instack; // keeps track which element has been on the stack private ConcurrentHashMap<E, Object> instack; // keeps track which element has been on the stack
protected int maxsize; protected int maxsize;
public SortStack() {
this(-1);
}
public SortStack(final int maxsize) { public SortStack(final int maxsize) {
// the maxsize is the maximum number of entries in the stack // the maxsize is the maximum number of entries in the stack
// if this is set to -1, the size is unlimited // if this is set to -1, the size is unlimited

@ -28,6 +28,8 @@ package net.yacy.kelondro.workflow;
public interface BlockingThread<J extends WorkflowJob> extends WorkflowThread { public interface BlockingThread<J extends WorkflowJob> extends WorkflowThread {
public void setManager(WorkflowProcessor<J> queue); public void setManager(WorkflowProcessor<J> queue);
public WorkflowProcessor<J> getManager(); public WorkflowProcessor<J> getManager();
public J job(J next) throws Exception; public J job(J next) throws Exception;

@ -55,6 +55,20 @@ public class InstantBlockingThread<J extends WorkflowJob> extends AbstractBlocki
this.handle = Long.valueOf(System.currentTimeMillis() + this.getName().hashCode()); this.handle = Long.valueOf(System.currentTimeMillis() + this.getName().hashCode());
} }
public InstantBlockingThread(final Object env, final Method jobExecMethod, final WorkflowProcessor<J> manager) {
// jobExec is the name of a method of the object 'env' that executes the one-step-run
// jobCount is the name of a method that returns the size of the job
// set the manager of blocking queues for input and output
this.setManager(manager);
// define execution class
this.jobExecMethod = jobExecMethod;
this.environment = (env instanceof Class<?>) ? null : env;
this.setName(jobExecMethod.getClass().getName() + "." + jobExecMethod.getName() + "." + handleCounter++);
this.handle = Long.valueOf(System.currentTimeMillis() + this.getName().hashCode());
}
protected static Method execMethod(final Object env, final String jobExec) { protected static Method execMethod(final Object env, final String jobExec) {
final Class<?> theClass = (env instanceof Class<?>) ? (Class<?>) env : env.getClass(); final Class<?> theClass = (env instanceof Class<?>) ? (Class<?>) env : env.getClass();
try { try {

@ -1,4 +1,4 @@
// serverProcessor.java // WorkflowJob.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 29.02.2008 on http://yacy.net // first published 29.02.2008 on http://yacy.net
// //

@ -56,7 +56,8 @@ public class WorkflowProcessor<J extends WorkflowJob> {
public WorkflowProcessor( public WorkflowProcessor(
String name, String description, String[] childnames, String name, String description, String[] childnames,
final Object env, final String jobExecMethod, final int inputQueueSize, final WorkflowProcessor<J> output, final int poolsize) { final Object env, final String jobExecMethod,
final int inputQueueSize, final WorkflowProcessor<J> output, final int poolsize) {
// start a fixed number of executors that handle entries in the process queue // start a fixed number of executors that handle entries in the process queue
this.environment = env; this.environment = env;
this.processName = name; this.processName = name;
@ -135,7 +136,7 @@ public class WorkflowProcessor<J extends WorkflowJob> {
Log.logWarning("PROCESSOR", "executing job " + environment.getClass().getName() + "." + methodName + " serialized"); Log.logWarning("PROCESSOR", "executing job " + environment.getClass().getName() + "." + methodName + " serialized");
try { try {
final J out = (J) InstantBlockingThread.execMethod(this.environment, this.methodName).invoke(environment, new Object[]{in}); final J out = (J) InstantBlockingThread.execMethod(this.environment, this.methodName).invoke(environment, new Object[]{in});
if ((out != null) && (output != null)) output.enQueue(out); if (out != null && this.output != null) this.output.enQueue(out);
} catch (final IllegalArgumentException e) { } catch (final IllegalArgumentException e) {
Log.logException(e); Log.logException(e);
} catch (final IllegalAccessException e) { } catch (final IllegalAccessException e) {

Loading…
Cancel
Save