refactoring:

RankingProcess -> RWIProcess
ResultFetcher -> SnippetProcess


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7974 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent d2ea250d99
commit 2c3161b4ac

@ -91,7 +91,7 @@ attr_inboundlinks_tag
#attr_inboundlinks_rel
## internal links, the rel property of the a-tag, coded binary
#attr_inboundlinks_relcode
#attr_inboundlinks_relflags
## internal links, the text content of the a-tag
#attr_inboundlinks_text
@ -117,6 +117,9 @@ attr_outboundlinks_tag
## external links, the rel property of the a-tag
#attr_outboundlinks_rel
## external links, the rel property of the a-tag, coded binary
#attr_outboundlinks_relflags
## external links, the text content of the a-tag
#attr_outboundlinks_text

@ -64,9 +64,9 @@ import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.BlockRank;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.ranking.ReferenceOrder;
import de.anomic.crawler.ResultURLs;
import de.anomic.data.ListManager;
@ -134,7 +134,7 @@ public class IndexControlRWIs_p {
if (post.containsKey("keystringsearch")) {
keyhash = Word.word2hash(keystring);
prop.put("keyhash", keyhash);
final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 1);
prop.putHTML("searchresult_word", keystring);
@ -145,7 +145,7 @@ public class IndexControlRWIs_p {
if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
prop.put("keystring", "<not possible to compute word from hash>");
}
final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 2);
prop.putHTML("searchresult_wordhash", ASCII.String(keyhash));
@ -240,7 +240,7 @@ public class IndexControlRWIs_p {
}
final Bitfield flags = compileFlags(post);
final int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags);
final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags);
genURLList(prop, keyhash, keystring, ranking, flags, count);
}
@ -425,7 +425,7 @@ public class IndexControlRWIs_p {
return prop;
}
public static void genURLList(final serverObjects prop, final byte[] keyhash, final String keystring, final RankingProcess ranked, final Bitfield flags, final int maxlines) {
public static void genURLList(final serverObjects prop, final byte[] keyhash, final String keystring, final RWIProcess ranked, final Bitfield flags, final int maxlines) {
// search for a word hash and generate a list of url links
final String keyhashs = ASCII.String(keyhash);
prop.put("genUrlList_keyHash", keyhashs);
@ -557,10 +557,10 @@ public class IndexControlRWIs_p {
prop.put("searchresult_hosts", hc);
}
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, final Segment segment, final byte[] keyhash, final Bitfield filter) {
public static RWIProcess genSearchresult(final serverObjects prop, final Switchboard sb, final Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(ASCII.String(keyhash), -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p");
final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE);
final RWIProcess ranked = new RWIProcess(query, order, Integer.MAX_VALUE);
ranked.run();
if (ranked.filteredCount() == 0) {

@ -162,6 +162,7 @@ public class SolrScheme extends ConfigurationSet {
if (isEmpty() || contains("attr_inboundlinks_urlstub")) addSolr(solrdoc, "attr_inboundlinks_urlstub", inboundlinksURLStub);
if (isEmpty() || contains("attr_inboundlinks_name")) addSolr(solrdoc, "attr_inboundlinks_name", inboundlinksName);
if (isEmpty() || contains("attr_inboundlinks_rel")) addSolr(solrdoc, "attr_inboundlinks_rel", inboundlinksRel);
if (isEmpty() || contains("attr_inboundlinks_relflags")) addSolr(solrdoc, "attr_inboundlinks_relflags", relEval(inboundlinksRel));
if (isEmpty() || contains("attr_inboundlinks_text")) addSolr(solrdoc, "attr_inboundlinks_text", inboundlinksText);
c = 0;
@ -198,6 +199,7 @@ public class SolrScheme extends ConfigurationSet {
if (isEmpty() || contains("attr_outboundlinks_urlstub")) addSolr(solrdoc, "attr_outboundlinks_urlstub", outboundlinksURLStub);
if (isEmpty() || contains("attr_outboundlinks_name")) addSolr(solrdoc, "attr_outboundlinks_name", outboundlinksName);
if (isEmpty() || contains("attr_outboundlinks_rel")) addSolr(solrdoc, "attr_outboundlinks_rel", outboundlinksRel);
// the outbound rel flags must be computed from the outbound links' rel values (not the inbound ones)
if (isEmpty() || contains("attr_outboundlinks_relflags")) addSolr(solrdoc, "attr_outboundlinks_relflags", relEval(outboundlinksRel));
if (isEmpty() || contains("attr_outboundlinks_text")) addSolr(solrdoc, "attr_outboundlinks_text", outboundlinksText);
@ -352,6 +354,16 @@ public class SolrScheme extends ConfigurationSet {
return solrdoc;
}
/**
 * Encodes the rel attribute values of a set of links as a bit field.
 * Bit 0 (value 1) is set if any value contains the token "me",
 * bit 1 (value 2) is set if any value contains the token "nofollow".
 * Matching is case-insensitive; a single rel value may carry several
 * whitespace-separated tokens (e.g. "external nofollow").
 *
 * @param rel the rel attribute values, one per link; may be null or contain null entries
 * @return the combined flags in the range 0..3; 0 if nothing matched or rel is null
 */
private int relEval(String[] rel) {
    if (rel == null) return 0;
    int flags = 0;
    for (String s: rel) {
        if (s == null) continue;
        for (String token: s.trim().split("\\s+")) {
            // combine with OR, not addition: repeated tokens must not carry over into the next bit
            if ("me".equalsIgnoreCase(token)) flags |= 1;
            if ("nofollow".equalsIgnoreCase(token)) flags |= 2;
        }
    }
    return flags;
}
/**
 * Reads the value of the "id" field from a Solr document.
 *
 * @param solr the document to read from
 * @return the "id" field value cast to String; null if the field lookup yields null
 */
public String solrGetID(final SolrDocument solr) {
    final Object id = solr.getFieldValue("id");
    return (String) id;
}

@ -97,8 +97,8 @@ import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.TextSnippet;
@ -440,7 +440,7 @@ public final class yacyClient {
final int partitions,
final yacySeed target,
final Segment indexSegment,
final RankingProcess containerCache,
final RWIProcess containerCache,
final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser,
final Blacklist blacklist,
final RankingProfile rankingProfile,

@ -36,8 +36,8 @@ import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.ranking.RankingProfile;
@ -49,7 +49,7 @@ public class yacySearch extends Thread {
final private boolean global;
final private int partitions;
final private Segment indexSegment;
final private RankingProcess containerCache;
final private RWIProcess containerCache;
final private SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser;
final private Blacklist blacklist;
final private yacySeed targetPeer;
@ -75,7 +75,7 @@ public class yacySearch extends Thread {
final yacySeed targetPeer,
final Segment indexSegment,
final yacySeedDB peers,
final RankingProcess containerCache,
final RWIProcess containerCache,
final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser,
final Blacklist blacklist,
final RankingProfile rankingProfile,
@ -162,7 +162,7 @@ public class yacySearch extends Thread {
final int count, long time, final int maxDist,
final Segment indexSegment,
final yacySeedDB peers,
final RankingProcess containerCache,
final RWIProcess containerCache,
final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser,
final Blacklist blacklist,
final RankingProfile rankingProfile,
@ -211,7 +211,7 @@ public class yacySearch extends Thread {
final long time,
final Segment indexSegment,
final yacySeedDB peers,
final RankingProcess containerCache,
final RWIProcess containerCache,
final String targethash, final Blacklist blacklist,
final RankingProfile rankingProfile,
final Bitfield constraint, final SortedMap<byte[], String> clusterselection) {

@ -45,8 +45,8 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ContentDomain;
@ -207,7 +207,7 @@ public class DocumentIndex extends Segment {
// make a query and start a search
final QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault, "DocumentIndex");
final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang));
final RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation);
final RWIProcess rankedCache = new RWIProcess(query, order, SearchEvent.max_results_preparation);
rankedCache.start();
// search is running; retrieve results

@ -63,8 +63,8 @@ import net.yacy.kelondro.util.ISO639;
import net.yacy.repository.Blacklist;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProcess;
import de.anomic.crawler.retrieval.Response;
public class Segment {
@ -219,7 +219,7 @@ public class Segment {
final int outlinksOther,
final SearchEvent searchEvent,
final String sourceName) {
final RankingProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
if (rankingProcess != null) rankingProcess.moreFeeders(1);
int wordCount = 0;
final int urlLength = url.toNormalform(true, true).length();

@ -24,7 +24,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.ranking;
package net.yacy.search.query;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
@ -60,13 +60,11 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEvent.Type;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.ResultEntry;
public final class RankingProcess extends Thread {
public final class RWIProcess extends Thread {
private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000;
@ -79,7 +77,7 @@ public final class RankingProcess extends Thread {
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private int remote_resourceSize, remote_indexCount, remote_peerCount;
private int local_resourceSize, local_indexCount;
private int local_indexCount;
private final WeakPriorityBlockingQueue<WordReferenceVars> stack;
private int feeders;
private final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
@ -99,7 +97,7 @@ public final class RankingProcess extends Thread {
private final ScoreMap<String> filetypeNavigator; // a counter for file types
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
public RWIProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
@ -112,7 +110,6 @@ public final class RankingProcess extends Thread {
this.remote_peerCount = 0;
this.remote_resourceSize = 0;
this.remote_indexCount = 0;
this.local_resourceSize = 0;
this.local_indexCount = 0;
this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100);
this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100);
@ -181,9 +178,7 @@ public final class RankingProcess extends Thread {
assert (index != null);
if (index.isEmpty()) return;
if (local) {
this.local_resourceSize += index.size();
} else {
if (!local) {
assert fullResource >= 0 : "fullResource = " + fullResource;
this.remote_resourceSize += fullResource;
this.remote_peerCount++;

@ -53,8 +53,7 @@ import net.yacy.peers.dht.FlatWordPartitionScheme;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.ResultFetcher.Worker;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.query.SnippetProcess.Worker;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ResultEntry;
import de.anomic.data.WorkTables;
@ -73,8 +72,8 @@ public final class SearchEvent {
private QueryParams query;
private final yacySeedDB peers;
private final WorkTables workTables;
private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher resultFetcher;
private RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
private SnippetProcess resultFetcher;
private final SecondarySearchSuperviser secondarySearchSuperviser;
@ -122,7 +121,7 @@ public final class SearchEvent {
if (remote) {
// initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
this.rankingProcess = new RWIProcess(this.query, this.order, max_results_preparation);
// start a local search concurrently
this.rankingProcess.start();
@ -163,10 +162,10 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail);
this.resultFetcher = new SnippetProcess(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail);
} else {
// do a local search
this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
this.rankingProcess = new RWIProcess(this.query, this.order, max_results_preparation);
if (generateAbstracts) {
this.rankingProcess.run(); // this is not started concurrently here on purpose!
@ -207,7 +206,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail);
this.resultFetcher = new SnippetProcess(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail);
}
// clean up events
@ -330,7 +329,7 @@ public final class SearchEvent {
return this.secondarySearchThreads;
}
public RankingProcess getRankingResult() {
/**
 * Gives access to the ranking process attached to this search event.
 *
 * @return the RWIProcess currently held in the rankingProcess field
 */
public RWIProcess getRankingResult() {
    final RWIProcess process = this.rankingProcess;
    return process;
}
@ -571,7 +570,7 @@ public final class SearchEvent {
}
public ResultFetcher result() {
/**
 * Gives access to the snippet process attached to this search event.
 *
 * @return the SnippetProcess currently held in the resultFetcher field
 */
public SnippetProcess result() {
    final SnippetProcess fetcher = this.resultFetcher;
    return fetcher;
}

@ -51,7 +51,6 @@ import net.yacy.peers.yacySeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.ranking.RankingProcess;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
@ -63,10 +62,10 @@ import org.apache.solr.common.SolrDocumentList;
import de.anomic.data.WorkTables;
import de.anomic.http.client.Cache;
public class ResultFetcher {
public class SnippetProcess {
// input values
final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
QueryParams query;
private final yacySeedDB peers;
private final WorkTables workTables;
@ -83,9 +82,9 @@ public class ResultFetcher {
private final boolean deleteIfSnippetFail;
private boolean cleanupState;
public ResultFetcher(
public SnippetProcess(
final LoaderDispatcher loader,
final RankingProcess rankedCache,
final RWIProcess rankedCache,
final QueryParams query,
final yacySeedDB peers,
final WorkTables workTables,
@ -355,7 +354,7 @@ public class ResultFetcher {
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults;
this.shallrun = true;
this.solr = ResultFetcher.this.rankingProcess.getQuery().getSegment().getSolr();
this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getSolr();
}
@Override
@ -365,7 +364,7 @@ public class ResultFetcher {
URIMetadataRow page;
ResultEntry resultEntry;
//final int fetchAhead = snippetMode == 0 ? 0 : 10;
final boolean nav_topics = ResultFetcher.this.query.navigators.equals("all") || ResultFetcher.this.query.navigators.indexOf("topics") >= 0;
final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics") >= 0;
try {
//System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
int loops = 0;
@ -377,25 +376,25 @@ public class ResultFetcher {
}
// check if we have enough
if (ResultFetcher.this.result.sizeAvailable() >= this.neededResults) {
if (SnippetProcess.this.result.sizeAvailable() >= this.neededResults) {
//Log.logWarning("ResultFetcher", ResultFetcher.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
break;
}
// check if we can succeed if we try to take another url
if (ResultFetcher.this.rankingProcess.feedingIsFinished() && ResultFetcher.this.rankingProcess.sizeQueue() == 0) {
if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) {
//Log.logWarning("ResultFetcher", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
break;
}
// get next entry
page = ResultFetcher.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) {
//System.out.println("page == null");
break; // no more available
}
if (ResultFetcher.this.query.filterfailurls && ResultFetcher.this.workTables.failURLsContains(page.hash())) continue;
if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) continue;
// in case that we have an attached solr, we load also the solr document
String solrContent = null;
@ -415,16 +414,16 @@ public class ResultFetcher {
//if (rawLine != null && !this.snippetPattern.matcher(rawLine).matches()) continue;
//if (result.contains(resultEntry)) continue;
ResultFetcher.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime;
ResultFetcher.this.snippetComputationAllTime += resultEntry.snippetComputationTime;
SnippetProcess.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime;
SnippetProcess.this.snippetComputationAllTime += resultEntry.snippetComputationTime;
// place the result to the result vector
// apply post-ranking
long ranking = Long.valueOf(ResultFetcher.this.rankingProcess.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, ResultFetcher.this.rankingProcess.getTopicNavigator(10));
long ranking = Long.valueOf(SnippetProcess.this.rankingProcess.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, SnippetProcess.this.rankingProcess.getTopicNavigator(10));
resultEntry.ranking = ranking;
ResultFetcher.this.result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (nav_topics) ResultFetcher.this.rankingProcess.addTopics(resultEntry);
SnippetProcess.this.result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (nav_topics) SnippetProcess.this.rankingProcess.addTopics(resultEntry);
}
//System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
} catch (final Exception e) {
Loading…
Cancel
Save