- introduced the plasmaSearchRankingProfile class

- selection of ranking and timing profiles for each search


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1539 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent eab1805bca
commit d98418390b

@ -101,7 +101,7 @@ public class PerformanceSearch_p {
int c;
char sequence;
if (se != null) for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
t = se.getLocalProfile().getYieldTime(plasmaSearchTimingProfile.sequence[i]);
t = se.getLocalTiming().getYieldTime(plasmaSearchTimingProfile.sequence[i]);
if (t > 0) time += t;
}
for (int i = 0; i < plasmaSearchTimingProfile.sequence.length; i++) {
@ -113,10 +113,10 @@ public class PerformanceSearch_p {
prop.put("latestLocalTimeRel_" + sequence, "-");
prop.put("latestLocalCountAbs_" + sequence, "-");
} else {
t = se.getLocalProfile().getYieldTime(sequence);
t = se.getLocalTiming().getYieldTime(sequence);
prop.put("latestLocalTimeAbs_" + sequence, (t < 0) ? "-" : Long.toString(t));
prop.put("latestLocalTimeRel_" + sequence, ((t < 0) ? 0 : (t * 100 / time)) + "%");
c = se.getLocalProfile().getYieldCount(sequence);
c = se.getLocalTiming().getYieldCount(sequence);
prop.put("latestLocalCountAbs_" + sequence, (c < 0) ? "-" : Integer.toString(c));
}
}

@ -54,6 +54,8 @@ import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchPreOrder;
@ -177,10 +179,14 @@ public class index {
}
// do the search
plasmaSearchQuery thisSearch = new plasmaSearchQuery(query, maxDistance, new String[]{order1, order2, order3}, count, searchtime, urlmask, referer,
plasmaSearchQuery thisSearch = new plasmaSearchQuery(query, maxDistance, count, searchtime, urlmask, referer,
((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL,
"", 20);
final serverObjects prop = sb.searchFromLocal(thisSearch);
plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(new String[]{order1, order2, order3});
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
final serverObjects prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming);
/*
final serverObjects prop = sb.searchFromLocal(query, order1, order2, count,
((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))),

@ -51,7 +51,9 @@ import java.util.HashSet;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchResult;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexEntry;
@ -103,15 +105,17 @@ public final class search {
}
final long timestamp = System.currentTimeMillis();
plasmaSearchQuery squery = new plasmaSearchQuery(keyhashes, maxdist, new String[]{plasmaSearchQuery.ORDER_YBR, plasmaSearchQuery.ORDER_DATE, plasmaSearchQuery.ORDER_QUALITY},
count, duetime, ".*");
plasmaSearchQuery squery = new plasmaSearchQuery(keyhashes, maxdist, count, duetime, ".*");
squery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
serverObjects prop = new serverObjects();
yacyCore.log.logInfo("INIT HASH SEARCH: " + squery.queryHashes + " - " + squery.wantedResults + " links");
long timestamp1 = System.currentTimeMillis();
plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchQuery.ORDER_YBR, plasmaSearchQuery.ORDER_DATE, plasmaSearchQuery.ORDER_QUALITY});
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
plasmaSearchResult acc = null;
int idxc = 0;
idxc = theSearch.localSearch();

@ -60,6 +60,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private serverLog log;
private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking;
private plasmaWordIndex wordIndex;
private plasmaCrawlLURL urlStore;
private plasmaSnippetCache snippetCache;
@ -68,22 +69,25 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private yacySearch[] searchThreads;
public plasmaSearchEvent(plasmaSearchQuery query, serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL urlStore, plasmaSnippetCache snippetCache) {
public plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming,
serverLog log,
plasmaWordIndex wordIndex,
plasmaCrawlLURL urlStore,
plasmaSnippetCache snippetCache) {
this.log = log;
this.wordIndex = wordIndex;
this.query = query;
this.ranking = ranking;
this.urlStore = urlStore;
this.snippetCache = snippetCache;
this.rcLocal = new plasmaWordIndexEntryContainer(null);
this.rcGlobal = new plasmaWordIndexEntryContainer(null);
this.rcGlobalCount = 0;
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
this.profileLocal = new plasmaSearchTimingProfile(4 * query.maximumTime / 10, query.wantedResults);
this.profileGlobal = new plasmaSearchTimingProfile(6 * query.maximumTime / 10, query.wantedResults);
} else {
this.profileLocal = new plasmaSearchTimingProfile(query.maximumTime, query.wantedResults);
this.profileGlobal = null;
}
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
this.searchThreads = null;
}
@ -91,7 +95,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
return query;
}
public plasmaSearchTimingProfile getLocalProfile() {
public plasmaSearchTimingProfile getLocalTiming() {
return profileLocal;
}
@ -207,13 +211,13 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
long postorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
profileLocal.startTimer();
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query);
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking);
preorder.addContainer(searchResult, preorderTime);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_PRESORT, rcLocal.size());
profileLocal.startTimer();
plasmaSearchResult acc = new plasmaSearchResult(query);
plasmaSearchResult acc = new plasmaSearchResult(query, ranking);
if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
if (searchResult.size() == 0) return acc; // case that we have nothing to do

@ -1,4 +1,4 @@
// plasmaSearchPreOder.java
// plasmaSearchPreOrder.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
@ -59,6 +59,7 @@ public final class plasmaSearchPreOrder {
private plasmaWordIndexEntry entryMin, entryMax;
private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry
private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking;
public static void loadYBR(File rankingPath, int count) {
// load ranking tables
@ -96,16 +97,17 @@ public final class plasmaSearchPreOrder {
useYBR = usage;
}
public plasmaSearchPreOrder(plasmaSearchQuery query) {
public plasmaSearchPreOrder(plasmaSearchQuery query, plasmaSearchRankingProfile ranking) {
entryMin = null;
entryMax = null;
this.pageAcc = new TreeMap();
this.query = query;
this.ranking = ranking;
}
public plasmaSearchPreOrder cloneSmart() {
// clones only the top structure
plasmaSearchPreOrder theClone = new plasmaSearchPreOrder(query);
plasmaSearchPreOrder theClone = new plasmaSearchPreOrder(query, ranking);
theClone.pageAcc = (TreeMap) this.pageAcc.clone();
return theClone;
}
@ -138,7 +140,7 @@ public final class plasmaSearchPreOrder {
i = container.entries();
for (int j = 0; j < count; j++) {
indexEntry = (plasmaWordIndexEntry) i.next();
pageAcc.put(serverCodings.encodeHex(query.ranking(indexEntry.generateNormalized(entryMin, entryMax)), 16) + indexEntry.getUrlHash(), indexEntry);
pageAcc.put(serverCodings.encodeHex(this.ranking.ranking(indexEntry.generateNormalized(entryMin, entryMax)), 16) + indexEntry.getUrlHash(), indexEntry);
}
}

@ -65,7 +65,6 @@ public final class plasmaSearchQuery {
public Set queryWords;
public Set queryHashes;
public String referrer;
public String[] order;
public int wantedResults;
public long maximumTime;
public String urlMask;
@ -75,13 +74,11 @@ public final class plasmaSearchQuery {
public int maxDistance;
public plasmaSearchQuery(Set queryWords, int maxDistance,
String[] order, int wantedResults, long maximumTime, String urlMask,
String referrer,
int wantedResults, long maximumTime, String urlMask, String referrer,
int domType, String domGroupName, int domMaxTargets) {
this.queryWords = queryWords;
this.maxDistance = maxDistance;
this.queryHashes = words2hashes(queryWords);
this.order = order;
this.wantedResults = wantedResults;
this.maximumTime = maximumTime;
this.urlMask = urlMask;
@ -92,11 +89,10 @@ public final class plasmaSearchQuery {
}
public plasmaSearchQuery(Set queryHashes, int maxDistance,
String[] order, int wantedResults, long maximumTime, String urlMask) {
int wantedResults, long maximumTime, String urlMask) {
this.queryWords = null;
this.maxDistance = maxDistance;
this.queryHashes = queryHashes;
this.order = order;
this.wantedResults = wantedResults;
this.maximumTime = maximumTime;
this.urlMask = urlMask;
@ -105,20 +101,16 @@ public final class plasmaSearchQuery {
this.domMaxTargets = -1;
}
public String orderString() {
return order[0] + "-" + order[1] + "-" + order[2];
}
public static Set words2hashes(String[] words) {
TreeSet hashes = new TreeSet();
TreeSet hashes = new TreeSet();
for (int i = 0; i < words.length; i++) hashes.add(plasmaWordIndexEntry.word2hash(words[i]));
return hashes;
}
public static Set words2hashes(Set words) {
Iterator i = words.iterator();
TreeSet hashes = new TreeSet();
while (i.hasNext()) hashes.add(plasmaWordIndexEntry.word2hash((String) i.next()));
Iterator i = words.iterator();
TreeSet hashes = new TreeSet();
while (i.hasNext()) hashes.add(plasmaWordIndexEntry.word2hash((String) i.next()));
return hashes;
}
@ -177,19 +169,5 @@ public final class plasmaSearchQuery {
if (blueList.contains(word)) it.remove();
}
}
public long ranking(plasmaWordIndexEntry normalizedEntry) {
long ranking = 0;
for (int i = 0; i < 3; i++) {
if (this.order[i].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += normalizedEntry.getQuality() << (4 * (3 - i));
else if (this.order[i].equals(plasmaSearchQuery.ORDER_DATE)) ranking += normalizedEntry.getVirtualAge() << (4 * (3 - i));
else if (this.order[i].equals(plasmaSearchQuery.ORDER_YBR)) ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.getUrlHash()) << (4 * (3 - i));
}
ranking += (normalizedEntry.posintext() == 0) ? 0 : (255 - normalizedEntry.posintext()) << 11;
ranking += (normalizedEntry.worddistance() == 0) ? 0 : (255 - normalizedEntry.worddistance()) << 10;
ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << 9;
ranking += (255 - normalizedEntry.domlengthNormalized()) << 8;
return ranking;
}
}

@ -0,0 +1,70 @@
// plasmaSearchRankingProfile.java
// -------------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2006
// Created: 05.02.2006
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
/**
 * Ranking profile of a search: holds the ordering criteria and computes
 * a numeric ranking value for a normalized word index entry.
 */
public class plasmaSearchRankingProfile {

    // ordering criteria; orderString() and ranking() read elements 0..2
    public String[] order;

    /**
     * Creates a profile that uses the given ordering criteria.
     * The array is stored as passed in (no copy is made).
     *
     * @param order the ordering criteria
     */
    public plasmaSearchRankingProfile(String[] order) {
        this.order = order;
    }

    /**
     * @return the first three ordering criteria, joined with '-'
     */
    public String orderString() {
        final StringBuffer joined = new StringBuffer();
        joined.append(order[0]);
        joined.append("-");
        joined.append(order[1]);
        joined.append("-");
        joined.append(order[2]);
        return joined.toString();
    }

    /**
     * Computes the ranking value of a normalized index entry.
     *
     * @param normalizedEntry the entry to rank
     * @return the sum of the order-dependent and the fixed ranking terms
     */
    public long ranking(plasmaWordIndexEntry normalizedEntry) {
        long score = 0;

        // order-dependent part: the criterion at position 0 is shifted by 12 bits,
        // position 1 by 8 bits, position 2 by 4 bits; a criterion that matches
        // none of the three known constants adds nothing
        for (int pos = 0; pos < 3; pos++) {
            final String criterion = this.order[pos];
            final int shift = 4 * (3 - pos);
            if (criterion.equals(plasmaSearchQuery.ORDER_QUALITY)) {
                score += normalizedEntry.getQuality() << shift;
            } else if (criterion.equals(plasmaSearchQuery.ORDER_DATE)) {
                score += normalizedEntry.getVirtualAge() << shift;
            } else if (criterion.equals(plasmaSearchQuery.ORDER_YBR)) {
                score += plasmaSearchPreOrder.ybr_p(normalizedEntry.getUrlHash()) << shift;
            }
        }

        // order-independent part: an attribute value of 0 contributes nothing
        if (normalizedEntry.posintext() != 0) {
            score += (255 - normalizedEntry.posintext()) << 11;
        }
        if (normalizedEntry.worddistance() != 0) {
            score += (255 - normalizedEntry.worddistance()) << 10;
        }
        if (normalizedEntry.hitcount() != 0) {
            score += normalizedEntry.hitcount() << 9;
        }
        // the domain-length term is always added
        score += (255 - normalizedEntry.domlengthNormalized()) << 8;

        return score;
    }
}

@ -63,14 +63,16 @@ public final class plasmaSearchResult {
private kelondroMScoreCluster ref; // reference score computation for the commonSense heuristic
private ArrayList results; // this is a buffer for plasmaWordIndexEntry + plasmaCrawlLURL.entry - objects
private plasmaSearchQuery query;
private plasmaSearchRankingProfile ranking;
public int globalContributions;
public int localContributions;
public plasmaSearchResult(plasmaSearchQuery query) {
public plasmaSearchResult(plasmaSearchQuery query, plasmaSearchRankingProfile ranking) {
this.pageAcc = new TreeMap();
this.ref = new kelondroMScoreCluster();
this.results = new ArrayList();
this.query = query;
this.ranking = ranking;
this.globalContributions = 0;
this.localContributions = 0;
this.entryMin = null;
@ -79,7 +81,7 @@ public final class plasmaSearchResult {
public plasmaSearchResult cloneSmart() {
// clones only the top structure
plasmaSearchResult theClone = new plasmaSearchResult(query);
plasmaSearchResult theClone = new plasmaSearchResult(this.query, this.ranking);
theClone.pageAcc = (TreeMap) this.pageAcc.clone();
theClone.ref = this.ref;
theClone.results = this.results;
@ -147,7 +149,7 @@ public final class plasmaSearchResult {
indexEntry = (plasmaWordIndexEntry) resultVector[0];
// apply pre-calculated order attributes
ranking = query.ranking(indexEntry.generateNormalized(entryMin, entryMax));
ranking = this.ranking.ranking(indexEntry.generateNormalized(entryMin, entryMax));
// apply 'common-sense' heuristic using references
urlcomps = (String[]) resultVector[2];

@ -1627,7 +1627,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (date == null) return ""; else return DateFormatter.format(date);
}
public serverObjects searchFromLocal(plasmaSearchQuery query) {
public serverObjects searchFromLocal(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming) {
// tell all threads to do nothing for a specific time
wordIndex.intermission(2 * query.maximumTime);
@ -1651,7 +1654,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
//}
// create a new search event
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, log, wordIndex, urlPool.loadedURL, snippetCache);
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, log, wordIndex, urlPool.loadedURL, snippetCache);
plasmaSearchResult acc = theSearch.search();
// fetch snippets
@ -1721,7 +1724,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (snippet.source == plasmaSnippetCache.ERROR_NO_MATCH) {
// suppress line: there is no match in that resource
} else {
prop.put("results_" + i + "_delete", "/index.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + query.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
prop.put("results_" + i + "_delete", "/index.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
prop.put("results_" + i + "_description", descr);
prop.put("results_" + i + "_url", urlstring);
prop.put("results_" + i + "_urlhash", urlhash);

Loading…
Cancel
Save