- enhancements to detailed search page

- enhancements to search ranking computation process
- fixed bugs in post-ranking

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2516 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 76f9dc5d26
commit 96c6e4e322

@ -56,7 +56,7 @@ function checkers(name, n) {
</tr>
<tr valign="center"><td class="small" colspan="2">&nbsp;</td></tr>
<tr valign="center" class="TableHeader" width="600"><td class="small" colspan="2">Local Ranking</td></tr>
<tr valign="center" class="TableHeader" width="600"><td class="small" colspan="2">Local Pre-Ranking</td></tr>
<tr valign="center" class="TableCellDark">
<td class=small>Entropy</td>
<td class=small>
@ -99,6 +99,9 @@ function checkers(name, n) {
<script type="text/javascript">checkers("localdomlength", #[localdomlength]#)</script>
</td>
</tr>
<tr valign="center"><td class="small" colspan="2">&nbsp;</td></tr>
<tr valign="center" class="TableHeader" width="600"><td class="small" colspan="2">Local Post-Ranking</td></tr>
<tr valign="center" class="TableCellDark">
<td class=small>URL Length</td>
<td class=small>
@ -147,17 +150,23 @@ function checkers(name, n) {
<script type="text/javascript">checkers("localdescrcompintoplist", #[localdescrcompintoplist]#)</script>
</td>
</tr>
</table>
</td>
<td colspan="2">
<table border="0" cellpadding="0" cellspacing="1">
<tr valign="center" class="TableHeader"><td class="small" colspan="2">Global Query</td></tr>
<tr valign="center" class="TableHeader"><td class="small" colspan="2">Query Attributes</td></tr>
<tr valign="center" class="TableCellDark">
<td class=small colspan="2">Global:
<input type="checkbox" name="global" align="top" #[globalChecked]#>
</td>
</tr>
<tr valign="top" class="TableCellDark"><td class="small" colspan="2"><i>(to be filled with more global options)</i></td></tr>
<tr valign="center" class="TableCellDark">
<td class=small colspan="2">Post-Sort:
<input type="checkbox" name="postsort" align="top" #[postsortChecked]#>
</td>
</tr>
</table></td>
</tr>
</table>
@ -191,6 +200,7 @@ function checkers(name, n) {
#(snippet)#::<img src="/env/grafics/empty.gif">&nbsp;<i>#[text]#</i><br>#(/snippet)#
<img src="/env/grafics/empty.gif">&nbsp;<a href="#[url]#">#[urlname]#</a><br>
<img src="/env/grafics/empty.gif"><span class="ResultDateYBR">&nbsp;#[date]#</span> | <span class="ResultDateYBR">YBR-#[ybr]#</span> | <a href="ViewFile.html?urlHash=#[urlhash]#&words=#[words]#">Info</a><br>
<img src="/env/grafics/empty.gif"><span class="ResultDateYBR">#[rankingprops]#</span><br>
</p>
<!-- link end -->
#{/results}#

@ -77,7 +77,9 @@ public class DetailedSearch {
prop.put("resultbottomline", 0);
prop.put("localCount", 10);
prop.put("localWDist", 999);
prop.put("globalChecked", "checked");
//prop.put("globalChecked", "checked");
prop.put("globalChecked", "");
prop.put("postsortChecked", "checked");
prop.put("localTime", 6);
prop.put("results", "");
prop.put("urlmaskoptions", 0);
@ -88,6 +90,7 @@ public class DetailedSearch {
}
boolean global = (post == null) ? false : post.get("global", "").equals("on");
boolean postsort = (post == null) ? false : post.get("postsort", "").equals("on");
final boolean indexDistributeGranted = sb.getConfig("allowDistributeIndex", "true").equals("true");
final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
if (!indexDistributeGranted || !indexReceiveGranted) { global = false; }
@ -138,7 +141,7 @@ public class DetailedSearch {
plasmaSearchRankingProfile localRanking = new plasmaSearchRankingProfile("local", post.toString());
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
final serverObjects prop = sb.searchFromLocal(thisSearch, localRanking, localTiming, remoteTiming);
final serverObjects prop = sb.searchFromLocal(thisSearch, localRanking, localTiming, remoteTiming, postsort);
// remember the last search expression
env.setConfig("last-search", querystring);
@ -193,6 +196,7 @@ public class DetailedSearch {
prop.put("localCount", count);
prop.put("localWDist", wdist);
prop.put("globalChecked", (global) ? "checked" : "");
prop.put("postsortChecked", (postsort) ? "checked" : "");
prop.put("localTime", searchtime/1000);
prop.put("search", post.get("search", ""));
prop.putAll(localRanking.toExternalMap("local"));

@ -120,7 +120,7 @@ public final class search {
plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY});
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
plasmaSearchEvent theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.urlPool.loadedURL, sb.snippetCache);
Set containers = theSearch.localSearchContainers();
indexContainer localResults = theSearch.localSearchJoin(containers);
int joincount = localResults.size();

@ -201,7 +201,7 @@ public class yacysearch {
plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile( new String[] { order1, order2, order3 });
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming);
prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true);
/*
* final serverObjects prop = sb.searchFromLocal(query, order1,

@ -32,7 +32,7 @@ public interface indexEntry {
public Object clone();
public byte[] toEncodedByteArrayForm(boolean includeUrlHash); // shall be replaced by toKelondroEntry()
public String toPropertyForm();
public String toPropertyForm(boolean displayFormat);
public kelondroRow.Entry toKelondroEntry();
public String urlHash();

@ -147,8 +147,8 @@ public class indexURLEntry implements Cloneable, indexEntry {
return b;
}
public String toPropertyForm() {
return entry.toPropertyForm(true, false);
public String toPropertyForm(boolean displayFormat) {
return entry.toPropertyForm(true, displayFormat, displayFormat);
}
public Entry toKelondroEntry() {
@ -259,7 +259,10 @@ public class indexURLEntry implements Cloneable, indexEntry {
}
static void normalize(indexURLEntry t, indexEntry min, indexEntry max) {
if (1 + max.worddistance() - min.worddistance() == 0) System.out.println("min = " + min.toPropertyForm() + "\nmax=" + max.toPropertyForm());
if (1 + max.worddistance() - min.worddistance() == 0) System.out.println("min = " + min.toPropertyForm(true) + "\nmax=" + max.toPropertyForm(true));
//System.out.println("Normalize:\nentry = " + t.toPropertyForm(true));
//System.out.println("min = " + min.toPropertyForm(true));
//System.out.println("max = " + max.toPropertyForm(true));
t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount()));
t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount()));
t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount()));
@ -269,6 +272,7 @@ public class indexURLEntry implements Cloneable, indexEntry {
t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); // FIXME: a division by zero occurs here, which can only happen if the normalization did not work.
t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified()));
t.entry.setCol(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality()));
//System.out.println("out = " + t.toPropertyForm(true));
}
public void normalize(indexEntry min, indexEntry max) {

@ -347,6 +347,11 @@ public class kelondroRow {
}
public long getColLong(int encoder, int offset, int length) {
// start - fix for badly stored parameters
if ((length >= 3) && (rowinstance[offset] == '[') && (rowinstance[offset + 1] == 'B') && (rowinstance[offset + 2] == '@')) return 0;
if ((length == 2) && (rowinstance[offset] == '[') && (rowinstance[offset + 1] == 'B')) return 0;
if ((length == 1) && (rowinstance[offset] == '[')) return 0;
// stop - fix for badly stored parameters
switch (encoder) {
case kelondroColumn.encoder_none:
throw new kelondroException("ROW", "getColLong has celltype none, no encoder given");
@ -375,23 +380,28 @@ public class kelondroRow {
return c;
}
public String toPropertyForm(boolean includeBraces, boolean decimalCardinal) {
public String toPropertyForm(boolean includeBraces, boolean decimalCardinal, boolean longname) {
serverByteBuffer bb = new serverByteBuffer();
if (includeBraces) bb.append('{');
for (int i = 0; i < row.length; i++) {
bb.append(row[i].nickname());
bb.append((longname) ? row[i].description() : row[i].nickname());
bb.append('=');
bb.append(rowinstance, colstart[i], row[i].cellwidth());
bb.append(',');
if ((row[i].celltype() == kelondroColumn.celltype_cardinal) && (decimalCardinal))
bb.append(Long.toString(getColLong(i)));
else
bb.append(rowinstance, colstart[i], row[i].cellwidth());
if (i < row.length - 1) {
bb.append(',');
if (longname) bb.append(' ');
}
}
if (bb.byteAt(bb.length() - 1) == ',') bb.deleteByteAt(bb.length() - 1); // remove ',' at end
if (includeBraces) bb.append('}');
//System.out.println("DEBUG-ROW " + bb.toString());
return bb.toString();
}
public String toString() {
return toPropertyForm(true, true);
return toPropertyForm(true, true, true);
}
}

@ -648,7 +648,7 @@ public final class plasmaCrawlLURL extends indexURL {
if (this.word != null) {
// append also word properties
corePropStr.append(",word=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm()));
corePropStr.append(",word=").append(kelondroBase64Order.enhancedCoder.encodeString(word.toPropertyForm(false)));
}
return corePropStr;

@ -68,12 +68,14 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private indexContainer rcGlobal; // cache for results
private int rcGlobalCount;
private plasmaSearchTimingProfile profileLocal, profileGlobal;
private boolean postsort;
private yacySearch[] searchThreads;
public plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming,
boolean postsort,
serverLog log,
plasmaWordIndex wordIndex,
plasmaCrawlLURL urlStore,
@ -88,6 +90,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.rcGlobalCount = 0;
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
this.postsort = postsort;
this.searchThreads = null;
}
@ -233,17 +236,21 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
indexEntry entry;
plasmaCrawlLURL.Entry page;
Long preranking;
Object[] preorderEntry;
int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT);
try {
while (preorder.hasNext()) {
//if ((acc.sizeFetched() >= 50) && ((acc.sizeFetched() >= minEntries) || (System.currentTimeMillis() >= postorderLimitTime))) break;
if (acc.sizeFetched() >= minEntries) break;
if (System.currentTimeMillis() >= postorderLimitTime) break;
entry = preorder.next();
preorderEntry = preorder.next();
entry = (indexEntry) preorderEntry[0];
preranking = (Long) preorderEntry[1];
// find the url entry
page = urlStore.load(entry.urlHash(), entry);
// add a result
if (page != null) acc.addResult(entry, page);
if (page != null) acc.addResult(page, preranking);
}
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
@ -253,7 +260,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// start postsorting
profileLocal.startTimer();
acc.sortResults();
acc.sortResults(postsort);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_POSTSORT, acc.sizeOrdered());
@ -285,17 +292,21 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
indexEntry entry;
plasmaCrawlLURL.Entry page;
Long preranking;
Object[] preorderEntry;
int minEntries = profileLocal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT);
try {
while (preorder.hasNext()) {
//if ((acc.sizeFetched() >= 50) && ((acc.sizeFetched() >= minEntries) || (System.currentTimeMillis() >= postorderLimitTime))) break;
if (acc.sizeFetched() >= minEntries) break;
if (System.currentTimeMillis() >= postorderLimitTime) break;
entry = preorder.next();
preorderEntry = preorder.next();
entry = (indexEntry) preorderEntry[0];
preranking = (Long) preorderEntry[1];
// find the url entry
page = urlStore.load(entry.urlHash(), entry);
// add a result
if (page != null) acc.addResult(entry, page);
if (page != null) acc.addResult(page, preranking);
}
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
@ -305,7 +316,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// start postsorting
profileLocal.startTimer();
acc.sortResults();
acc.sortResults(postsort);
profileLocal.setYieldTime(plasmaSearchTimingProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchTimingProfile.PROCESS_POSTSORT, acc.sizeOrdered());

@ -149,9 +149,11 @@ public final class plasmaSearchPreOrder {
return pageAcc.size() > 0;
}
public indexEntry next() {
Object top = pageAcc.lastKey();
return (indexEntry) pageAcc.remove(top);
public Object[] /*{indexEntry, Long}*/ next() {
String top = (String) pageAcc.lastKey();
//System.out.println("preorder-key: " + top);
Long preranking = new Long(Long.parseLong(top.substring(0, 16), 16));
return new Object[]{(indexEntry) pageAcc.remove(top), preranking};
}
public indexEntry[] getNormalizer() {

@ -178,7 +178,7 @@ public class plasmaSearchRankingProfile {
}
public long postRanking(
indexEntry normalizedEntry,
long preranking,
plasmaSearchQuery query,
Set topwords,
String[] urlcomps,
@ -186,7 +186,7 @@ public class plasmaSearchRankingProfile {
plasmaCrawlLURL.Entry page) {
// apply pre-calculated order attributes
long ranking = this.preRanking(normalizedEntry);
long ranking = preranking;
// prefer hit with 'prefer' pattern
if (page.url().toString().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();

@ -55,13 +55,11 @@ import java.net.MalformedURLException;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.serverCodings;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
public final class plasmaSearchResult {
private indexEntry entryMin, entryMax;
private TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry
private kelondroMScoreCluster ref; // reference score computation for the commonSense heuristic
private ArrayList results; // this is a buffer for plasmaWordIndexEntry + plasmaCrawlLURL.entry - objects
@ -78,8 +76,6 @@ public final class plasmaSearchResult {
this.ranking = ranking;
this.globalContributions = 0;
this.localContributions = 0;
this.entryMin = null;
this.entryMax = null;
}
public plasmaSearchResult cloneSmart() {
@ -105,14 +101,11 @@ public final class plasmaSearchResult {
public plasmaCrawlLURL.Entry nextElement() {
Object top = pageAcc.lastKey();
//System.out.println("postorder-key: " + ((String) top));
return (plasmaCrawlLURL.Entry) pageAcc.remove(top);
}
protected void addResult(indexEntry iEntry, plasmaCrawlLURL.Entry page) {
// make min/max for normalization
if (entryMin == null) entryMin = (indexEntry) iEntry.clone(); else entryMin.min(iEntry);
if (entryMax == null) entryMax = (indexEntry) iEntry.clone(); else entryMax.max(iEntry);
protected void addResult(plasmaCrawlLURL.Entry page, Long preranking) {
// take out relevant information for reference computation
URL url = page.url();
@ -122,15 +115,14 @@ public final class plasmaSearchResult {
String[] descrcomps = descr.toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description
// store everything
Object[] resultVector = new Object[] {iEntry, page, urlcomps, descrcomps};
results.add(resultVector);
results.add(new Object[] {page, urlcomps, descrcomps, preranking});
// add references
addScoreFiltered(urlcomps);
addScoreFiltered(descrcomps);
}
protected void sortResults() {
protected void sortResults(boolean postsort) {
// finally sort the results
// create a commonSense - set that represents a set of words that is
@ -140,28 +132,29 @@ public final class plasmaSearchResult {
for (int i = 0; i < references.length; i++) commonSense.add(references[i]);
Object[] resultVector;
indexEntry iEntry;
plasmaCrawlLURL.Entry page;
long ranking;
for (int i = 0; i < results.size(); i++) {
// take out values from result array
resultVector = (Object[]) results.get(i);
iEntry = (indexEntry) resultVector[0];
page = (plasmaCrawlLURL.Entry) resultVector[1];
page = (plasmaCrawlLURL.Entry) resultVector[0];
// calculate ranking
ranking = this.ranking.postRanking(
iEntry,
if (postsort)
ranking = this.ranking.postRanking(
((Long) resultVector[3]).longValue(),
query,
commonSense,
(String[]) resultVector[1],
(String[]) resultVector[2],
(String[]) resultVector[3],
page
);
else
ranking = ((Long) resultVector[3]).longValue();
// insert value
//System.out.println("Ranking " + ranking + ", YBR-" + plasmaSearchPreOrder.ybr(indexEntry.getUrlHash()) + " for URL " + page.url());
pageAcc.put(serverCodings.encodeHex(ranking, 16) + iEntry.urlHash(), page);
pageAcc.put(serverCodings.encodeHex(ranking, 16) + page.hash(), page);
}
// flush memory

@ -1979,7 +1979,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public serverObjects searchFromLocal(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming) {
plasmaSearchTimingProfile remoteTiming,
boolean postsort) {
// tell all threads to do nothing for a specific time
intermissionAllThreads(2 * query.maximumTime);
@ -2002,7 +2003,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
//}
// create a new search event
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, log, wordIndex, urlPool.loadedURL, snippetCache);
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, postsort, log, wordIndex, urlPool.loadedURL, snippetCache);
plasmaSearchResult acc = theSearch.search();
// fetch snippets
@ -2083,6 +2084,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
prop.put("type_results_" + i + "_size", Long.toString(urlentry.size()));
prop.put("type_results_" + i + "_words", URLEncoder.encode(query.queryWords.toString(),"UTF-8"));
prop.put("type_results_" + i + "_former", formerSearch);
prop.put("type_results_" + i + "_rankingprops", urlentry.word().toPropertyForm(true));
// adding snippet if available
if (snippet.exists()) {
prop.put("type_results_" + i + "_snippet", 1);

@ -1008,7 +1008,7 @@ public final class yacyClient {
while (eenum.hasNext()) {
entry = (indexEntry) eenum.next();
entrypost.append(indexes[i].getWordHash())
.append(entry.toPropertyForm())
.append(entry.toPropertyForm(false))
.append(serverCore.crlfString);
indexcount++;
}

Loading…
Cancel
Save