introduced new 'authority' ranking property

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4265 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent a1b80017e0
commit 9e23acf2d6

@ -137,7 +137,7 @@
<tr class="TableHeader">
<td colspan="3">Resource</td>
<td colspan="8">Negative Ranking Factors</td>
<td colspan="7">Positive Ranking Factors</td>
<td colspan="8">Positive Ranking Factors</td>
<td rowspan="2">Reverse Normalized Weighted Ranking Sum</td>
</tr>
<tr class="TableHeader">
@ -152,6 +152,7 @@
<td>pos of phrase</td>
<td>pos in phrase</td>
<td>word distance</td>
<td>authority</td>
<td>date</td>
<td>words in title</td>
<td>words in text</td>
@ -180,6 +181,7 @@
<td class="TableCellDark">#[phrase]#</td>
<td class="TableCellDark">#[posinphrase]#</td>
<td class="TableCellDark">#[worddistance]#</td>
<td>#[authority]#</td>
<td>#[date]#</td>
<td>#[wordsintitle]#</td>
<td>#[wordsintext]#</td>

@ -419,6 +419,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", (entry.ranking() - rn));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", yacyURL.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", plasmaSearchRankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", ranked.getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", serverDate.shortDayTime(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());

@ -50,6 +50,7 @@ public class Ranking_p {
rankingParameters.put(plasmaSearchRankingProfile.APPREF, "Appearance In Reference");
rankingParameters.put(plasmaSearchRankingProfile.APPTAGS, "Appearance In Tags");
rankingParameters.put(plasmaSearchRankingProfile.APPURL, "Appearance In URL");
rankingParameters.put(plasmaSearchRankingProfile.AUTHORITY, "Authority of Domain");
rankingParameters.put(plasmaSearchRankingProfile.CATHASAPP, "Category App, Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATHASAUDIO, "Category Audio Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATHASIMAGE, "Category Image Appearance");

@ -26,8 +26,13 @@
package de.anomic.index;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import de.anomic.kelondro.kelondroAbstractOrder;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaSearchRankingProcess;
@ -37,6 +42,8 @@ import de.anomic.yacy.yacyURL;
public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondroOrder {
private indexRWIVarEntry min, max;
private plasmaSearchRankingProfile ranking;
private kelondroMScoreCluster doms;
private int maxdomcount;
private static final int processors = Runtime.getRuntime().availableProcessors(); // for multiprocessor support, used during normalization
@ -44,6 +51,8 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
this.min = null;
this.max = null;
this.ranking = profile;
this.doms = new kelondroMScoreCluster();
this.maxdomcount = 0;
}
public void extend(indexContainer container) {
@ -59,9 +68,20 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
mmf1.run(); // execute other fork in this thread
if (this.min == null) this.min = mmf1.entryMin; else indexRWIVarEntry.min(this.min, mmf1.entryMin);
if (this.max == null) this.max = mmf1.entryMax; else indexRWIVarEntry.max(this.max, mmf1.entryMax);
Map.Entry entry;
Iterator di = mmf1.domcount().entrySet().iterator();
while (di.hasNext()) {
entry = (Map.Entry) di.next();
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
}
try {mmf0.join();} catch (InterruptedException e) {} // wait for fork thread to finish
if (this.min == null) this.min = mmf0.entryMin; else indexRWIVarEntry.min(this.min, mmf0.entryMin);
if (this.max == null) this.max = mmf0.entryMax; else indexRWIVarEntry.max(this.max, mmf0.entryMax);
di = mmf0.domcount().entrySet().iterator();
while (di.hasNext()) {
entry = (Map.Entry) di.next();
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
}
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order (2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
} else if (container.size() > 0) {
@ -70,15 +90,26 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
mmf.run(); // execute without multi-threading
if (this.min == null) this.min = mmf.entryMin; else indexRWIVarEntry.min(this.min, mmf.entryMin);
if (this.max == null) this.max = mmf.entryMax; else indexRWIVarEntry.max(this.max, mmf.entryMax);
Map.Entry entry;
Iterator di = mmf.domcount().entrySet().iterator();
while (di.hasNext()) {
entry = (Map.Entry) di.next();
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
}
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
}
if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore();
}
/**
 * Cloning is not implemented for this order: the method always returns null.
 * NOTE(review): any caller that expects a usable copy will receive null here;
 * confirm that nothing relies on clone() for this class.
 */
public Object clone() {
return null;
}
/**
 * Computes the authority value of a URL relative to the domain counts that
 * have been collected by extend().
 *
 * The domain key is the part of the URL hash starting at index 6, which is the
 * same key the min/max finder uses when it counts entries per domain. The
 * score stored for that key is multiplied by 256 (shift by 8) and divided by
 * (1 + maxdomcount); the "+ 1" keeps the divisor non-zero while no domain has
 * been counted yet.
 *
 * @param urlHash hash of the URL; it needs at least 6 characters, otherwise
 *                substring(6) throws a StringIndexOutOfBoundsException
 * @return the scaled domain score
 */
public int authority(String urlHash) {
    final String domKey = urlHash.substring(6);
    final int scaledCount = doms.getScore(domKey) << 8;
    return scaledCount / (1 + this.maxdomcount);
}
/**
 * Convenience overload: wraps the given bytes in an indexRWIRowEntry and
 * delegates to the entry-based cardinal computation.
 *
 * @param key bytes handed to the indexRWIRowEntry constructor
 * @return the cardinal value computed for the wrapped entry
 */
public long cardinal(byte[] key) {
    final indexRWIRowEntry row = new indexRWIRowEntry(key);
    return cardinal(row);
}
@ -95,14 +126,15 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
+ ((t.posintext() == 0) ? 0 : ((256 - (((t.posintext() - min.posintext() ) << 8) / (1 + max.posintext() - min.posintext()) )) << ranking.coeff_posintext))
+ ((t.posofphrase() == 0) ? 0 : ((256 - (((t.posofphrase() - min.posofphrase() ) << 8) / (1 + max.posofphrase() - min.posofphrase()) )) << ranking.coeff_posofphrase))
+ ((t.posinphrase() == 0) ? 0 : ((256 - (((t.posinphrase() - min.posinphrase() ) << 8) / (1 + max.posinphrase() - min.posinphrase()) )) << ranking.coeff_posinphrase))
+ ((256 - (((t.worddistance() - min.worddistance() ) << 8) / (1 + max.worddistance() - min.worddistance()) )) << ranking.coeff_worddistance)
+ ( (((t.virtualAge() - min.virtualAge() ) << 8) / (1 + max.virtualAge() - min.virtualAge()) ) << ranking.coeff_date)
+ ( (((t.wordsintitle() - min.wordsintitle() ) << 8) / (1 + max.wordsintitle() - min.wordsintitle()) ) << ranking.coeff_wordsintitle)
+ ( (((t.wordsintext() - min.wordsintext() ) << 8) / (1 + max.wordsintext() - min.wordsintext()) ) << ranking.coeff_wordsintext)
+ ( (((t.phrasesintext()- min.phrasesintext()) << 8) / (1 + max.phrasesintext()- min.phrasesintext()) ) << ranking.coeff_phrasesintext)
+ ( (((t.llocal() - min.llocal() ) << 8) / (1 + max.llocal() - min.llocal()) ) << ranking.coeff_llocal)
+ ( (((t.lother() - min.lother() ) << 8) / (1 + max.lother() - min.lother()) ) << ranking.coeff_lother)
+ ( (((t.hitcount() - min.hitcount() ) << 8) / (1 + max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ ((256 - (((t.worddistance() - min.worddistance() ) << 8) / (1 + max.worddistance() - min.worddistance()) )) << ranking.coeff_worddistance)
+ ( (((t.virtualAge() - min.virtualAge() ) << 8) / (1 + max.virtualAge() - min.virtualAge()) ) << ranking.coeff_date)
+ ( (((t.wordsintitle() - min.wordsintitle() ) << 8) / (1 + max.wordsintitle() - min.wordsintitle()) ) << ranking.coeff_wordsintitle)
+ ( (((t.wordsintext() - min.wordsintext() ) << 8) / (1 + max.wordsintext() - min.wordsintext()) ) << ranking.coeff_wordsintext)
+ ( (((t.phrasesintext()- min.phrasesintext() ) << 8) / (1 + max.phrasesintext()- min.phrasesintext()) ) << ranking.coeff_phrasesintext)
+ ( (((t.llocal() - min.llocal() ) << 8) / (1 + max.llocal() - min.llocal()) ) << ranking.coeff_llocal)
+ ( (((t.lother() - min.lother() ) << 8) / (1 + max.lother() - min.lother()) ) << ranking.coeff_lother)
+ ( (((t.hitcount() - min.hitcount() ) << 8) / (1 + max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ ( authority(t.urlHash()) << ranking.coeff_authority)
+ (((flags.get(indexRWIEntry.flag_app_url)) ? 255 << ranking.coeff_appurl : 0))
+ (((flags.get(indexRWIEntry.flag_app_descr)) ? 255 << ranking.coeff_appdescr : 0))
+ (((flags.get(indexRWIEntry.flag_app_author)) ? 255 << ranking.coeff_appauthor : 0))
@ -157,11 +189,15 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
private indexRWIVarEntry entryMin, entryMax;
private indexContainer container;
private int start, end;
private HashMap doms;
private Integer int1;
/**
 * Prepares a finder for one slice of a container. run() later walks the
 * entries from start up to, but not including, end.
 *
 * @param container the container whose entries are inspected
 * @param start     index of the first entry to inspect (inclusive)
 * @param end       index after the last entry to inspect (exclusive)
 */
public minmaxfinder(indexContainer container, int start /*including*/, int end /*excluding*/) {
    // per-finder domain counter, plus one Integer(1) instance that is reused
    // for every domain seen for the first time
    this.int1 = new Integer(1);
    this.doms = new HashMap();

    // the slice of the container this finder is responsible for
    this.end = end;
    this.start = start;
    this.container = container;
}
public void run() {
@ -170,12 +206,27 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder implements kelondr
this.entryMax = null;
indexRWIRowEntry iEntry;
int p = this.start;
String dom;
Integer count;
while (p < this.end) {
iEntry = new indexRWIRowEntry(container.get(p++));
// find min/max
if (this.entryMin == null) this.entryMin = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry);
if (this.entryMax == null) this.entryMax = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry);
// update domcount
dom = iEntry.urlHash().substring(6);
count = (Integer) doms.get(dom);
if (count == null) {
doms.put(dom, int1);
} else {
doms.put(dom, new Integer(count.intValue() + 1));
}
}
}
/**
 * Returns the per-domain counter filled by run().
 * Keys are the String urlHash().substring(6) of the inspected entries, values
 * are Integer counts of how many entries carried that key.
 * The internal map itself is returned, not a copy.
 *
 * @return map from domain key to occurrence count
 */
public HashMap domcount() {
return this.doms;
}
}
}

@ -62,6 +62,7 @@ public class plasmaSearchRankingProfile {
public static final String POSINTEXT = "posintext";
public static final String POSOFPHRASE = "posofphrase";
public static final String POSINPHRASE = "posinphrase";
public static final String AUTHORITY = "authority";
public static final String WORDDISTANCE = "worddistance";
public static final String APPURL = "appurl";
public static final String APPDESCR = "appdescr";
@ -83,7 +84,7 @@ public class plasmaSearchRankingProfile {
public int
coeff_domlength, coeff_ybr, coeff_date, coeff_wordsintitle, coeff_wordsintext, coeff_phrasesintext,
coeff_llocal, coeff_lother, coeff_urllength, coeff_urlcomps, coeff_hitcount,
coeff_posintext, coeff_posofphrase, coeff_posinphrase, coeff_worddistance,
coeff_posintext, coeff_posofphrase, coeff_posinphrase, coeff_authority, coeff_worddistance,
coeff_appurl, coeff_appdescr, coeff_appauthor, coeff_apptags, coeff_appref, coeff_appemph,
coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp,
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer;
@ -91,34 +92,35 @@ public class plasmaSearchRankingProfile {
/**
 * Creates a ranking profile filled with the built-in default coefficients.
 *
 * indexRWIEntryOrder applies the coefficients visible there as left-shift
 * amounts (normalized value << coeff_x), so raising such a coefficient by one
 * doubles the weight of that ranking factor.
 *
 * @param mediatype content domain of the search, compared against the
 *                  plasmaSearchQuery.CONTENTDOM_* constants; it decides the
 *                  values of the category coefficients (coeff_cat*)
 */
public plasmaSearchRankingProfile(int mediatype) {
// set default-values
coeff_domlength = 8;
coeff_ybr = 8;
coeff_ybr = 9;
coeff_date = 4;
coeff_wordsintitle = 4;
coeff_wordsintext = 1;
coeff_phrasesintext = 1;
coeff_wordsintext = 2;
coeff_phrasesintext = 3;
coeff_llocal = 2;
coeff_lother = 3;
coeff_urllength = 14;
coeff_urlcomps = 14;
coeff_hitcount = 5;
coeff_posintext = 7;
coeff_posofphrase = 6;
coeff_urllength = 15;
coeff_urlcomps = 15;
coeff_hitcount = 4;
coeff_posintext = 11;
coeff_posofphrase = 9;
coeff_posinphrase = 1;
coeff_authority = 13;
coeff_worddistance = 15;
coeff_appurl = 14;
coeff_appdescr = 13;
coeff_appdescr = 12;
coeff_appauthor = 13;
coeff_apptags = 8;
coeff_appref = 9;
coeff_appemph = 13;
coeff_appref = 8;
coeff_appemph = 12;
coeff_urlcompintoplist = 3;
coeff_descrcompintoplist = 2;
coeff_prefer = 15;
// category coefficients depend on the requested media type
coeff_catindexof = (mediatype == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 10;
coeff_cathasimage = (mediatype == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 15 : 1;
coeff_cathasaudio = (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 15 : 1;
coeff_cathasvideo = (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 15 : 1;
coeff_cathasapp = (mediatype == plasmaSearchQuery.CONTENTDOM_APP) ? 15 : 1;
coeff_catindexof = (mediatype == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : 15;
coeff_cathasimage = (mediatype == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 15 : 0;
coeff_cathasaudio = (mediatype == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 15 : 0;
coeff_cathasvideo = (mediatype == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 15 : 0;
coeff_cathasapp = (mediatype == plasmaSearchQuery.CONTENTDOM_APP) ? 15 : 0;
}
public plasmaSearchRankingProfile(String prefix, String profile) {
@ -153,6 +155,7 @@ public class plasmaSearchRankingProfile {
coeff_posintext = parseMap(coeff, POSINTEXT, coeff_posintext);
coeff_posofphrase = parseMap(coeff, POSOFPHRASE, coeff_posofphrase);
coeff_posinphrase = parseMap(coeff, POSINPHRASE, coeff_posinphrase);
coeff_authority = parseMap(coeff, AUTHORITY, coeff_authority);
coeff_worddistance = parseMap(coeff, WORDDISTANCE, coeff_worddistance);
coeff_appurl = parseMap(coeff, APPURL, coeff_appurl);
coeff_appdescr = parseMap(coeff, APPDESCR, coeff_appdescr);
@ -207,6 +210,7 @@ public class plasmaSearchRankingProfile {
ext.put(prefix + POSINTEXT, Integer.toString(coeff_posintext));
ext.put(prefix + POSOFPHRASE, Integer.toString(coeff_posofphrase));
ext.put(prefix + POSINPHRASE, Integer.toString(coeff_posinphrase));
ext.put(prefix + AUTHORITY, Integer.toString(coeff_authority));
ext.put(prefix + WORDDISTANCE, Integer.toString(coeff_worddistance));
ext.put(prefix + APPURL, Integer.toString(coeff_appurl));
ext.put(prefix + APPDESCR, Integer.toString(coeff_appdescr));

Loading…
Cancel
Save