From 365a3fff8e48a73f6c139f8a3c00e8a8449011e1 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 7 Feb 2006 23:16:46 +0000 Subject: [PATCH] fixings for ranking attributes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1569 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../plasma/plasmaSearchRankingProfile.java | 16 ++++++++-------- source/de/anomic/plasma/plasmaURL.java | 2 +- .../de/anomic/plasma/plasmaWordIndexEntry.java | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index 97809a0af..8cc4a98b5 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -185,10 +185,10 @@ public class plasmaSearchRankingProfile { // apply 'common-sense' heuristic using references for (int j = 0; j < urlcomps.length; j++) { - if (topwords.contains(urlcomps[j])) ranking += 1 << ((Integer) coeff.get(URLCOMPINTOPLIST)).intValue(); + if (topwords.contains(urlcomps[j])) ranking += 256 << ((Integer) coeff.get(URLCOMPINTOPLIST)).intValue(); } for (int j = 0; j < descrcomps.length; j++) { - if (topwords.contains(descrcomps[j])) ranking += 1 << ((Integer) coeff.get(DESCRCOMPINTOPLIST)).intValue(); + if (topwords.contains(descrcomps[j])) ranking += 256 << ((Integer) coeff.get(DESCRCOMPINTOPLIST)).intValue(); } // apply query-in-result matching @@ -198,17 +198,17 @@ public class plasmaSearchRankingProfile { String queryhash; while (shi.hasNext()) { queryhash = (String) shi.next(); - if (urlcomph.contains(queryhash)) ranking += 1 << ((Integer) coeff.get(QUERYINURL)).intValue(); - if (descrcomph.contains(queryhash)) ranking += 1 << ((Integer) coeff.get(QUERYINDESCR)).intValue(); + if (urlcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINURL)).intValue(); + if (descrcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINDESCR)).intValue(); } // prefer short urls - ranking += (255 - page.url().toString().length()) << ((Integer) coeff.get(URLLENGTH)).intValue(); - ranking += (24 - urlcomps.length) << ((Integer) coeff.get(URLCOMPS)).intValue(); + ranking += (256 - page.url().toString().length()) << ((Integer) coeff.get(URLLENGTH)).intValue(); + ranking += (32 - urlcomps.length) << ((Integer) coeff.get(URLCOMPS)).intValue(); // prefer long descriptions - ranking += (40 - Math.abs(40 - Math.min(40, page.descr().length()))) << ((Integer) coeff.get(DESCRLENGTH)).intValue(); - ranking += (8 - Math.abs(8 - Math.min(8, descrcomps.length))) << ((Integer) coeff.get(DESCRCOMPS)).intValue(); + ranking += (255 * page.descr().length() / 80) << ((Integer) coeff.get(DESCRLENGTH)).intValue(); + ranking += (255 * (12 - Math.abs(12 - Math.min(12, descrcomps.length))) / 12) << ((Integer) coeff.get(DESCRCOMPS)).intValue(); return ranking; } diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java index b26424eb9..650d47d9d 100644 --- a/source/de/anomic/plasma/plasmaURL.java +++ b/source/de/anomic/plasma/plasmaURL.java @@ -538,7 +538,7 @@ public class plasmaURL { public static final int domLengthEstimation(String urlHash) { // generates an estimation of the original domain length int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11)); - int domLengthKey = flagbyte & 4; + int domLengthKey = flagbyte & 3; switch (domLengthKey) { case 0: return 4; case 1: return 10; diff --git a/source/de/anomic/plasma/plasmaWordIndexEntry.java b/source/de/anomic/plasma/plasmaWordIndexEntry.java index fa215a6bc..5f5e127c2 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntry.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntry.java @@ -410,7 +410,7 @@ public final class plasmaWordIndexEntry implements Cloneable { public boolean isLocal() { return localflag == LT_LOCAL; } public int domlengthNormalized() { - return 255 * plasmaURL.domLengthEstimation(this.urlHash) / 20; + return 255 * plasmaURL.domLengthEstimation(this.urlHash) / 30; } public static void main(String[] args) {