Resource: |
diff --git a/htroot/index.java b/htroot/index.java
index 0bc75fb1e..774620fe7 100644
--- a/htroot/index.java
+++ b/htroot/index.java
@@ -34,7 +34,6 @@ import java.util.HashMap;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
-import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
@@ -106,12 +105,6 @@ public class index {
prop.put("searchoptions_count-50", (count == 50) ? 1 : 0);
prop.put("searchoptions_count-100", (count == 100) ? 1 : 0);
prop.put("searchoptions_count-1000", (count == 1000) ? 1 : 0);
- prop.put("searchoptions_order-ybr-date-quality", plasmaSearchPreOrder.canUseYBR() ? 1 : 0);
- prop.put("searchoptions_order-ybr-quality-date", 0);
- prop.put("searchoptions_order-date-ybr-quality", 0);
- prop.put("searchoptions_order-quality-ybr-date", 0);
- prop.put("searchoptions_order-date-quality-ybr", plasmaSearchPreOrder.canUseYBR() ? 0 : 1);
- prop.put("searchoptions_order-quality-date-ybr", 0);
prop.put("searchoptions_resource-global", ((global) ? 1 : 0));
prop.put("searchoptions_resource-local", ((global) ? 0 : 1));
prop.put("searchoptions_time-1", (time == 1) ? 1 : 0);
diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java
index b1bfbcacb..410226c7f 100644
--- a/htroot/xml/snippet.java
+++ b/htroot/xml/snippet.java
@@ -70,7 +70,7 @@ public class snippet {
prop.put("links", 0);
} else {
// attach media information
- ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000);
+ ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, media, true, 1000);
plasmaSnippetCache.MediaSnippet ms;
for (int i = 0; i < mediaSnippets.size(); i++) {
ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i);
@@ -79,7 +79,7 @@ public class snippet {
prop.put("link_" + i + "_name", ms.name);
prop.put("link_" + i + "_attr", ms.attr);
}
- System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
+ //System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString);
prop.put("text", "");
prop.put("link", mediaSnippets.size());
prop.put("links", mediaSnippets.size());
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 2eff2161f..c11ec4477 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -70,6 +70,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.yacy.yacySeed;
+import de.anomic.tools.crypt;
public final class search {
@@ -95,6 +96,8 @@ public final class search {
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "text");
final String filter = post.get("filter", ".*");
+ String profile = post.get("profile", ""); // remote profile hand-over; empty means "use the default profile for contentdom"
+ if (profile.length() > 0) { profile = crypt.simpleDecode(profile, null); if (profile == null) profile = ""; } // an undecodable hand-over (null result — TODO confirm simpleDecode's contract) falls back to the default profile instead of failing on profile.length() further down
final boolean includesnippet = post.get("includesnippet", "false").equals("true");
final kelondroBitfield constraint = new kelondroBitfield(4, post.get("constraint", "______"));
// final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
@@ -140,7 +143,7 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + squery.anonymizedQueryHashes() + " - " + squery.wantedResults + " links");
// prepare a search profile
- plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY});
+ plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
@@ -167,7 +170,7 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + squery.anonymizedQueryHashes() + " - " + squery.wantedResults + " links");
// prepare a search profile
- plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY});
+ plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile);
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 8dd045f48..40e24e379 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -170,122 +170,105 @@ public class yacysearch {
if (!indexDistributeGranted || !indexReceiveGranted) { global = false; }
// find search domain
- int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
- String cds = post.get("contentdom", "text");
- if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT;
- if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO;
- if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO;
- if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE;
- if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP;
+ int contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
+ String contentdomString = post.get("contentdom", "text");
+ if (contentdomString.equals("text")) contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
+ if (contentdomString.equals("audio")) contentdomCode = plasmaSearchQuery.CONTENTDOM_AUDIO;
+ if (contentdomString.equals("video")) contentdomCode = plasmaSearchQuery.CONTENTDOM_VIDEO;
+ if (contentdomString.equals("image")) contentdomCode = plasmaSearchQuery.CONTENTDOM_IMAGE;
+ if (contentdomString.equals("app")) contentdomCode = plasmaSearchQuery.CONTENTDOM_APP;
// patch until better search profiles are available
- if ((contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30;
+ if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30;
serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) {
- final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
- // filter out stopwords
- final TreeSet filtered = kelondroMSetTools.joinConstructive(query,
- plasmaSwitchboard.stopwords);
- if (filtered.size() > 0) {
- kelondroMSetTools.excludeDestructive(query, plasmaSwitchboard.stopwords);
- }
+ final TreeSet query = plasmaSearchQuery.cleanQuery(querystring);
+ // filter out stopwords
+ final TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords);
+ if (filtered.size() > 0) {
+ kelondroMSetTools.excludeDestructive(query, plasmaSwitchboard.stopwords);
+ }
- // if a minus-button was hit, remove a special reference first
- if (post.containsKey("deleteref")) {
- if (!sb.verifyAuthentication(header, true)) {
- prop.put("AUTHENTICATE", "admin log-in"); // force log-in
- return prop;
- }
-
- // delete the index entry locally
- final String delHash = post.get("deleteref", ""); // urlhash
- sb.wordIndex.removeReferences(query, delHash);
-
- // make new news message with negative voting
- HashMap map = new HashMap();
- map.put("urlhash", delHash);
- map.put("vote", "negative");
- map.put("refid", "");
- yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippavt", map));
+ // if a minus-button was hit, remove a special reference first
+ if (post.containsKey("deleteref")) {
+ if (!sb.verifyAuthentication(header, true)) {
+ prop.put("AUTHENTICATE", "admin log-in"); // force log-in
+ return prop;
}
+
+ // delete the index entry locally
+ final String delHash = post.get("deleteref", ""); // urlhash
+ sb.wordIndex.removeReferences(query, delHash);
+
+ // make new news message with negative voting
+ HashMap map = new HashMap();
+ map.put("urlhash", delHash);
+ map.put("vote", "negative");
+ map.put("refid", "");
+ yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippavt", map));
+ }
- // if aplus-button was hit, create new voting message
- if (post.containsKey("recommendref")) {
- if (!sb.verifyAuthentication(header, true)) {
- prop.put("AUTHENTICATE", "admin log-in"); // force log-in
- return prop;
- }
- final String recommendHash = post.get("recommendref", ""); // urlhash
- indexURLEntry urlentry = sb.wordIndex.loadedURL.load(recommendHash, null);
- if (urlentry != null) {
- indexURLEntry.Components comp = urlentry.comp();
- plasmaParserDocument document;
- document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000, true);
- if (document != null) {
- // create a news message
- HashMap map = new HashMap();
- map.put("url", comp.url().toNormalform().replace(',', '|'));
- map.put("title", comp.descr().replace(',', ' '));
- map.put("description", ((document == null) ? comp.descr() : document.getMainLongTitle()).replace(',', ' '));
- map.put("tags", ((document == null) ? "" : document.getKeywords(' ')));
- yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map));
- document.close();
- }
+ // if a plus-button was hit, create new voting message
+ if (post.containsKey("recommendref")) {
+ if (!sb.verifyAuthentication(header, true)) {
+ prop.put("AUTHENTICATE", "admin log-in"); // force log-in
+ return prop;
+ }
+ final String recommendHash = post.get("recommendref", ""); // urlhash
+ indexURLEntry urlentry = sb.wordIndex.loadedURL.load(recommendHash, null);
+ if (urlentry != null) {
+ indexURLEntry.Components comp = urlentry.comp();
+ plasmaParserDocument document;
+ document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000, true);
+ if (document != null) {
+ // create and publish a "stippadd" news message; commas are replaced in the values (presumably the record format is comma-separated — confirm)
+ HashMap map = new HashMap();
+ map.put("url", comp.url().toNormalform().replace(',', '|'));
+ map.put("title", comp.descr().replace(',', ' '));
+ map.put("description", document.getMainLongTitle().replace(',', ' ')); // document is non-null inside this guard, so the former comp.descr() fallback could never be taken
+ map.put("tags", document.getKeywords(' ')); // same guard: the former "" fallback was unreachable
+ yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map));
+ document.close();
+ }
}
+ }
- // prepare search order
- final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getAddress() != null));
-
- String order1 = plasmaSearchRankingProfile.ORDER_DATE;
- String order2 = plasmaSearchRankingProfile.ORDER_YBR;
- String order3 = plasmaSearchRankingProfile.ORDER_QUALITY;
- if (order.startsWith("YBR")) order1 = plasmaSearchRankingProfile.ORDER_YBR;
- if (order.startsWith("Date")) order1 = plasmaSearchRankingProfile.ORDER_DATE;
- if (order.startsWith("Quality")) order1 = plasmaSearchRankingProfile.ORDER_QUALITY;
- if (order.indexOf("-YBR-") > 0) order2 = plasmaSearchRankingProfile.ORDER_YBR;
- if (order.indexOf("-Date-") > 0) order2 = plasmaSearchRankingProfile.ORDER_DATE;
- if (order.indexOf("-Quality-") > 0) order2 = plasmaSearchRankingProfile.ORDER_QUALITY;
- if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR;
- if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE;
- if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY;
-
- // do the search
- plasmaSearchQuery thisSearch = new plasmaSearchQuery(
+ // prepare search properties
+ final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getAddress() != null));
+ final boolean samesearch = env.getConfig("last-search", "").equals(querystring + contentdomString);
+ final boolean globalsearch = (global) && (yacyonline) && (!samesearch);
+
+ // do the search
+ plasmaSearchQuery thisSearch = new plasmaSearchQuery(
query,
maxDistance,
prefermask,
- contentdom,
+ contentdomCode,
count,
searchtime,
urlmask,
- ((global) && (yacyonline) && (!(env.getConfig(
- "last-search", "").equals(querystring)))) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT
- : plasmaSearchQuery.SEARCHDOM_LOCAL, "", 20, constraint);
- plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile( new String[] { order1, order2, order3 });
- plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
- plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
- prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true);
-
- /*
- * final serverObjects prop = sb.searchFromLocal(query, order1,
- * order2, count, ((global) && (yacyonline) &&
- * (!(env.getConfig("last-search","").equals(querystring)))),
- * searchtime, urlmask);
- */
- // remember the last search expression
- env.setConfig("last-search", querystring);
-
- // process result of search
- prop.put("type_resultbottomline", 0);
- if (filtered.size() > 0) {
- prop.put("excluded", 1);
- prop.put("excluded_stopwords", filtered.toString());
- } else {
- prop.put("excluded", 0);
- }
+ (globalsearch) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL,
+ "",
+ 20,
+ constraint);
+ plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(contentdomString);
+ plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
+ plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
+ prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true);
+
+ // remember the last search expression
+ env.setConfig("last-search", querystring + contentdomString);
+
+ // process result of search
+ prop.put("type_resultbottomline", 0);
+ if (filtered.size() > 0) {
+ prop.put("excluded", 1);
+ prop.put("excluded_stopwords", filtered.toString());
+ } else {
+ prop.put("excluded", 0);
+ }
if (prop == null || prop.size() == 0) {
if (post.get("search", "").length() < 3) {
@@ -364,7 +347,7 @@ public class yacysearch {
}
prop.put("type", (thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : ((thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 2 : 1));
- if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", cds);
+ if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", contentdomString);
prop.put("cat", "href");
prop.put("depth", "0");
@@ -418,12 +401,12 @@ public class yacysearch {
prop.put("display", display);
prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", constraint.exportB64());
- prop.put("contentdom", cds);
- prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
- prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
- prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
- prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
- prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
+ prop.put("contentdom", contentdomString);
+ prop.put("contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0);
+ prop.put("contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0);
+ prop.put("contentdomCheckVideo", (contentdomCode == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0);
+ prop.put("contentdomCheckImage", (contentdomCode == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0);
+ prop.put("contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0);
// return rewrite properties
return prop;
diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java
index a26ae69b9..989fa7df4 100644
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@@ -552,8 +552,8 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
String mimeType = mimeTable.getProperty(targetExt, "text/html");
// generate an byte array from the generated image
- int width = i.getWidth(null);
- int height = i.getHeight(null);
+ int width = i.getWidth(null); if (width < 0) width = 96; // bad hack: a negative width (java.awt.Image reports -1 while the size is not yet known — assuming i is an Image) is replaced by 96 so the BufferedImage below gets a usable size
+ int height = i.getHeight(null); if (height < 0) height = 96; // bad hack: same fallback for a negative (unknown) height
BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
bi.createGraphics().drawImage(i, 0, 0, width, height, null);
serverByteBuffer baos = new serverByteBuffer();
diff --git a/source/de/anomic/index/indexRWIEntry.java b/source/de/anomic/index/indexRWIEntry.java
index 8c39fb835..003b0379c 100644
--- a/source/de/anomic/index/indexRWIEntry.java
+++ b/source/de/anomic/index/indexRWIEntry.java
@@ -43,11 +43,16 @@ public interface indexRWIEntry {
public int posintext();
public int posinphrase();
public int posofphrase();
- public int wordcount();
- public int phrasecount();
+ public int wordsintext();
+ public int phrasesintext();
public String getLanguage();
public char getType();
public kelondroBitfield flags();
+ public int wordsintitle();
+ public int llocal();
+ public int lother();
+ public int urllength();
+ public int urlcomps();
public void combineDistance(indexRWIEntry oe);
public int worddistance();
diff --git a/source/de/anomic/index/indexRWIEntryNew.java b/source/de/anomic/index/indexRWIEntryNew.java
index 5894b242c..17c6c633f 100644
--- a/source/de/anomic/index/indexRWIEntryNew.java
+++ b/source/de/anomic/index/indexRWIEntryNew.java
@@ -160,8 +160,8 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
this.entry.setCol(col_lastModified, mddlm);
this.entry.setCol(col_freshUntil, 0);
this.entry.setCol(col_wordsInTitle, 20); // guessed
- this.entry.setCol(col_wordsInText, oldEntry.wordcount());
- this.entry.setCol(col_phrasesInText, oldEntry.phrasecount());
+ this.entry.setCol(col_wordsInText, oldEntry.wordsintext());
+ this.entry.setCol(col_phrasesInText, oldEntry.phrasesintext());
this.entry.setCol(col_doctype, new byte[]{(byte) oldEntry.doctype()});
this.entry.setCol(col_language, (oldEntry.getLanguage() == null) ? "en" : oldEntry.getLanguage(), null);
this.entry.setCol(col_llocal, 0);
@@ -231,6 +231,10 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
public long lastModified() {
return plasmaWordIndex.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified));
}
+
+ public long freshUntil() { // decodes the col_freshUntil column with reverseMicroDateDays, the same way lastModified() decodes col_lastModified
+ return plasmaWordIndex.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil));
+ }
public int hitcount() {
return (int) this.entry.getColLong(col_hitcount);
@@ -248,11 +252,11 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
return (int) this.entry.getColLong(col_posofphrase);
}
- public int wordcount() {
+ public int wordsintext() {
return (int) this.entry.getColLong(col_wordsInText);
}
- public int phrasecount() {
+ public int phrasesintext() {
return (int) this.entry.getColLong(col_phrasesInText);
}
@@ -264,6 +268,26 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
return (char) this.entry.getColByte(col_doctype);
}
+ public int wordsintitle() { // value of the col_wordsInTitle column, narrowed from long to int
+ return (int) this.entry.getColLong(col_wordsInTitle);
+ }
+
+ public int llocal() { // value of the col_llocal column, narrowed from long to int
+ return (int) this.entry.getColLong(col_llocal);
+ }
+
+ public int lother() { // value of the col_lother column, narrowed from long to int
+ return (int) this.entry.getColLong(col_lother);
+ }
+
+ public int urllength() { // value of the col_urlLength column, narrowed from long to int
+ return (int) this.entry.getColLong(col_urlLength);
+ }
+
+ public int urlcomps() { // value of the col_urlComps column, narrowed from long to int
+ return (int) this.entry.getColLong(col_urlComps);
+ }
+
public kelondroBitfield flags() {
return new kelondroBitfield(this.entry.getColBytes(col_flags));
}
@@ -278,7 +302,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/);
ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
- ie1.entry.setCol(col_wordsInText, (ie1.wordcount() + ie2.wordcount()) / 2);
+ ie1.entry.setCol(col_wordsInText, (ie1.wordsintext() + ie2.wordsintext()) / 2);
return ie1;
}
@@ -292,24 +316,30 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
public static final void min(indexRWIEntryNew t, indexRWIEntry other) {
if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
- if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordsInText, other.wordcount());
- if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasesInText, other.phrasecount());
+ if (t.wordsintext() > other.wordsintext()) t.entry.setCol(col_wordsInText, other.wordsintext());
+ if (t.phrasesintext() > other.phrasesintext()) t.entry.setCol(col_phrasesInText, other.phrasesintext());
if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
if (t.worddistance() > other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
if (t.lastModified() > other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
+ if (t.urllength() > other.urllength()) t.entry.setCol(col_urlLength, other.urllength());
+ if (t.urlcomps() > other.urlcomps()) t.entry.setCol(col_urlComps, other.urlcomps());
+ if (t.wordsintitle() > other.wordsintitle() ) t.entry.setCol(col_wordsInTitle, other.wordsintitle());
}
public static final void max(indexRWIEntryNew t, indexRWIEntry other) {
if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
- if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordsInText, other.wordcount());
- if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasesInText, other.phrasecount());
+ if (t.wordsintext() < other.wordsintext()) t.entry.setCol(col_wordsInText, other.wordsintext());
+ if (t.phrasesintext() < other.phrasesintext()) t.entry.setCol(col_phrasesInText, other.phrasesintext());
if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
if (t.worddistance() < other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
if (t.lastModified() < other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
+ if (t.urllength() < other.urllength()) t.entry.setCol(col_urlLength, other.urllength());
+ if (t.urlcomps() < other.urlcomps()) t.entry.setCol(col_urlComps, other.urlcomps());
+ if (t.wordsintitle() < other.wordsintitle() ) t.entry.setCol(col_wordsInTitle, other.wordsintitle());
}
@@ -330,13 +360,17 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
//System.out.println("min = " + min.toPropertyForm(true));
//System.out.println("max = " + max.toPropertyForm(true));
t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount()));
- t.entry.setCol(col_wordsInText , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount()));
- t.entry.setCol(col_phrasesInText, (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount()));
+ t.entry.setCol(col_wordsInText , (t.wordsintext() == 0) ? 0 : 1 + 255 * (t.wordsintext() - min.wordsintext() ) / (1 + max.wordsintext() - min.wordsintext()));
+ t.entry.setCol(col_phrasesInText, (t.phrasesintext() == 0) ? 0 : 1 + 255 * (t.phrasesintext() - min.phrasesintext() ) / (1 + max.phrasesintext() - min.phrasesintext()));
t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext()));
t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase()));
t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase()));
t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); // FIXME: hier gibts ein division by zero, was nur sein kann wenn die Normalisierung nicht geklappt hat.
t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified()));
+ t.entry.setCol(col_urlLength , (t.urllength() == 0) ? 0 : 1 + 255 * (t.urllength() - min.urllength() ) / (1 + max.urllength() - min.urllength()));
+ t.entry.setCol(col_urlComps , (t.urlcomps() == 0) ? 0 : 1 + 255 * (t.urlcomps() - min.urlcomps() ) / (1 + max.urlcomps() - min.urlcomps()));
+ t.entry.setCol(col_wordsInTitle , (t.wordsintitle() == 0) ? 0 : 1 + 255 * (t.wordsintitle() - min.wordsintitle()) / (1 + max.wordsintitle() - min.wordsintitle()));
+
//System.out.println("out = " + t.toPropertyForm(true));
}
diff --git a/source/de/anomic/index/indexRWIEntryOld.java b/source/de/anomic/index/indexRWIEntryOld.java
index 268f9dec2..5e41e1824 100644
--- a/source/de/anomic/index/indexRWIEntryOld.java
+++ b/source/de/anomic/index/indexRWIEntryOld.java
@@ -189,11 +189,11 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
return (int) this.entry.getColLong(col_posofphrase);
}
- public int wordcount() {
+ public int wordsintext() {
return (int) this.entry.getColLong(col_wordcount);
}
- public int phrasecount() {
+ public int phrasesintext() {
return (int) this.entry.getColLong(col_phrasecount);
}
@@ -215,7 +215,7 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/);
ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
- ie1.entry.setCol(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2);
+ ie1.entry.setCol(col_wordcount, (ie1.wordsintext() + ie2.wordsintext()) / 2);
return ie1;
}
@@ -229,8 +229,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
public static final void min(indexRWIEntryOld t, indexRWIEntry other) {
if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
- if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
- if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
+ if (t.wordsintext() > other.wordsintext()) t.entry.setCol(col_wordcount, other.wordsintext());
+ if (t.phrasesintext() > other.phrasesintext()) t.entry.setCol(col_phrasecount, other.phrasesintext());
if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
@@ -241,8 +241,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
public static final void max(indexRWIEntryOld t, indexRWIEntry other) {
if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
- if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
- if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
+ if (t.wordsintext() < other.wordsintext()) t.entry.setCol(col_wordcount, other.wordsintext());
+ if (t.phrasesintext() < other.phrasesintext()) t.entry.setCol(col_phrasecount, other.phrasesintext());
if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
@@ -269,8 +269,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
//System.out.println("min = " + min.toPropertyForm(true));
//System.out.println("max = " + max.toPropertyForm(true));
t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount()));
- t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount()));
- t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount()));
+ t.entry.setCol(col_wordcount , (t.wordsintext() == 0) ? 0 : 1 + 255 * (t.wordsintext() - min.wordsintext() ) / (1 + max.wordsintext() - min.wordsintext()));
+ t.entry.setCol(col_phrasecount , (t.phrasesintext() == 0) ? 0 : 1 + 255 * (t.phrasesintext() - min.phrasesintext() ) / (1 + max.phrasesintext() - min.phrasesintext()));
t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext()));
t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase()));
t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase()));
@@ -309,4 +309,24 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry {
return false;
}
+ public int llocal() { // constant 0: this class reads no column for it (presumably the old row format has none — confirm)
+ return 0;
+ }
+
+ public int lother() { // constant 0: this class reads no column for it (presumably the old row format has none — confirm)
+ return 0;
+ }
+
+ public int urlcomps() { // constant 0: this class reads no column for it (presumably the old row format has none — confirm)
+ return 0;
+ }
+
+ public int urllength() { // constant 0: this class reads no column for it (presumably the old row format has none — confirm)
+ return 0;
+ }
+
+ public int wordsintitle() { // constant 0: this class reads no column for it (presumably the old row format has none — confirm)
+ return 0;
+ }
+
}
diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
index b37ca9dcd..2e339c49f 100644
--- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
@@ -47,111 +47,189 @@ import java.util.Map;
import java.util.Set;
import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexRWIEntryNew;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexURLEntry;
+import de.anomic.kelondro.kelondroBitfield;
public class plasmaSearchRankingProfile {
- // old parameters for ordering
- public static final String ORDER_QUALITY = "Quality";
- public static final String ORDER_DATE = "Date";
- public static final String ORDER_YBR = "YBR";
-
// pre-sort attributes
- public static final String ENTROPY = "entropy";
- public static final String DATE = "date";
- public static final String YBR = "ybr";
- public static final String POSINTEXT = "posintext";
- public static final String WORDDISTANCE = "worddistance";
- public static final String HITCOUNT = "hitcount";
- public static final String DOMLENGTH = "domlength";
+ public static final String DOMLENGTH = "domlength";
+ public static final String YBR = "ybr";
+ public static final String DATE = "date";
+ public static final String WORDSINTITLE = "wordsintitle";
+ public static final String WORDSINTEXT = "wordsintext";
+ public static final String PHRASESINTEXT = "phrasesintext";
+ public static final String LLOCAL = "llocal";
+ public static final String LOTHER = "lother";
+ public static final String URLLENGTH = "urllength";
+ public static final String URLCOMPS = "urlcomps";
+ public static final String HITCOUNT = "hitcount";
+ public static final String POSINTEXT = "posintext";
+ public static final String POSOFPHRASE = "posofphrase";
+ public static final String WORDDISTANCE = "worddistance";
+ public static final String APPURL = "appurl";
+ public static final String APPDESCR = "appdescr";
+ public static final String APPAUTHOR = "appauthor";
+ public static final String APPTAGS = "apptags";
+ public static final String APPREF = "appref";
+ public static final String APPEMPH = "appemph";
+ public static final String CATINDEXOF = "catindexof";
+ public static final String CATHASIMAGE = "cathasimage";
+ public static final String CATHASAUDIO = "cathasaudio";
+ public static final String CATHASVIDEO = "cathasvideo";
+ public static final String CATHASAPP = "cathasapp";
- // post-sort attributes
- public static final String URLLENGTH = "urllength";
- public static final String URLCOMPS = "urlcomps";
- public static final String DESCRLENGTH = "descrlength";
- public static final String DESCRCOMPS = "descrcomps";
-
// post-sort predicates
- public static final String QUERYINURL = "queryinurl";
- public static final String QUERYINDESCR = "queryindescr";
- public static final String URLCOMPINTOPLIST = "urlcompintoplist";
+ public static final String QUERYINURL = "queryinurl";
+ public static final String QUERYINDESCR = "queryindescr";
+ public static final String URLCOMPINTOPLIST = "urlcompintoplist";
public static final String DESCRCOMPINTOPLIST = "descrcompintoplist";
public static final String PREFER = "prefer";
+
+ private int
+ coeff_domlength, coeff_ybr, coeff_date, coeff_wordsintitle, coeff_wordsintext, coeff_phrasesintext,
+ coeff_llocal, coeff_lother, coeff_urllength, coeff_urlcomps, coeff_hitcount,
+ coeff_posintext, coeff_posofphrase, coeff_worddistance,
+ coeff_appurl, coeff_appdescr, coeff_appauthor, coeff_apptags, coeff_appref, coeff_appemph,
+ coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp,
+ coeff_queryinurl, coeff_queryindescr, coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer;
- public String[] order;
- private HashMap coeff;
-
- public plasmaSearchRankingProfile() {
- // set some default-values
- this.order = null;
- this.coeff = new HashMap();
- coeff.put(ENTROPY, new Integer(0));
- coeff.put(DATE, new Integer(4));
- coeff.put(YBR, new Integer(8));
- coeff.put(POSINTEXT, new Integer(7));
- coeff.put(WORDDISTANCE, new Integer(6));
- coeff.put(HITCOUNT, new Integer(5));
- coeff.put(DOMLENGTH, new Integer(8));
- coeff.put(URLLENGTH, new Integer(15));
- coeff.put(URLCOMPS, new Integer(15));
- coeff.put(DESCRLENGTH, new Integer(4));
- coeff.put(DESCRCOMPS, new Integer(4));
- coeff.put(QUERYINURL, new Integer(13));
- coeff.put(QUERYINDESCR, new Integer(8));
- coeff.put(URLCOMPINTOPLIST, new Integer(3));
- coeff.put(DESCRCOMPINTOPLIST, new Integer(2));
- coeff.put(PREFER, new Integer(15));
+ public plasmaSearchRankingProfile(String mediatype) {
+ // set default-values
+ if (mediatype == null) mediatype = "text";
+ coeff_domlength = 8;
+ coeff_ybr = 8;
+ coeff_date = 4;
+ coeff_wordsintitle = 4;
+ coeff_wordsintext = 1;
+ coeff_phrasesintext = 1;
+ coeff_llocal = 2;
+ coeff_lother = 3;
+ coeff_urllength = 14;
+ coeff_urlcomps = 14;
+ coeff_hitcount = 5;
+ coeff_posintext = 7;
+ coeff_posofphrase = 6;
+ coeff_worddistance = 15;
+ coeff_appurl = 14;
+ coeff_appdescr = 13;
+ coeff_appauthor = 13;
+ coeff_apptags = 8;
+ coeff_appref = 9;
+ coeff_appemph = 11;
+ coeff_queryinurl = 12;
+ coeff_queryindescr = 8;
+ coeff_urlcompintoplist = 3;
+ coeff_descrcompintoplist = 2;
+ coeff_prefer = 15;
+ coeff_catindexof = (mediatype.equals("text")) ? 0 : 10;
+ coeff_cathasimage = (mediatype.equals("image")) ? 15 : 0;
+ coeff_cathasaudio = (mediatype.equals("audio")) ? 15 : 0;
+ coeff_cathasvideo = (mediatype.equals("video")) ? 15 : 0;
+ coeff_cathasapp = (mediatype.equals("app")) ? 15 : 0;
}
public plasmaSearchRankingProfile(String prefix, String profile) {
- this(); // set defaults
- //parse external form
- String[] elts = profile.substring(1, profile.length() - 1).split(",");
- int p;
- int s = prefix.length();
- String e;
- for (int i = 0; i < elts.length; i++) {
- e = elts[i].trim();
- if ((s == 0) || (e.startsWith(prefix))) {
- coeff.put(e.substring(s, (p = e.indexOf("="))), new Integer(Integer.parseInt(e.substring(p + 1))));
+ this("text"); // set defaults
+ if ((profile != null) && (profile.length() > 0)) {
+ //parse external form
+ HashMap coeff = new HashMap();
+ String[] elts = ((profile.startsWith("{") && (profile.endsWith("}"))) ? profile.substring(1, profile.length() - 1) : profile).split(",");
+ int p;
+ int s = (prefix == null) ? 0 : prefix.length();
+ String e;
+ for (int i = 0; i < elts.length; i++) {
+ e = elts[i].trim();
+ if ((s == 0) || (e.startsWith(prefix))) {
+ coeff.put(e.substring(s, (p = e.indexOf("="))), new Integer(Integer.parseInt(e.substring(p + 1))));
+ }
}
+ coeff_domlength = parseMap(coeff, DOMLENGTH, coeff_domlength);
+ coeff_ybr = parseMap(coeff, YBR, coeff_ybr);
+ coeff_date = parseMap(coeff, DATE, coeff_date);
+ coeff_wordsintitle = parseMap(coeff, WORDSINTITLE, coeff_wordsintitle);
+ coeff_wordsintext = parseMap(coeff, WORDSINTEXT, coeff_wordsintext);
+ coeff_phrasesintext = parseMap(coeff, PHRASESINTEXT, coeff_phrasesintext);
+ coeff_llocal = parseMap(coeff, LLOCAL, coeff_llocal);
+ coeff_lother = parseMap(coeff, LOTHER, coeff_lother);
+ coeff_urllength = parseMap(coeff, URLLENGTH, coeff_urllength);
+ coeff_urlcomps = parseMap(coeff, URLCOMPS, coeff_urlcomps);
+ coeff_hitcount = parseMap(coeff, HITCOUNT, coeff_hitcount);
+ coeff_posintext = parseMap(coeff, POSINTEXT, coeff_posintext);
+ coeff_posofphrase = parseMap(coeff, POSOFPHRASE, coeff_posofphrase);
+ coeff_worddistance = parseMap(coeff, WORDDISTANCE, coeff_worddistance);
+ coeff_appurl = parseMap(coeff, APPURL, coeff_appurl);
+ coeff_appdescr = parseMap(coeff, APPDESCR, coeff_appdescr);
+ coeff_appauthor = parseMap(coeff, APPAUTHOR, coeff_appauthor);
+ coeff_apptags = parseMap(coeff, APPTAGS, coeff_apptags);
+ coeff_appref = parseMap(coeff, APPREF, coeff_appref);
+ coeff_appemph = parseMap(coeff, APPEMPH, coeff_appemph);
+ coeff_catindexof = parseMap(coeff, CATINDEXOF, coeff_catindexof); // each category weight is read under its own key, the same key toExternalMap() writes
+ coeff_cathasimage = parseMap(coeff, CATHASIMAGE, coeff_cathasimage);
+ coeff_cathasaudio = parseMap(coeff, CATHASAUDIO, coeff_cathasaudio);
+ coeff_cathasvideo = parseMap(coeff, CATHASVIDEO, coeff_cathasvideo);
+ coeff_cathasapp = parseMap(coeff, CATHASAPP, coeff_cathasapp);
+ coeff_queryinurl = parseMap(coeff, QUERYINURL, coeff_queryinurl);
+ coeff_queryindescr = parseMap(coeff, QUERYINDESCR, coeff_queryindescr);
+ coeff_urlcompintoplist = parseMap(coeff, URLCOMPINTOPLIST, coeff_urlcompintoplist);
+ coeff_descrcompintoplist = parseMap(coeff, DESCRCOMPINTOPLIST, coeff_descrcompintoplist);
+ coeff_prefer = parseMap(coeff, PREFER, coeff_prefer);
}
}
- public plasmaSearchRankingProfile(String[] order) {
- this(); // set defaults
- this.order = order;
- // overwrite defaults with order attributes
- for (int i = 0; i < 3; i++) {
- if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_QUALITY)) coeff.put(ENTROPY, new Integer((3 * (3 - i))));
- else if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_DATE)) coeff.put(DATE, new Integer((3 * (3 - i))));
- else if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_YBR)) coeff.put(YBR, new Integer((3 * (3 - i))));
+ private static int parseMap(HashMap coeff, String attr, int dflt) { // returns the int stored under attr, or dflt when attr is absent or its value is not a parseable integer
+ if (coeff.containsKey(attr)) try {
+ return Integer.parseInt(String.valueOf(coeff.get(attr))); // values may be Integer (as stored by the parsing constructor) or String; a blind (String) cast would throw ClassCastException
+ } catch (NumberFormatException e) {
+ return dflt;
+ } else {
+ return dflt;
}
}
-
- public String orderString() {
- if (order == null) return "YBR-Date-Quality";
- return order[0] + "-" + order[1] + "-" + order[2];
- }
public String toExternalString() {
- return coeff.toString();
+ return toExternalMap("").toString();
}
public Map toExternalMap(String prefix) {
- Iterator i = this.coeff.entrySet().iterator();
- Map.Entry entry;
Map ext = new HashMap();
- while (i.hasNext()) {
- entry = (Map.Entry) i.next();
- ext.put(prefix + (String) entry.getKey(), entry.getValue());
- }
+ ext.put(prefix + DOMLENGTH, Integer.toString(coeff_domlength));
+ ext.put(prefix + YBR, Integer.toString(coeff_ybr));
+ ext.put(prefix + DATE, Integer.toString(coeff_date));
+ ext.put(prefix + WORDSINTITLE, Integer.toString(coeff_wordsintitle));
+ ext.put(prefix + WORDSINTEXT, Integer.toString(coeff_wordsintext));
+ ext.put(prefix + PHRASESINTEXT, Integer.toString(coeff_phrasesintext));
+ ext.put(prefix + LLOCAL, Integer.toString(coeff_llocal));
+ ext.put(prefix + LOTHER, Integer.toString(coeff_lother));
+ ext.put(prefix + URLLENGTH, Integer.toString(coeff_urllength));
+ ext.put(prefix + URLCOMPS, Integer.toString(coeff_urlcomps));
+ ext.put(prefix + HITCOUNT, Integer.toString(coeff_hitcount));
+ ext.put(prefix + POSINTEXT, Integer.toString(coeff_posintext));
+ ext.put(prefix + POSOFPHRASE, Integer.toString(coeff_posofphrase));
+ ext.put(prefix + WORDDISTANCE, Integer.toString(coeff_worddistance));
+ ext.put(prefix + APPURL, Integer.toString(coeff_appurl));
+ ext.put(prefix + APPDESCR, Integer.toString(coeff_appdescr));
+ ext.put(prefix + APPAUTHOR, Integer.toString(coeff_appauthor));
+ ext.put(prefix + APPTAGS, Integer.toString(coeff_apptags));
+ ext.put(prefix + APPREF, Integer.toString(coeff_appref));
+ ext.put(prefix + APPEMPH, Integer.toString(coeff_appemph));
+ ext.put(prefix + CATINDEXOF, Integer.toString(coeff_catindexof));
+ ext.put(prefix + CATHASIMAGE, Integer.toString(coeff_cathasimage));
+ ext.put(prefix + CATHASAUDIO, Integer.toString(coeff_cathasaudio));
+ ext.put(prefix + CATHASVIDEO, Integer.toString(coeff_cathasvideo));
+ ext.put(prefix + CATHASAPP, Integer.toString(coeff_cathasapp));
+ ext.put(prefix + QUERYINURL, Integer.toString(coeff_queryinurl));
+ ext.put(prefix + QUERYINDESCR, Integer.toString(coeff_queryindescr));
+ ext.put(prefix + URLCOMPINTOPLIST, Integer.toString(coeff_urlcompintoplist));
+ ext.put(prefix + DESCRCOMPINTOPLIST, Integer.toString(coeff_descrcompintoplist));
+ ext.put(prefix + PREFER, Integer.toString(coeff_prefer));
return ext;
}
public String toExternalURLGet(String prefix) {
- Iterator i = this.coeff.entrySet().iterator();
+ Iterator i = toExternalMap("").entrySet().iterator();
Map.Entry entry;
StringBuffer ext = new StringBuffer();
while (i.hasNext()) {
@@ -168,15 +246,37 @@ public class plasmaSearchRankingProfile {
public long preRanking(indexRWIEntry normalizedEntry, String searchedWord) {
// the normalizedEntry must be a normalized indexEntry
long ranking = 0;
- ranking += normalizedEntry.quality() << ((Integer) coeff.get(ENTROPY)).intValue();
- ranking += normalizedEntry.virtualAge() << ((Integer) coeff.get(DATE)).intValue();
- ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << ((Integer) coeff.get(YBR)).intValue();
- ranking += (normalizedEntry.posintext() == 0) ? 0 : (256 - normalizedEntry.posintext()) << ((Integer) coeff.get(POSINTEXT)).intValue();
- ranking += (normalizedEntry.worddistance() == 0) ? 0 : (256 - normalizedEntry.worddistance()) << ((Integer) coeff.get(WORDDISTANCE)).intValue();
- ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << ((Integer) coeff.get(HITCOUNT)).intValue();
- ranking += (256 - plasmaURL.domLengthNormalized(normalizedEntry.urlHash())) << ((Integer) coeff.get(DOMLENGTH)).intValue();
- ranking += (plasmaURL.probablyRootURL(normalizedEntry.urlHash())) ? 16 << ((Integer) coeff.get(URLLENGTH)).intValue() : 0;
- ranking += (plasmaURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord) != null) ? 256 << ((Integer) coeff.get(QUERYINURL)).intValue() : 0;
+ ranking += (256 - plasmaURL.domLengthNormalized(normalizedEntry.urlHash())) << coeff_domlength;
+ ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << coeff_ybr;
+ ranking += normalizedEntry.virtualAge() << coeff_date;
+ ranking += normalizedEntry.wordsintitle() << coeff_wordsintitle;
+ ranking += normalizedEntry.wordsintext() << coeff_wordsintext;
+ ranking += normalizedEntry.phrasesintext() << coeff_phrasesintext;
+ ranking += normalizedEntry.llocal() << coeff_llocal;
+ ranking += normalizedEntry.lother() << coeff_lother;
+ ranking += (normalizedEntry.urllength() == 0) ? 0 : (256 - normalizedEntry.urllength()) << coeff_urllength;
+ ranking += (normalizedEntry.urlcomps() == 0) ? 0 : (256 - normalizedEntry.urlcomps()) << coeff_urlcomps;
+ ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << coeff_hitcount;
+ ranking += (normalizedEntry.posintext() == 0) ? 0 : (256 - normalizedEntry.posintext()) << coeff_posintext;
+ ranking += (normalizedEntry.posofphrase() == 0) ? 0 : (256 - normalizedEntry.posofphrase()) << coeff_posofphrase; // score the phrase position itself: smaller position gives a larger term, 0 contributes nothing (same shape as the posintext term)
+ ranking += (normalizedEntry.worddistance() == 0) ? 0 : (256 - normalizedEntry.worddistance()) << coeff_worddistance;
+
+ kelondroBitfield flags = normalizedEntry.flags();
+ ranking += (flags.get(indexRWIEntryNew.flag_app_url)) ? 256 << coeff_appurl : 0;
+ ranking += (flags.get(indexRWIEntryNew.flag_app_descr)) ? 256 << coeff_appdescr : 0;
+ ranking += (flags.get(indexRWIEntryNew.flag_app_author)) ? 256 << coeff_appauthor : 0;
+ ranking += (flags.get(indexRWIEntryNew.flag_app_tags)) ? 256 << coeff_apptags : 0;
+ ranking += (flags.get(indexRWIEntryNew.flag_app_reference)) ? 256 << coeff_appref : 0;
+ ranking += (flags.get(indexRWIEntryNew.flag_app_emphasized)) ? 256 << coeff_appemph : 0;
+ ranking += (flags.get(plasmaCondenser.flag_cat_indexof)) ? 256 << coeff_catindexof : 0;
+ ranking += (flags.get(plasmaCondenser.flag_cat_hasimage)) ? 256 << coeff_cathasimage : 0;
+ ranking += (flags.get(plasmaCondenser.flag_cat_hasaudio)) ? 256 << coeff_cathasaudio : 0;
+ ranking += (flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 256 << coeff_cathasvideo : 0;
+ ranking += (flags.get(plasmaCondenser.flag_cat_hasapp)) ? 256 << coeff_cathasapp : 0;
+
+ ranking += (plasmaURL.probablyRootURL(normalizedEntry.urlHash())) ? 16 << coeff_urllength : 0;
+ ranking += (plasmaURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord) != null) ? 256 << coeff_queryinurl : 0;
+
/*
if (indexURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord))
System.out.println("DEBUG - hash " + normalizedEntry.urlHash() + " contains word " + searchedWord + ", weighted " + ((Integer) coeff.get(QUERYINURL)).intValue() + ", ranking = " + ranking);
@@ -199,15 +299,15 @@ public class plasmaSearchRankingProfile {
// prefer hit with 'prefer' pattern
indexURLEntry.Components comp = page.comp();
- if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
- if (comp.descr().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
+ if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << coeff_prefer;
+ if (comp.descr().matches(query.prefer)) ranking += 256 << coeff_prefer;
// apply 'common-sense' heuristic using references
for (int j = 0; j < urlcomps.length; j++) {
- if (topwords.contains(urlcomps[j])) ranking += 256 << ((Integer) coeff.get(URLCOMPINTOPLIST)).intValue();
+ if (topwords.contains(urlcomps[j])) ranking += 256 << coeff_urlcompintoplist;
}
for (int j = 0; j < descrcomps.length; j++) {
- if (topwords.contains(descrcomps[j])) ranking += 256 << ((Integer) coeff.get(DESCRCOMPINTOPLIST)).intValue();
+ if (topwords.contains(descrcomps[j])) ranking += 256 << coeff_descrcompintoplist;
}
// apply query-in-result matching
@@ -217,18 +317,10 @@ public class plasmaSearchRankingProfile {
String queryhash;
while (shi.hasNext()) {
queryhash = (String) shi.next();
- if (urlcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINURL)).intValue();
- if (descrcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINDESCR)).intValue();
+ if (urlcomph.contains(queryhash)) ranking += 256 << coeff_queryinurl;
+ if (descrcomph.contains(queryhash)) ranking += 256 << coeff_queryindescr;
}
- // prefer short urls
- ranking += (256 - comp.url().toNormalform().length()) << ((Integer) coeff.get(URLLENGTH)).intValue();
- ranking += (8 * Math.max(0, 32 - urlcomps.length)) << ((Integer) coeff.get(URLCOMPS)).intValue();
-
- // prefer long descriptions
- ranking += (256 * comp.url().toNormalform().length() / 80) << ((Integer) coeff.get(DESCRLENGTH)).intValue();
- ranking += (256 * (12 - Math.abs(12 - Math.min(12, descrcomps.length))) / 12) << ((Integer) coeff.get(DESCRCOMPS)).intValue();
-
return ranking;
}
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index e5c319d63..00df6509d 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.plasma.plasmaURL;
-import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.net.URL;
import de.anomic.plasma.cache.IResourceInfo;
@@ -587,19 +586,20 @@ public class plasmaSnippetCache {
}
}
- public ArrayList retrieveMediaSnippets(URL url, Set queryhashes, boolean fetchOnline, int timeout) {
+ public ArrayList retrieveMediaSnippets(URL url, Set queryhashes, String mediatype, boolean fetchOnline, int timeout) { // mediatype picks the snippet kind: "audio", "video", "app" or "image"; null or "" collects all four; returns an empty list when queryhashes is empty or no document is retrieved
if (queryhashes.size() == 0) {
serverLog.logFine("snippet fetch", "no query hashes given for url " + url);
return new ArrayList();
}
-
+ if (mediatype == null) mediatype = ""; // treat null like "" so the length()==0 checks below select every media kind
+
plasmaParserDocument document = retrieveDocument(url, fetchOnline, timeout, false);
ArrayList a = new ArrayList();
if (document != null) {
- a.addAll(computeMediaSnippets(document, queryhashes, "audio"));
- a.addAll(computeMediaSnippets(document, queryhashes, "video"));
- a.addAll(computeMediaSnippets(document, queryhashes, "app"));
- a.addAll(computeImageSnippets(document, queryhashes));
+ if ((mediatype.length() == 0) || (mediatype.equals("audio"))) a.addAll(computeMediaSnippets(document, queryhashes, "audio"));
+ if ((mediatype.length() == 0) || (mediatype.equals("video"))) a.addAll(computeMediaSnippets(document, queryhashes, "video"));
+ if ((mediatype.length() == 0) || (mediatype.equals("app" ))) a.addAll(computeMediaSnippets(document, queryhashes, "app"));
+ if ((mediatype.length() == 0) || (mediatype.equals("image"))) a.addAll(computeImageSnippets(document, queryhashes));
}
return a;
}
@@ -838,7 +838,7 @@ public class plasmaSnippetCache {
return result;
}
-
+ /*
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) {
// fetch snippets
int i = 0;
@@ -879,5 +879,5 @@ public class plasmaSnippetCache {
log.logFine("snippetFetcher: got URL " + url + ", the snippet is '" + snippet.line + "', source=" + snippet.source);
}
}
-
+ */
}
\ No newline at end of file
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 0104fe786..88090e566 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -2160,8 +2160,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// suppress line: there is no match in that resource
} else {*/
prop.put("type_results_" + i + "_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", urlstring) == null) ? 1 : 0);
- prop.put("type_results_" + i + "_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
- prop.put("type_results_" + i + "_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
+ prop.put("type_results_" + i + "_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
+ prop.put("type_results_" + i + "_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
prop.put("type_results_" + i + "_description", comp.descr());
prop.put("type_results_" + i + "_url", urlstring);
prop.put("type_results_" + i + "_urlhash", urlhash);
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 8378022ef..06880ea83 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -432,9 +432,9 @@ public final class yacyClient {
obj.put("filter", filter);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
- obj.put("profile", timingProfile.targetToString()); // new duetimes splitted by specific search tasks
+ obj.put("timing", crypt.simpleEncode(timingProfile.targetToString())); // new duetimes splitted by specific search tasks
obj.put("maxdist", maxDistance);
- obj.put("rankingProfile", rankingProfile.toExternalString());
+ obj.put("profile", crypt.simpleEncode(rankingProfile.toExternalString()));
obj.put("constraint", constraint.exportB64());
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
if (abstractCache != null) obj.put("abstracts", "auto");
|