diff --git a/build.properties b/build.properties index dc8f26d86..84b581f57 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.494 +releaseVersion=0.495 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/DetailedSearch.html b/htroot/DetailedSearch.html index fcee802ea..a72e607a3 100644 --- a/htroot/DetailedSearch.html +++ b/htroot/DetailedSearch.html @@ -47,10 +47,6 @@
Local Pre-Ranking
-
Entropy
-
- -
Date
diff --git a/htroot/DetailedSearch.java b/htroot/DetailedSearch.java index 5aecd0e12..f80e4e8d1 100644 --- a/htroot/DetailedSearch.java +++ b/htroot/DetailedSearch.java @@ -84,7 +84,7 @@ public class DetailedSearch { prop.put("results", ""); prop.put("urlmaskoptions", 0); prop.put("urlmaskoptions_urlmaskfilter", ".*"); - String defaultRankingProfile = new plasmaSearchRankingProfile().toExternalString(); + String defaultRankingProfile = new plasmaSearchRankingProfile("text").toExternalString(); prop.putAll(new plasmaSearchRankingProfile("", defaultRankingProfile).toExternalMap("local")); return prop; } diff --git a/htroot/index.html b/htroot/index.html index 6d2c95624..77288e811 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -29,7 +29,6 @@ Applications       #(searchoptions)# - @@ -52,19 +51,6 @@ - - order by: - - - - Resource: diff --git a/htroot/index.java b/htroot/index.java index 0bc75fb1e..774620fe7 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -34,7 +34,6 @@ import java.util.HashMap; import de.anomic.http.httpHeader; import de.anomic.net.URL; -import de.anomic.plasma.plasmaSearchPreOrder; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; @@ -106,12 +105,6 @@ public class index { prop.put("searchoptions_count-50", (count == 50) ? 1 : 0); prop.put("searchoptions_count-100", (count == 100) ? 1 : 0); prop.put("searchoptions_count-1000", (count == 1000) ? 1 : 0); - prop.put("searchoptions_order-ybr-date-quality", plasmaSearchPreOrder.canUseYBR() ? 1 : 0); - prop.put("searchoptions_order-ybr-quality-date", 0); - prop.put("searchoptions_order-date-ybr-quality", 0); - prop.put("searchoptions_order-quality-ybr-date", 0); - prop.put("searchoptions_order-date-quality-ybr", plasmaSearchPreOrder.canUseYBR() ? 0 : 1); - prop.put("searchoptions_order-quality-date-ybr", 0); prop.put("searchoptions_resource-global", ((global) ? 
1 : 0)); prop.put("searchoptions_resource-local", ((global) ? 0 : 1)); prop.put("searchoptions_time-1", (time == 1) ? 1 : 0); diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java index b1bfbcacb..410226c7f 100644 --- a/htroot/xml/snippet.java +++ b/htroot/xml/snippet.java @@ -70,7 +70,7 @@ public class snippet { prop.put("links", 0); } else { // attach media information - ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, true, 1000); + ArrayList mediaSnippets = switchboard.snippetCache.retrieveMediaSnippets(url, queryHashes, media, true, 1000); plasmaSnippetCache.MediaSnippet ms; for (int i = 0; i < mediaSnippets.size(); i++) { ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i); @@ -79,7 +79,7 @@ public class snippet { prop.put("link_" + i + "_name", ms.name); prop.put("link_" + i + "_attr", ms.attr); } - System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString); + //System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString); prop.put("text", ""); prop.put("link", mediaSnippets.size()); prop.put("links", mediaSnippets.size()); diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 2eff2161f..c11ec4477 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -70,6 +70,7 @@ import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySeed; +import de.anomic.tools.crypt; public final class search { @@ -95,6 +96,8 @@ public final class search { final String prefer = post.get("prefer", ""); final String contentdom = post.get("contentdom", "text"); final String filter = post.get("filter", ".*"); + String profile = post.get("profile", ""); // remote profile hand-over + if (profile.length() > 0) profile = crypt.simpleDecode(profile, null); final boolean includesnippet = post.get("includesnippet", 
"false").equals("true"); final kelondroBitfield constraint = new kelondroBitfield(4, post.get("constraint", "______")); // final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers @@ -140,7 +143,7 @@ public final class search { yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + squery.anonymizedQueryHashes() + " - " + squery.wantedResults + " links"); // prepare a search profile - plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY}); + plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile); plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults); plasmaSearchTimingProfile remoteTiming = null; @@ -167,7 +170,7 @@ public final class search { yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + squery.anonymizedQueryHashes() + " - " + squery.wantedResults + " links"); // prepare a search profile - plasmaSearchRankingProfile rankingProfile = new plasmaSearchRankingProfile(new String[]{plasmaSearchRankingProfile.ORDER_YBR, plasmaSearchRankingProfile.ORDER_DATE, plasmaSearchRankingProfile.ORDER_QUALITY}); + plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? 
new plasmaSearchRankingProfile(contentdom) : new plasmaSearchRankingProfile("", profile); plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults); plasmaSearchTimingProfile remoteTiming = null; diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 8dd045f48..40e24e379 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -170,122 +170,105 @@ public class yacysearch { if (!indexDistributeGranted || !indexReceiveGranted) { global = false; } // find search domain - int contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; - String cds = post.get("contentdom", "text"); - if (cds.equals("text")) contentdom = plasmaSearchQuery.CONTENTDOM_TEXT; - if (cds.equals("audio")) contentdom = plasmaSearchQuery.CONTENTDOM_AUDIO; - if (cds.equals("video")) contentdom = plasmaSearchQuery.CONTENTDOM_VIDEO; - if (cds.equals("image")) contentdom = plasmaSearchQuery.CONTENTDOM_IMAGE; - if (cds.equals("app")) contentdom = plasmaSearchQuery.CONTENTDOM_APP; + int contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT; + String contentdomString = post.get("contentdom", "text"); + if (contentdomString.equals("text")) contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT; + if (contentdomString.equals("audio")) contentdomCode = plasmaSearchQuery.CONTENTDOM_AUDIO; + if (contentdomString.equals("video")) contentdomCode = plasmaSearchQuery.CONTENTDOM_VIDEO; + if (contentdomString.equals("image")) contentdomCode = plasmaSearchQuery.CONTENTDOM_IMAGE; + if (contentdomString.equals("app")) contentdomCode = plasmaSearchQuery.CONTENTDOM_APP; // patch until better search profiles are available - if ((contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30; + if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 10)) count = 30; serverObjects prop = new serverObjects(); if (post.get("cat", "href").equals("href")) { - final TreeSet query = plasmaSearchQuery.cleanQuery(querystring); - // filter out 
stopwords - final TreeSet filtered = kelondroMSetTools.joinConstructive(query, - plasmaSwitchboard.stopwords); - if (filtered.size() > 0) { - kelondroMSetTools.excludeDestructive(query, plasmaSwitchboard.stopwords); - } + final TreeSet query = plasmaSearchQuery.cleanQuery(querystring); + // filter out stopwords + final TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords); + if (filtered.size() > 0) { + kelondroMSetTools.excludeDestructive(query, plasmaSwitchboard.stopwords); + } - // if a minus-button was hit, remove a special reference first - if (post.containsKey("deleteref")) { - if (!sb.verifyAuthentication(header, true)) { - prop.put("AUTHENTICATE", "admin log-in"); // force log-in - return prop; - } - - // delete the index entry locally - final String delHash = post.get("deleteref", ""); // urlhash - sb.wordIndex.removeReferences(query, delHash); - - // make new news message with negative voting - HashMap map = new HashMap(); - map.put("urlhash", delHash); - map.put("vote", "negative"); - map.put("refid", ""); - yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippavt", map)); + // if a minus-button was hit, remove a special reference first + if (post.containsKey("deleteref")) { + if (!sb.verifyAuthentication(header, true)) { + prop.put("AUTHENTICATE", "admin log-in"); // force log-in + return prop; } + + // delete the index entry locally + final String delHash = post.get("deleteref", ""); // urlhash + sb.wordIndex.removeReferences(query, delHash); + + // make new news message with negative voting + HashMap map = new HashMap(); + map.put("urlhash", delHash); + map.put("vote", "negative"); + map.put("refid", ""); + yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippavt", map)); + } - // if aplus-button was hit, create new voting message - if (post.containsKey("recommendref")) { - if (!sb.verifyAuthentication(header, true)) { - prop.put("AUTHENTICATE", "admin log-in"); // force log-in - return prop; - } - final 
String recommendHash = post.get("recommendref", ""); // urlhash - indexURLEntry urlentry = sb.wordIndex.loadedURL.load(recommendHash, null); - if (urlentry != null) { - indexURLEntry.Components comp = urlentry.comp(); - plasmaParserDocument document; - document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000, true); - if (document != null) { - // create a news message - HashMap map = new HashMap(); - map.put("url", comp.url().toNormalform().replace(',', '|')); - map.put("title", comp.descr().replace(',', ' ')); - map.put("description", ((document == null) ? comp.descr() : document.getMainLongTitle()).replace(',', ' ')); - map.put("tags", ((document == null) ? "" : document.getKeywords(' '))); - yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map)); - document.close(); - } + // if aplus-button was hit, create new voting message + if (post.containsKey("recommendref")) { + if (!sb.verifyAuthentication(header, true)) { + prop.put("AUTHENTICATE", "admin log-in"); // force log-in + return prop; + } + final String recommendHash = post.get("recommendref", ""); // urlhash + indexURLEntry urlentry = sb.wordIndex.loadedURL.load(recommendHash, null); + if (urlentry != null) { + indexURLEntry.Components comp = urlentry.comp(); + plasmaParserDocument document; + document = sb.snippetCache.retrieveDocument(comp.url(), true, 5000, true); + if (document != null) { + // create a news message + HashMap map = new HashMap(); + map.put("url", comp.url().toNormalform().replace(',', '|')); + map.put("title", comp.descr().replace(',', ' ')); + map.put("description", ((document == null) ? comp.descr() : document.getMainLongTitle()).replace(',', ' ')); + map.put("tags", ((document == null) ? 
"" : document.getKeywords(' '))); + yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map)); + document.close(); } } + } - // prepare search order - final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getAddress() != null)); - - String order1 = plasmaSearchRankingProfile.ORDER_DATE; - String order2 = plasmaSearchRankingProfile.ORDER_YBR; - String order3 = plasmaSearchRankingProfile.ORDER_QUALITY; - if (order.startsWith("YBR")) order1 = plasmaSearchRankingProfile.ORDER_YBR; - if (order.startsWith("Date")) order1 = plasmaSearchRankingProfile.ORDER_DATE; - if (order.startsWith("Quality")) order1 = plasmaSearchRankingProfile.ORDER_QUALITY; - if (order.indexOf("-YBR-") > 0) order2 = plasmaSearchRankingProfile.ORDER_YBR; - if (order.indexOf("-Date-") > 0) order2 = plasmaSearchRankingProfile.ORDER_DATE; - if (order.indexOf("-Quality-") > 0) order2 = plasmaSearchRankingProfile.ORDER_QUALITY; - if (order.endsWith("YBR")) order3 = plasmaSearchRankingProfile.ORDER_YBR; - if (order.endsWith("Date")) order3 = plasmaSearchRankingProfile.ORDER_DATE; - if (order.endsWith("Quality")) order3 = plasmaSearchRankingProfile.ORDER_QUALITY; - - // do the search - plasmaSearchQuery thisSearch = new plasmaSearchQuery( + // prepare search properties + final boolean yacyonline = ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null) && (yacyCore.seedDB.mySeed.getAddress() != null)); + final boolean samesearch = env.getConfig("last-search", "").equals(querystring + contentdomString); + final boolean globalsearch = (global) && (yacyonline) && (!samesearch); + + // do the search + plasmaSearchQuery thisSearch = new plasmaSearchQuery( query, maxDistance, prefermask, - contentdom, + contentdomCode, count, searchtime, urlmask, - ((global) && (yacyonline) && (!(env.getConfig( - "last-search", "").equals(querystring)))) ? 
plasmaSearchQuery.SEARCHDOM_GLOBALDHT - : plasmaSearchQuery.SEARCHDOM_LOCAL, "", 20, constraint); - plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile( new String[] { order1, order2, order3 }); - plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults); - plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults); - prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true); - - /* - * final serverObjects prop = sb.searchFromLocal(query, order1, - * order2, count, ((global) && (yacyonline) && - * (!(env.getConfig("last-search","").equals(querystring)))), - * searchtime, urlmask); - */ - // remember the last search expression - env.setConfig("last-search", querystring); - - // process result of search - prop.put("type_resultbottomline", 0); - if (filtered.size() > 0) { - prop.put("excluded", 1); - prop.put("excluded_stopwords", filtered.toString()); - } else { - prop.put("excluded", 0); - } + (globalsearch) ? 
plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL, + "", + 20, + constraint); + plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(contentdomString); + plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults); + plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults); + prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true); + + // remember the last search expression + env.setConfig("last-search", querystring + contentdomString); + + // process result of search + prop.put("type_resultbottomline", 0); + if (filtered.size() > 0) { + prop.put("excluded", 1); + prop.put("excluded_stopwords", filtered.toString()); + } else { + prop.put("excluded", 0); + } if (prop == null || prop.size() == 0) { if (post.get("search", "").length() < 3) { @@ -364,7 +347,7 @@ public class yacysearch { } prop.put("type", (thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 0 : ((thisSearch.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 2 : 1)); - if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", cds); + if (prop.getInt("type", 0) == 1) prop.put("type_mediatype", contentdomString); prop.put("cat", "href"); prop.put("depth", "0"); @@ -418,12 +401,12 @@ public class yacysearch { prop.put("display", display); prop.put("indexof", (indexof) ? "on" : "off"); prop.put("constraint", constraint.exportB64()); - prop.put("contentdom", cds); - prop.put("contentdomCheckText", (contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0); - prop.put("contentdomCheckAudio", (contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0); - prop.put("contentdomCheckVideo", (contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0); - prop.put("contentdomCheckImage", (contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 
1 : 0); - prop.put("contentdomCheckApp", (contentdom == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0); + prop.put("contentdom", contentdomString); + prop.put("contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? 1 : 0); + prop.put("contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? 1 : 0); + prop.put("contentdomCheckVideo", (contentdomCode == plasmaSearchQuery.CONTENTDOM_VIDEO) ? 1 : 0); + prop.put("contentdomCheckImage", (contentdomCode == plasmaSearchQuery.CONTENTDOM_IMAGE) ? 1 : 0); + prop.put("contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? 1 : 0); // return rewrite properties return prop; diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index a26ae69b9..989fa7df4 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -552,8 +552,8 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http String mimeType = mimeTable.getProperty(targetExt, "text/html"); // generate an byte array from the generated image - int width = i.getWidth(null); - int height = i.getHeight(null); + int width = i.getWidth(null); if (width < 0) width = 96; // bad hack + int height = i.getHeight(null); if (height < 0) height = 96; // bad hack BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); bi.createGraphics().drawImage(i, 0, 0, width, height, null); serverByteBuffer baos = new serverByteBuffer(); diff --git a/source/de/anomic/index/indexRWIEntry.java b/source/de/anomic/index/indexRWIEntry.java index 8c39fb835..003b0379c 100644 --- a/source/de/anomic/index/indexRWIEntry.java +++ b/source/de/anomic/index/indexRWIEntry.java @@ -43,11 +43,16 @@ public interface indexRWIEntry { public int posintext(); public int posinphrase(); public int posofphrase(); - public int wordcount(); - public int phrasecount(); + public int wordsintext(); + public int 
phrasesintext(); public String getLanguage(); public char getType(); public kelondroBitfield flags(); + public int wordsintitle(); + public int llocal(); + public int lother(); + public int urllength(); + public int urlcomps(); public void combineDistance(indexRWIEntry oe); public int worddistance(); diff --git a/source/de/anomic/index/indexRWIEntryNew.java b/source/de/anomic/index/indexRWIEntryNew.java index 5894b242c..17c6c633f 100644 --- a/source/de/anomic/index/indexRWIEntryNew.java +++ b/source/de/anomic/index/indexRWIEntryNew.java @@ -160,8 +160,8 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { this.entry.setCol(col_lastModified, mddlm); this.entry.setCol(col_freshUntil, 0); this.entry.setCol(col_wordsInTitle, 20); // guessed - this.entry.setCol(col_wordsInText, oldEntry.wordcount()); - this.entry.setCol(col_phrasesInText, oldEntry.phrasecount()); + this.entry.setCol(col_wordsInText, oldEntry.wordsintext()); + this.entry.setCol(col_phrasesInText, oldEntry.phrasesintext()); this.entry.setCol(col_doctype, new byte[]{(byte) oldEntry.doctype()}); this.entry.setCol(col_language, (oldEntry.getLanguage() == null) ? 
"en" : oldEntry.getLanguage(), null); this.entry.setCol(col_llocal, 0); @@ -231,6 +231,10 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { public long lastModified() { return plasmaWordIndex.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified)); } + + public long freshUntil() { + return plasmaWordIndex.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil)); + } public int hitcount() { return (int) this.entry.getColLong(col_hitcount); @@ -248,11 +252,11 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { return (int) this.entry.getColLong(col_posofphrase); } - public int wordcount() { + public int wordsintext() { return (int) this.entry.getColLong(col_wordsInText); } - public int phrasecount() { + public int phrasesintext() { return (int) this.entry.getColLong(col_phrasesInText); } @@ -264,6 +268,26 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { return (char) this.entry.getColByte(col_doctype); } + public int wordsintitle() { + return (int) this.entry.getColLong(col_wordsInTitle); + } + + public int llocal() { + return (int) this.entry.getColLong(col_llocal); + } + + public int lother() { + return (int) this.entry.getColLong(col_lother); + } + + public int urllength() { + return (int) this.entry.getColLong(col_urlLength); + } + + public int urlcomps() { + return (int) this.entry.getColLong(col_urlComps); + } + public kelondroBitfield flags() { return new kelondroBitfield(this.entry.getColBytes(col_flags)); } @@ -278,7 +302,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? 
ie1.posofphrase() : 0 /*unknown*/); ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); - ie1.entry.setCol(col_wordsInText, (ie1.wordcount() + ie2.wordcount()) / 2); + ie1.entry.setCol(col_wordsInText, (ie1.wordsintext() + ie2.wordsintext()) / 2); return ie1; } @@ -292,24 +316,30 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { public static final void min(indexRWIEntryNew t, indexRWIEntry other) { if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); - if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordsInText, other.wordcount()); - if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasesInText, other.phrasecount()); + if (t.wordsintext() > other.wordsintext()) t.entry.setCol(col_wordsInText, other.wordsintext()); + if (t.phrasesintext() > other.phrasesintext()) t.entry.setCol(col_phrasesInText, other.phrasesintext()); if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext()); if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase()); if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); if (t.worddistance() > other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance()); if (t.lastModified() > other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified()); + if (t.urllength() > other.urllength()) t.entry.setCol(col_urlLength, other.urllength()); + if (t.urlcomps() > other.urlcomps()) t.entry.setCol(col_urlComps, other.urlcomps()); + if (t.wordsintitle() > other.wordsintitle() ) t.entry.setCol(col_wordsInTitle, other.wordsintitle()); } public static final void max(indexRWIEntryNew t, indexRWIEntry other) { if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); - if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordsInText, other.wordcount()); - if (t.phrasecount() < 
other.phrasecount()) t.entry.setCol(col_phrasesInText, other.phrasecount()); + if (t.wordsintext() < other.wordsintext()) t.entry.setCol(col_wordsInText, other.wordsintext()); + if (t.phrasesintext() < other.phrasesintext()) t.entry.setCol(col_phrasesInText, other.phrasesintext()); if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext()); if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase()); if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); if (t.worddistance() < other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance()); if (t.lastModified() < other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified()); + if (t.urllength() < other.urllength()) t.entry.setCol(col_urlLength, other.urllength()); + if (t.urlcomps() < other.urlcomps()) t.entry.setCol(col_urlComps, other.urlcomps()); + if (t.wordsintitle() < other.wordsintitle() ) t.entry.setCol(col_wordsInTitle, other.wordsintitle()); } @@ -330,13 +360,17 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry { //System.out.println("min = " + min.toPropertyForm(true)); //System.out.println("max = " + max.toPropertyForm(true)); t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount())); - t.entry.setCol(col_wordsInText , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount())); - t.entry.setCol(col_phrasesInText, (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount())); + t.entry.setCol(col_wordsInText , (t.wordsintext() == 0) ? 0 : 1 + 255 * (t.wordsintext() - min.wordsintext() ) / (1 + max.wordsintext() - min.wordsintext())); + t.entry.setCol(col_phrasesInText, (t.phrasesintext() == 0) ? 
0 : 1 + 255 * (t.phrasesintext() - min.phrasesintext() ) / (1 + max.phrasesintext() - min.phrasesintext())); t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext())); t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase())); t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase())); t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); // FIXME: hier gibts ein division by zero, was nur sein kann wenn die Normalisierung nicht geklappt hat. t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified())); + t.entry.setCol(col_urlLength , (t.urllength() == 0) ? 0 : 1 + 255 * (t.urllength() - min.urllength() ) / (1 + max.urllength() - min.urllength())); + t.entry.setCol(col_urlComps , (t.urlcomps() == 0) ? 0 : 1 + 255 * (t.urlcomps() - min.urlcomps() ) / (1 + max.urlcomps() - min.urlcomps())); + t.entry.setCol(col_wordsInTitle , (t.wordsintitle() == 0) ? 
0 : 1 + 255 * (t.wordsintitle() - min.wordsintitle()) / (1 + max.wordsintitle() - min.wordsintitle())); + //System.out.println("out = " + t.toPropertyForm(true)); } diff --git a/source/de/anomic/index/indexRWIEntryOld.java b/source/de/anomic/index/indexRWIEntryOld.java index 268f9dec2..5e41e1824 100644 --- a/source/de/anomic/index/indexRWIEntryOld.java +++ b/source/de/anomic/index/indexRWIEntryOld.java @@ -189,11 +189,11 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { return (int) this.entry.getColLong(col_posofphrase); } - public int wordcount() { + public int wordsintext() { return (int) this.entry.getColLong(col_wordcount); } - public int phrasecount() { + public int phrasesintext() { return (int) this.entry.getColLong(col_phrasecount); } @@ -215,7 +215,7 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? 
ie1.posofphrase() : 0 /*unknown*/); ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); - ie1.entry.setCol(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2); + ie1.entry.setCol(col_wordcount, (ie1.wordsintext() + ie2.wordsintext()) / 2); return ie1; } @@ -229,8 +229,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { public static final void min(indexRWIEntryOld t, indexRWIEntry other) { if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); - if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount()); - if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount()); + if (t.wordsintext() > other.wordsintext()) t.entry.setCol(col_wordcount, other.wordsintext()); + if (t.phrasesintext() > other.phrasesintext()) t.entry.setCol(col_phrasecount, other.phrasesintext()); if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext()); if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase()); if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); @@ -241,8 +241,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { public static final void max(indexRWIEntryOld t, indexRWIEntry other) { if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); - if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount()); - if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount()); + if (t.wordsintext() < other.wordsintext()) t.entry.setCol(col_wordcount, other.wordsintext()); + if (t.phrasesintext() < other.phrasesintext()) t.entry.setCol(col_phrasecount, other.phrasesintext()); if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext()); if (t.posinphrase() < other.posinphrase()) 
t.entry.setCol(col_posinphrase, other.posinphrase()); if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); @@ -269,8 +269,8 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { //System.out.println("min = " + min.toPropertyForm(true)); //System.out.println("max = " + max.toPropertyForm(true)); t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount())); - t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount())); - t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount())); + t.entry.setCol(col_wordcount , (t.wordsintext() == 0) ? 0 : 1 + 255 * (t.wordsintext() - min.wordsintext() ) / (1 + max.wordsintext() - min.wordsintext())); + t.entry.setCol(col_phrasecount , (t.phrasesintext() == 0) ? 0 : 1 + 255 * (t.phrasesintext() - min.phrasesintext() ) / (1 + max.phrasesintext() - min.phrasesintext())); t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext())); t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase())); t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 
0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase())); @@ -309,4 +309,24 @@ public class indexRWIEntryOld implements Cloneable, indexRWIEntry { return false; } + public int llocal() { + return 0; + } + + public int lother() { + return 0; + } + + public int urlcomps() { + return 0; + } + + public int urllength() { + return 0; + } + + public int wordsintitle() { + return 0; + } + } diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index b37ca9dcd..2e339c49f 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -47,111 +47,189 @@ import java.util.Map; import java.util.Set; import de.anomic.index.indexRWIEntry; +import de.anomic.index.indexRWIEntryNew; import de.anomic.plasma.plasmaURL; import de.anomic.index.indexURLEntry; +import de.anomic.kelondro.kelondroBitfield; public class plasmaSearchRankingProfile { - // old parameters for ordering - public static final String ORDER_QUALITY = "Quality"; - public static final String ORDER_DATE = "Date"; - public static final String ORDER_YBR = "YBR"; - // pre-sort attributes - public static final String ENTROPY = "entropy"; - public static final String DATE = "date"; - public static final String YBR = "ybr"; - public static final String POSINTEXT = "posintext"; - public static final String WORDDISTANCE = "worddistance"; - public static final String HITCOUNT = "hitcount"; - public static final String DOMLENGTH = "domlength"; + public static final String DOMLENGTH = "domlength"; + public static final String YBR = "ybr"; + public static final String DATE = "date"; + public static final String WORDSINTITLE = "wordsintitle"; + public static final String WORDSINTEXT = "wordsintext"; + public static final String PHRASESINTEXT = "phrasesintext"; + public static final String LLOCAL = "llocal"; + public static final String LOTHER = 
"lother"; + public static final String URLLENGTH = "urllength"; + public static final String URLCOMPS = "urlcomps"; + public static final String HITCOUNT = "hitcount"; + public static final String POSINTEXT = "posintext"; + public static final String POSOFPHRASE = "posofphrase"; + public static final String WORDDISTANCE = "worddistance"; + public static final String APPURL = "appurl"; + public static final String APPDESCR = "appdescr"; + public static final String APPAUTHOR = "appauthor"; + public static final String APPTAGS = "apptags"; + public static final String APPREF = "appref"; + public static final String APPEMPH = "appemph"; + public static final String CATINDEXOF = "catindexof"; + public static final String CATHASIMAGE = "cathasimage"; + public static final String CATHASAUDIO = "cathasaudio"; + public static final String CATHASVIDEO = "cathasvideo"; + public static final String CATHASAPP = "cathasapp"; - // post-sort attributes - public static final String URLLENGTH = "urllength"; - public static final String URLCOMPS = "urlcomps"; - public static final String DESCRLENGTH = "descrlength"; - public static final String DESCRCOMPS = "descrcomps"; - // post-sort predicates - public static final String QUERYINURL = "queryinurl"; - public static final String QUERYINDESCR = "queryindescr"; - public static final String URLCOMPINTOPLIST = "urlcompintoplist"; + public static final String QUERYINURL = "queryinurl"; + public static final String QUERYINDESCR = "queryindescr"; + public static final String URLCOMPINTOPLIST = "urlcompintoplist"; public static final String DESCRCOMPINTOPLIST = "descrcompintoplist"; public static final String PREFER = "prefer"; + + private int + coeff_domlength, coeff_ybr, coeff_date, coeff_wordsintitle, coeff_wordsintext, coeff_phrasesintext, + coeff_llocal, coeff_lother, coeff_urllength, coeff_urlcomps, coeff_hitcount, + coeff_posintext, coeff_posofphrase, coeff_worddistance, + coeff_appurl, coeff_appdescr, coeff_appauthor, 
coeff_apptags, coeff_appref, coeff_appemph, + coeff_catindexof, coeff_cathasimage, coeff_cathasaudio, coeff_cathasvideo, coeff_cathasapp, + coeff_queryinurl, coeff_queryindescr, coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer; - public String[] order; - private HashMap coeff; - - public plasmaSearchRankingProfile() { - // set some default-values - this.order = null; - this.coeff = new HashMap(); - coeff.put(ENTROPY, new Integer(0)); - coeff.put(DATE, new Integer(4)); - coeff.put(YBR, new Integer(8)); - coeff.put(POSINTEXT, new Integer(7)); - coeff.put(WORDDISTANCE, new Integer(6)); - coeff.put(HITCOUNT, new Integer(5)); - coeff.put(DOMLENGTH, new Integer(8)); - coeff.put(URLLENGTH, new Integer(15)); - coeff.put(URLCOMPS, new Integer(15)); - coeff.put(DESCRLENGTH, new Integer(4)); - coeff.put(DESCRCOMPS, new Integer(4)); - coeff.put(QUERYINURL, new Integer(13)); - coeff.put(QUERYINDESCR, new Integer(8)); - coeff.put(URLCOMPINTOPLIST, new Integer(3)); - coeff.put(DESCRCOMPINTOPLIST, new Integer(2)); - coeff.put(PREFER, new Integer(15)); + public plasmaSearchRankingProfile(String mediatype) { + // set default-values + if (mediatype == null) mediatype = "text"; + coeff_domlength = 8; + coeff_ybr = 8; + coeff_date = 4; + coeff_wordsintitle = 4; + coeff_wordsintext = 1; + coeff_phrasesintext = 1; + coeff_llocal = 2; + coeff_lother = 3; + coeff_urllength = 14; + coeff_urlcomps = 14; + coeff_hitcount = 5; + coeff_posintext = 7; + coeff_posofphrase = 6; + coeff_worddistance = 15; + coeff_appurl = 14; + coeff_appdescr = 13; + coeff_appauthor = 13; + coeff_apptags = 8; + coeff_appref = 9; + coeff_appemph = 11; + coeff_queryinurl = 12; + coeff_queryindescr = 8; + coeff_urlcompintoplist = 3; + coeff_descrcompintoplist = 2; + coeff_prefer = 15; + coeff_catindexof = (mediatype.equals("text")) ? 0 : 10; + coeff_cathasimage = (mediatype.equals("image")) ? 15 : 0; + coeff_cathasaudio = (mediatype.equals("audio")) ? 
15 : 0; + coeff_cathasvideo = (mediatype.equals("video")) ? 15 : 0; + coeff_cathasapp = (mediatype.equals("app")) ? 15 : 0; } public plasmaSearchRankingProfile(String prefix, String profile) { - this(); // set defaults - //parse external form - String[] elts = profile.substring(1, profile.length() - 1).split(","); - int p; - int s = prefix.length(); - String e; - for (int i = 0; i < elts.length; i++) { - e = elts[i].trim(); - if ((s == 0) || (e.startsWith(prefix))) { - coeff.put(e.substring(s, (p = e.indexOf("="))), new Integer(Integer.parseInt(e.substring(p + 1)))); + this("text"); // set defaults + if ((profile != null) && (profile.length() > 0)) { + //parse external form + HashMap coeff = new HashMap(); + String[] elts = ((profile.startsWith("{") && (profile.endsWith("}"))) ? profile.substring(1, profile.length() - 1) : profile).split(","); + int p; + int s = (prefix == null) ? 0 : prefix.length(); + String e; + for (int i = 0; i < elts.length; i++) { + e = elts[i].trim(); + if ((s == 0) || (e.startsWith(prefix))) { + coeff.put(e.substring(s, (p = e.indexOf("="))), new Integer(Integer.parseInt(e.substring(p + 1)))); + } } + coeff_domlength = parseMap(coeff, DOMLENGTH, coeff_domlength); + coeff_ybr = parseMap(coeff, YBR, coeff_ybr); + coeff_date = parseMap(coeff, DATE, coeff_date); + coeff_wordsintitle = parseMap(coeff, WORDSINTITLE, coeff_wordsintitle); + coeff_wordsintext = parseMap(coeff, WORDSINTEXT, coeff_wordsintext); + coeff_phrasesintext = parseMap(coeff, PHRASESINTEXT, coeff_phrasesintext); + coeff_llocal = parseMap(coeff, LLOCAL, coeff_llocal); + coeff_lother = parseMap(coeff, LOTHER, coeff_lother); + coeff_urllength = parseMap(coeff, URLLENGTH, coeff_urllength); + coeff_urlcomps = parseMap(coeff, URLCOMPS, coeff_urlcomps); + coeff_hitcount = parseMap(coeff, HITCOUNT, coeff_hitcount); + coeff_posintext = parseMap(coeff, POSINTEXT, coeff_posintext); + coeff_posofphrase = parseMap(coeff, POSOFPHRASE, coeff_posofphrase); + coeff_worddistance = 
parseMap(coeff, WORDDISTANCE, coeff_worddistance); + coeff_appurl = parseMap(coeff, APPURL, coeff_appurl); + coeff_appdescr = parseMap(coeff, APPDESCR, coeff_appdescr); + coeff_appauthor = parseMap(coeff, APPAUTHOR, coeff_appauthor); + coeff_apptags = parseMap(coeff, APPTAGS, coeff_apptags); + coeff_appref = parseMap(coeff, APPREF, coeff_appref); + coeff_appemph = parseMap(coeff, APPEMPH, coeff_appemph); /* the five category coefficients below are read under their own keys, the same keys toExternalMap() writes them under */ + coeff_catindexof = parseMap(coeff, CATINDEXOF, coeff_catindexof); + coeff_cathasimage = parseMap(coeff, CATHASIMAGE, coeff_cathasimage); + coeff_cathasaudio = parseMap(coeff, CATHASAUDIO, coeff_cathasaudio); + coeff_cathasvideo = parseMap(coeff, CATHASVIDEO, coeff_cathasvideo); + coeff_cathasapp = parseMap(coeff, CATHASAPP, coeff_cathasapp); + coeff_queryinurl = parseMap(coeff, QUERYINURL, coeff_queryinurl); + coeff_queryindescr = parseMap(coeff, QUERYINDESCR, coeff_queryindescr); + coeff_urlcompintoplist = parseMap(coeff, URLCOMPINTOPLIST, coeff_urlcompintoplist); + coeff_descrcompintoplist = parseMap(coeff, DESCRCOMPINTOPLIST, coeff_descrcompintoplist); + coeff_prefer = parseMap(coeff, PREFER, coeff_prefer); } } - public plasmaSearchRankingProfile(String[] order) { - this(); // set defaults - this.order = order; - // overwrite defaults with order attributes - for (int i = 0; i < 3; i++) { - if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_QUALITY)) coeff.put(ENTROPY, new Integer((3 * (3 - i)))); - else if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_DATE)) coeff.put(DATE, new Integer((3 * (3 - i)))); - else if (this.order[i].equals(plasmaSearchRankingProfile.ORDER_YBR)) coeff.put(YBR, new Integer((3 * (3 - i)))); + private static int parseMap(HashMap coeff, String attr, int dflt) { /* returns the integer stored under attr; dflt if the key is absent or the value is not a decimal integer */ + if (coeff.containsKey(attr)) try { + return Integer.parseInt(String.valueOf(coeff.get(attr))); /* the parsing constructor stores values as Integer, so convert to text instead of casting to String */ + } catch (NumberFormatException e) { + return dflt; + } else { + return dflt; } } - - public String orderString() { - if (order == null) return "YBR-Date-Quality"; - return order[0] + "-" + 
order[1] + "-" + order[2]; - } public String toExternalString() { - return coeff.toString(); + return toExternalMap("").toString(); } public Map toExternalMap(String prefix) { - Iterator i = this.coeff.entrySet().iterator(); - Map.Entry entry; Map ext = new HashMap(); - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - ext.put(prefix + (String) entry.getKey(), entry.getValue()); - } + ext.put(prefix + DOMLENGTH, Integer.toString(coeff_domlength)); + ext.put(prefix + YBR, Integer.toString(coeff_ybr)); + ext.put(prefix + DATE, Integer.toString(coeff_date)); + ext.put(prefix + WORDSINTITLE, Integer.toString(coeff_wordsintitle)); + ext.put(prefix + WORDSINTEXT, Integer.toString(coeff_wordsintext)); + ext.put(prefix + PHRASESINTEXT, Integer.toString(coeff_phrasesintext)); + ext.put(prefix + LLOCAL, Integer.toString(coeff_llocal)); + ext.put(prefix + LOTHER, Integer.toString(coeff_lother)); + ext.put(prefix + URLLENGTH, Integer.toString(coeff_urllength)); + ext.put(prefix + URLCOMPS, Integer.toString(coeff_urlcomps)); + ext.put(prefix + HITCOUNT, Integer.toString(coeff_hitcount)); + ext.put(prefix + POSINTEXT, Integer.toString(coeff_posintext)); + ext.put(prefix + POSOFPHRASE, Integer.toString(coeff_posofphrase)); + ext.put(prefix + WORDDISTANCE, Integer.toString(coeff_worddistance)); + ext.put(prefix + APPURL, Integer.toString(coeff_appurl)); + ext.put(prefix + APPDESCR, Integer.toString(coeff_appdescr)); + ext.put(prefix + APPAUTHOR, Integer.toString(coeff_appauthor)); + ext.put(prefix + APPTAGS, Integer.toString(coeff_apptags)); + ext.put(prefix + APPREF, Integer.toString(coeff_appref)); + ext.put(prefix + APPEMPH, Integer.toString(coeff_appemph)); + ext.put(prefix + CATINDEXOF, Integer.toString(coeff_catindexof)); + ext.put(prefix + CATHASIMAGE, Integer.toString(coeff_cathasimage)); + ext.put(prefix + CATHASAUDIO, Integer.toString(coeff_cathasaudio)); + ext.put(prefix + CATHASVIDEO, Integer.toString(coeff_cathasvideo)); + ext.put(prefix + CATHASAPP, 
Integer.toString(coeff_cathasapp)); + ext.put(prefix + QUERYINURL, Integer.toString(coeff_queryinurl)); + ext.put(prefix + QUERYINDESCR, Integer.toString(coeff_queryindescr)); + ext.put(prefix + URLCOMPINTOPLIST, Integer.toString(coeff_urlcompintoplist)); + ext.put(prefix + DESCRCOMPINTOPLIST, Integer.toString(coeff_descrcompintoplist)); + ext.put(prefix + PREFER, Integer.toString(coeff_prefer)); return ext; } public String toExternalURLGet(String prefix) { - Iterator i = this.coeff.entrySet().iterator(); + Iterator i = toExternalMap("").entrySet().iterator(); Map.Entry entry; StringBuffer ext = new StringBuffer(); while (i.hasNext()) { @@ -168,15 +246,37 @@ public class plasmaSearchRankingProfile { public long preRanking(indexRWIEntry normalizedEntry, String searchedWord) { // the normalizedEntry must be a normalized indexEntry long ranking = 0; - ranking += normalizedEntry.quality() << ((Integer) coeff.get(ENTROPY)).intValue(); - ranking += normalizedEntry.virtualAge() << ((Integer) coeff.get(DATE)).intValue(); - ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << ((Integer) coeff.get(YBR)).intValue(); - ranking += (normalizedEntry.posintext() == 0) ? 0 : (256 - normalizedEntry.posintext()) << ((Integer) coeff.get(POSINTEXT)).intValue(); - ranking += (normalizedEntry.worddistance() == 0) ? 0 : (256 - normalizedEntry.worddistance()) << ((Integer) coeff.get(WORDDISTANCE)).intValue(); - ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << ((Integer) coeff.get(HITCOUNT)).intValue(); - ranking += (256 - plasmaURL.domLengthNormalized(normalizedEntry.urlHash())) << ((Integer) coeff.get(DOMLENGTH)).intValue(); - ranking += (plasmaURL.probablyRootURL(normalizedEntry.urlHash())) ? 16 << ((Integer) coeff.get(URLLENGTH)).intValue() : 0; - ranking += (plasmaURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord) != null) ? 
256 << ((Integer) coeff.get(QUERYINURL)).intValue() : 0; + ranking += (256 - plasmaURL.domLengthNormalized(normalizedEntry.urlHash())) << coeff_domlength; + ranking += plasmaSearchPreOrder.ybr_p(normalizedEntry.urlHash()) << coeff_ybr; + ranking += normalizedEntry.virtualAge() << coeff_date; + ranking += normalizedEntry.wordsintitle() << coeff_wordsintitle; + ranking += normalizedEntry.wordsintext() << coeff_wordsintext; + ranking += normalizedEntry.phrasesintext() << coeff_phrasesintext; + ranking += normalizedEntry.llocal() << coeff_llocal; + ranking += normalizedEntry.lother() << coeff_lother; + ranking += (normalizedEntry.urllength() == 0) ? 0 : (256 - normalizedEntry.urllength()) << coeff_urllength; + ranking += (normalizedEntry.urlcomps() == 0) ? 0 : (256 - normalizedEntry.urlcomps()) << coeff_urlcomps; + ranking += (normalizedEntry.hitcount() == 0) ? 0 : normalizedEntry.hitcount() << coeff_hitcount; + ranking += (normalizedEntry.posintext() == 0) ? 0 : (256 - normalizedEntry.posintext()) << coeff_posintext; + ranking += (normalizedEntry.posofphrase() == 0) ? 0 : (256 - normalizedEntry.posofphrase()) << coeff_posofphrase; /* scored from the phrase position itself: a smaller position yields a larger term, as in the posintext and worddistance terms */ + ranking += (normalizedEntry.worddistance() == 0) ? 0 : (256 - normalizedEntry.worddistance()) << coeff_worddistance; + + kelondroBitfield flags = normalizedEntry.flags(); + ranking += (flags.get(indexRWIEntryNew.flag_app_url)) ? 256 << coeff_appurl : 0; + ranking += (flags.get(indexRWIEntryNew.flag_app_descr)) ? 256 << coeff_appdescr : 0; + ranking += (flags.get(indexRWIEntryNew.flag_app_author)) ? 256 << coeff_appauthor : 0; + ranking += (flags.get(indexRWIEntryNew.flag_app_tags)) ? 256 << coeff_apptags : 0; + ranking += (flags.get(indexRWIEntryNew.flag_app_reference)) ? 256 << coeff_appref : 0; + ranking += (flags.get(indexRWIEntryNew.flag_app_emphasized)) ? 256 << coeff_appemph : 0; + ranking += (flags.get(plasmaCondenser.flag_cat_indexof)) ? 256 << coeff_catindexof : 0; + ranking += (flags.get(plasmaCondenser.flag_cat_hasimage)) ? 
256 << coeff_cathasimage : 0; + ranking += (flags.get(plasmaCondenser.flag_cat_hasaudio)) ? 256 << coeff_cathasaudio : 0; + ranking += (flags.get(plasmaCondenser.flag_cat_hasvideo)) ? 256 << coeff_cathasvideo : 0; + ranking += (flags.get(plasmaCondenser.flag_cat_hasapp)) ? 256 << coeff_cathasapp : 0; + + ranking += (plasmaURL.probablyRootURL(normalizedEntry.urlHash())) ? 16 << coeff_urllength : 0; + ranking += (plasmaURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord) != null) ? 256 << coeff_queryinurl : 0; + /* if (indexURL.probablyWordURL(normalizedEntry.urlHash(), searchedWord)) System.out.println("DEBUG - hash " + normalizedEntry.urlHash() + " contains word " + searchedWord + ", weighted " + ((Integer) coeff.get(QUERYINURL)).intValue() + ", ranking = " + ranking); @@ -199,15 +299,15 @@ public class plasmaSearchRankingProfile { // prefer hit with 'prefer' pattern indexURLEntry.Components comp = page.comp(); - if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue(); - if (comp.descr().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue(); + if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << coeff_prefer; + if (comp.descr().matches(query.prefer)) ranking += 256 << coeff_prefer; // apply 'common-sense' heuristic using references for (int j = 0; j < urlcomps.length; j++) { - if (topwords.contains(urlcomps[j])) ranking += 256 << ((Integer) coeff.get(URLCOMPINTOPLIST)).intValue(); + if (topwords.contains(urlcomps[j])) ranking += 256 << coeff_urlcompintoplist; } for (int j = 0; j < descrcomps.length; j++) { - if (topwords.contains(descrcomps[j])) ranking += 256 << ((Integer) coeff.get(DESCRCOMPINTOPLIST)).intValue(); + if (topwords.contains(descrcomps[j])) ranking += 256 << coeff_descrcompintoplist; } // apply query-in-result matching @@ -217,18 +317,10 @@ public class plasmaSearchRankingProfile { String queryhash; while (shi.hasNext()) { queryhash = 
(String) shi.next(); - if (urlcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINURL)).intValue(); - if (descrcomph.contains(queryhash)) ranking += 256 << ((Integer) coeff.get(QUERYINDESCR)).intValue(); + if (urlcomph.contains(queryhash)) ranking += 256 << coeff_queryinurl; + if (descrcomph.contains(queryhash)) ranking += 256 << coeff_queryindescr; } - // prefer short urls - ranking += (256 - comp.url().toNormalform().length()) << ((Integer) coeff.get(URLLENGTH)).intValue(); - ranking += (8 * Math.max(0, 32 - urlcomps.length)) << ((Integer) coeff.get(URLCOMPS)).intValue(); - - // prefer long descriptions - ranking += (256 * comp.url().toNormalform().length() / 80) << ((Integer) coeff.get(DESCRLENGTH)).intValue(); - ranking += (256 * (12 - Math.abs(12 - Math.min(12, descrcomps.length))) / 12) << ((Integer) coeff.get(DESCRCOMPS)).intValue(); - return ranking; } diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index e5c319d63..00df6509d 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.plasma.plasmaURL; -import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; @@ -587,19 +586,20 @@ public class plasmaSnippetCache { } } - public ArrayList retrieveMediaSnippets(URL url, Set queryhashes, boolean fetchOnline, int timeout) { + public ArrayList retrieveMediaSnippets(URL url, Set queryhashes, String mediatype, boolean fetchOnline, int timeout) { if (queryhashes.size() == 0) { serverLog.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList(); } - + if (mediatype == null) mediatype = ""; + plasmaParserDocument document = retrieveDocument(url, 
fetchOnline, timeout, false); ArrayList a = new ArrayList(); if (document != null) { - a.addAll(computeMediaSnippets(document, queryhashes, "audio")); - a.addAll(computeMediaSnippets(document, queryhashes, "video")); - a.addAll(computeMediaSnippets(document, queryhashes, "app")); - a.addAll(computeImageSnippets(document, queryhashes)); + if ((mediatype.length() == 0) || (mediatype.equals("audio"))) a.addAll(computeMediaSnippets(document, queryhashes, "audio")); + if ((mediatype.length() == 0) || (mediatype.equals("video"))) a.addAll(computeMediaSnippets(document, queryhashes, "video")); + if ((mediatype.length() == 0) || (mediatype.equals("app" ))) a.addAll(computeMediaSnippets(document, queryhashes, "app")); + if ((mediatype.length() == 0) || (mediatype.equals("image"))) a.addAll(computeImageSnippets(document, queryhashes)); } return a; } @@ -838,7 +838,7 @@ public class plasmaSnippetCache { return result; } - + /* public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) { // fetch snippets int i = 0; @@ -879,5 +879,5 @@ public class plasmaSnippetCache { log.logFine("snippetFetcher: got URL " + url + ", the snippet is '" + snippet.line + "', source=" + snippet.source); } } - + */ } \ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0104fe786..88090e566 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2160,8 +2160,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // suppress line: there is no match in that resource } else {*/ prop.put("type_results_" + i + "_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", urlstring) == null) ? 
1 : 0); - prop.put("type_results_" + i + "_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*"); - prop.put("type_results_" + i + "_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + ranking.orderString() + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*"); + prop.put("type_results_" + i + "_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*"); + prop.put("type_results_" + i + "_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*"); prop.put("type_results_" + i + "_description", comp.descr()); prop.put("type_results_" + i + "_url", urlstring); prop.put("type_results_" + i + "_urlhash", urlhash); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 8378022ef..06880ea83 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -432,9 +432,9 @@ public final class yacyClient { obj.put("filter", filter); obj.put("ttl", "0"); obj.put("duetime", Long.toString(duetime)); - obj.put("profile", timingProfile.targetToString()); // new duetimes splitted by specific search tasks + obj.put("timing", crypt.simpleEncode(timingProfile.targetToString())); // new duetimes splitted by specific search tasks obj.put("maxdist", maxDistance); - obj.put("rankingProfile", rankingProfile.toExternalString()); + obj.put("profile", 
crypt.simpleEncode(rankingProfile.toExternalString())); obj.put("constraint", constraint.exportB64()); obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date())); if (abstractCache != null) obj.put("abstracts", "auto");