You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
188 lines
13 KiB
188 lines
13 KiB
13 years ago
|
// RankingRWI_p.java
|
||
18 years ago
|
// --------------
|
||
|
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||
|
// first published 05.02.2006 on http://yacy.net
|
||
|
//
|
||
|
// This is a part of YaCy, a peer-to-peer based web search engine
|
||
|
//
|
||
16 years ago
|
// $LastChangedDate$
|
||
|
// $LastChangedRevision$
|
||
|
// $LastChangedBy$
|
||
18 years ago
|
//
|
||
|
// LICENSE
|
||
13 years ago
|
//
|
||
18 years ago
|
// This program is free software; you can redistribute it and/or modify
|
||
|
// it under the terms of the GNU General Public License as published by
|
||
|
// the Free Software Foundation; either version 2 of the License, or
|
||
|
// (at your option) any later version.
|
||
|
//
|
||
|
// This program is distributed in the hope that it will be useful,
|
||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
// GNU General Public License for more details.
|
||
|
//
|
||
|
// You should have received a copy of the GNU General Public License
|
||
|
// along with this program; if not, write to the Free Software
|
||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
||
13 years ago
|
import java.util.LinkedHashMap;
|
||
18 years ago
|
import java.util.Map;
|
||
17 years ago
|
import java.util.Map.Entry;
|
||
18 years ago
|
|
||
12 years ago
|
import net.yacy.cora.document.analysis.Classification;
|
||
15 years ago
|
import net.yacy.cora.protocol.RequestHeader;
|
||
14 years ago
|
import net.yacy.search.Switchboard;
|
||
12 years ago
|
import net.yacy.search.SwitchboardConstants;
|
||
14 years ago
|
import net.yacy.search.query.SearchEventCache;
|
||
|
import net.yacy.search.ranking.RankingProfile;
|
||
13 years ago
|
import net.yacy.server.serverObjects;
|
||
|
import net.yacy.server.serverSwitch;
|
||
|
import net.yacy.utils.crypt;
|
||
18 years ago
|
|
||
12 years ago
|
public class RankingRWI_p {
|
||
13 years ago
|
|
||
18 years ago
|
private static final int maxRankingRange = 16;
|
||
13 years ago
|
|
||
|
private static final LinkedHashMap<String, String> rankingParameters = new LinkedHashMap<String, String>();
|
||
18 years ago
|
static {
|
||
13 years ago
|
rankingParameters.put(RankingProfile.APPEMPH, "Appearance In Emphasized Text;a higher ranking level prefers documents where the search word is emphasized");
|
||
|
rankingParameters.put(RankingProfile.APPURL, "Appearance In URL;a higher ranking level prefers documents with urls that match the search word");
|
||
|
rankingParameters.put(RankingProfile.APP_DC_CREATOR, "Appearance In Author;a higher ranking level prefers documents with authors that match the search word");
|
||
|
rankingParameters.put(RankingProfile.APP_DC_DESCRIPTION, "Appearance In Reference/Anchor Name;a higher ranking level prefers documents where the search word matches in the description text");
|
||
|
rankingParameters.put(RankingProfile.APP_DC_SUBJECT, "Appearance In Tags;a higher ranking level prefers documents where the search word is part of subject tags");
|
||
|
rankingParameters.put(RankingProfile.APP_DC_TITLE, "Appearance In Title;a higher ranking level prefers documents with titles that match the search word");
|
||
|
rankingParameters.put(RankingProfile.AUTHORITY, "Authority of Domain;a higher ranking level prefers documents from domains with a large number of matching documents");
|
||
|
rankingParameters.put(RankingProfile.CATHASAPP, "Category App, Appearance;a higher ranking level prefers documents with embedded links to applications");
|
||
|
rankingParameters.put(RankingProfile.CATHASAUDIO, "Category Audio Appearance;a higher ranking level prefers documents with embedded links to audio content");
|
||
|
rankingParameters.put(RankingProfile.CATHASIMAGE, "Category Image Appearance;a higher ranking level prefers documents with embedded images");
|
||
|
rankingParameters.put(RankingProfile.CATHASVIDEO, "Category Video Appearance;a higher ranking level prefers documents with embedded links to video files");
|
||
|
rankingParameters.put(RankingProfile.CATINDEXOF, "Category Index Page;a higher ranking level prefers 'index of' (directory listings) pages");
|
||
|
rankingParameters.put(RankingProfile.DATE, "Date;a higher ranking level prefers younger documents. The age of a document is measured using the date submitted by the remote server as document date");
|
||
|
rankingParameters.put(RankingProfile.DOMLENGTH, "Domain Length;a higher ranking level prefers documents with a short domain name");
|
||
|
rankingParameters.put(RankingProfile.HITCOUNT, "Hit Count;a higher ranking level prefers documents with a large number of matchings for the search word(s)");
|
||
|
rankingParameters.put(RankingProfile.LANGUAGE, "Preferred Language;a higher ranking level prefers documents with a language that matches the browser language.");
|
||
|
rankingParameters.put(RankingProfile.LLOCAL, "Links To Local Domain;a higher ranking level prefers documents with a high number of hyperlinks to the same domain as the matching document.");
|
||
|
rankingParameters.put(RankingProfile.LOTHER, "Links To Other Domain;a higher ranking level prefers documents with a high number of hyperlinks to domains other than the matching document domain");
|
||
|
rankingParameters.put(RankingProfile.PHRASESINTEXT, "Phrases In Text;a higher ranking level prefers documents with a large number of phrases (sentences) in the matching document.");
|
||
|
rankingParameters.put(RankingProfile.POSINPHRASE, "Position In Phrase;a higher ranking level prefers documents with a word match position high in the matching phrase. The phrase match is the phrase (sentence) where the matching word appears first.");
|
||
|
rankingParameters.put(RankingProfile.POSINTEXT, "Position In Text;a higher ranking level prefers documents with a word match position high in the document. This prefers documents where the search wort is at the beginning of a text.");
|
||
|
rankingParameters.put(RankingProfile.POSOFPHRASE, "Position Of Phrase;a higher ranking level prefers documents with a phrase match position high in the document. The phrase match is the phrase (sentence) where the matching word appears first. This prefers documents where the search wort is at the beginning of a text.");
|
||
|
rankingParameters.put(RankingProfile.TERMFREQUENCY, "Term Frequency;a higher ranking level prefers documents with a high (number of matching words)/(number of words in document) ratio. This is same ranking as used in lucene and old-age search engines as existed before the year 2000.");
|
||
|
rankingParameters.put(RankingProfile.URLCOMPS, "URL Components;a higher ranking level prefers documents with a short number of url components. The number of url components is the number of (sub-) domains plus the number of (sub-) path elements in the file path.");
|
||
|
rankingParameters.put(RankingProfile.URLLENGTH, "URL Length;a higher ranking level prefers documents with a short url (domain plus path)");
|
||
|
rankingParameters.put(RankingProfile.WORDDISTANCE, "Word Distance;a higher ranking level prefers documents where the search words appear close together. This ranking parameter works like a NEAR operator in more-than-one word searches.");
|
||
|
rankingParameters.put(RankingProfile.WORDSINTEXT, "Words In Text;a higher ranking level prefers documents with a large number of words. Be aware that this is a compensation of the term frequency parameter.");
|
||
|
rankingParameters.put(RankingProfile.WORDSINTITLE, "Words In Title;a higher ranking level prefers documents with a large number of words in the document title.");
|
||
|
rankingParameters.put(RankingProfile.YBR, "YaCy Block Rank;a higher ranking level prefers documents with a higher, statically assigned ranking value on domains. This is like a 'moderated ranking'. The ranking on domains (blocks) was computed using a link analyses on large link graphs.");
|
||
|
|
||
|
rankingParameters.put(RankingProfile.URLCOMPINTOPLIST, "URL Component Appears In Toplist;a higher ranking level prefers documents with words in the url path that match words in the toplist. The toplist is generated dynamically from the search results using a statistic of the most used words. The toplist is a top-10 list of the most used words in URLs and document titles.");
|
||
|
rankingParameters.put(RankingProfile.DESCRCOMPINTOPLIST, "Description Comp. Appears In Toplist;a higher ranking level prefers documents with words in the document description that match words in the toplist. The toplist is generated dynamically from the search results using a statistic of the most used words. The toplist is a top-10 list of the most used words in URLs and document titles.");
|
||
|
rankingParameters.put(RankingProfile.PREFER, "Application Of Prefer Pattern;a higher ranking level prefers documents where the url matches the prefer pattern given in a search request.");
|
||
13 years ago
|
rankingParameters.put(RankingProfile.CITATION, "Citation Rank;the more incoming links and the less outgoing links the better the ranking.");
|
||
18 years ago
|
}
|
||
|
|
||
|
private static serverObjects defaultValues() {
|
||
|
final serverObjects prop = new serverObjects();
|
||
|
prop.put("search", "");
|
||
18 years ago
|
prop.put("num-results", "0");
|
||
|
prop.put("excluded", "0");
|
||
|
prop.put("combine", "0");
|
||
|
prop.put("resultbottomline", "0");
|
||
|
prop.put("localCount", "10");
|
||
|
prop.put("localWDist", "999");
|
||
18 years ago
|
//prop.put("globalChecked", "checked");
|
||
18 years ago
|
prop.put("globalChecked", "0");
|
||
|
prop.put("postsortChecked", "1");
|
||
|
prop.put("localTime", "6");
|
||
18 years ago
|
prop.put("results", "");
|
||
18 years ago
|
prop.put("urlmaskoptions", "0");
|
||
|
prop.putHTML("urlmaskoptions_urlmaskfilter", ".*");
|
||
|
prop.put("jumpToCursor", "1");
|
||
18 years ago
|
return prop;
|
||
|
}
|
||
13 years ago
|
|
||
16 years ago
|
private static void putRanking(final serverObjects prop, final RankingProfile rankingProfile, final String prefix) {
|
||
18 years ago
|
putRanking(prop, rankingProfile.preToExternalMap(prefix), prefix, "Pre");
|
||
|
putRanking(prop, rankingProfile.postToExternalMap(prefix), prefix, "Post");
|
||
|
}
|
||
13 years ago
|
|
||
17 years ago
|
private static void putRanking(final serverObjects prop, final Map<String, String> map, final String prefix, final String attrExtension) {
|
||
18 years ago
|
prop.put("attr" + attrExtension, map.size());
|
||
15 years ago
|
String key, description, name, info;
|
||
|
int i, j = 0, p;
|
||
17 years ago
|
for (final Entry<String, String> entry: map.entrySet()) {
|
||
16 years ago
|
key = entry.getKey();
|
||
15 years ago
|
description = rankingParameters.get(key.substring(prefix.length()));
|
||
13 years ago
|
p = description.indexOf(';',0);
|
||
15 years ago
|
if (p >= 0) {
|
||
|
name = description.substring(0, p);
|
||
|
info = description.substring(p + 1);
|
||
|
} else {
|
||
|
name = description;
|
||
|
info = "";
|
||
|
}
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_name", name);
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_info", info);
|
||
16 years ago
|
prop.put("attr" + attrExtension + "_" + j + "_nameorg", key);
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_select", maxRankingRange);
|
||
|
for (i=0; i<maxRankingRange; i++) {
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_select_" + i + "_nameorg", key);
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_select_" + i + "_value", i);
|
||
|
try {
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_select_" + i + "_checked",
|
||
15 years ago
|
(i == Integer.parseInt(entry.getValue())) ? "1" : "0");
|
||
16 years ago
|
} catch (final NumberFormatException e) {
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_select_" + i + "_checked", "0");
|
||
|
}
|
||
|
}
|
||
|
prop.put("attr" + attrExtension + "_" + j + "_value",
|
||
15 years ago
|
Integer.parseInt(map.get(key)));
|
||
16 years ago
|
j++;
|
||
|
}
|
||
18 years ago
|
}
|
||
13 years ago
|
|
||
13 years ago
|
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||
16 years ago
|
final Switchboard sb = (Switchboard) env;
|
||
18 years ago
|
|
||
17 years ago
|
// clean up all search events
|
||
16 years ago
|
SearchEventCache.cleanupEvents(true);
|
||
13 years ago
|
|
||
18 years ago
|
// case if no values are requested
|
||
13 years ago
|
if (post == null || sb == null) {
|
||
18 years ago
|
// we create empty entries for template strings
|
||
|
final serverObjects prop = defaultValues();
|
||
16 years ago
|
final RankingProfile ranking;
|
||
13 years ago
|
if (sb == null) ranking = new RankingProfile(Classification.ContentDomain.TEXT);
|
||
17 years ago
|
else ranking = sb.getRanking();
|
||
18 years ago
|
putRanking(prop, ranking, "local");
|
||
|
return prop;
|
||
|
}
|
||
13 years ago
|
|
||
18 years ago
|
if (post.containsKey("EnterRanking")) {
|
||
16 years ago
|
final RankingProfile ranking = new RankingProfile("local", post.toString());
|
||
12 years ago
|
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_RWI_PROFILE, crypt.simpleEncode(ranking.toExternalString()));
|
||
18 years ago
|
final serverObjects prop = defaultValues();
|
||
|
//prop.putAll(ranking.toExternalMap("local"));
|
||
|
putRanking(prop, ranking, "local");
|
||
|
return prop;
|
||
|
}
|
||
13 years ago
|
|
||
18 years ago
|
if (post.containsKey("ResetRanking")) {
|
||
12 years ago
|
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_RWI_PROFILE, "");
|
||
13 years ago
|
final RankingProfile ranking = new RankingProfile(Classification.ContentDomain.TEXT);
|
||
18 years ago
|
final serverObjects prop = defaultValues();
|
||
|
//prop.putAll(ranking.toExternalMap("local"));
|
||
|
putRanking(prop, ranking, "local");
|
||
|
return prop;
|
||
|
}
|
||
13 years ago
|
|
||
16 years ago
|
final RankingProfile localRanking = new RankingProfile("local", post.toString());
|
||
18 years ago
|
final serverObjects prop = new serverObjects();
|
||
|
putRanking(prop, localRanking, "local");
|
||
|
prop.putAll(localRanking.toExternalMap("local"));
|
||
|
|
||
|
return prop;
|
||
|
}
|
||
|
|
||
|
}
|