enhanced image search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6489 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 2d8f3ee301
commit 4c6312d103

@ -30,7 +30,7 @@ import java.util.Map;
import java.util.Map.Entry;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.QueryParams;
import de.anomic.search.ContentDomain;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEventCache;
import de.anomic.search.Switchboard;
@ -139,7 +139,7 @@ public class Ranking_p {
// we create empty entries for template strings
final serverObjects prop = defaultValues();
final RankingProfile ranking;
if(sb == null) ranking = new RankingProfile(QueryParams.CONTENTDOM_TEXT);
if(sb == null) ranking = new RankingProfile(ContentDomain.TEXT);
else ranking = sb.getRanking();
putRanking(prop, ranking, "local");
return prop;
@ -156,7 +156,7 @@ public class Ranking_p {
if (post.containsKey("ResetRanking")) {
sb.setConfig("rankingProfile", "");
final RankingProfile ranking = new RankingProfile(QueryParams.CONTENTDOM_TEXT);
final RankingProfile ranking = new RankingProfile(ContentDomain.TEXT);
final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local"));
putRanking(prop, ranking, "local");

@ -203,8 +203,8 @@ tt, *.tt {
.thumbcontainer {
margin: 2px;
width: 100px;
height: 160px; /* 96px thumbnail + some lines of text */
width: 96px;
height: 96px; /* 96px thumbnail + some lines of text */
float: left;
}

@ -30,7 +30,7 @@
import de.anomic.http.server.RequestHeader;
import de.anomic.search.QueryParams;
import de.anomic.search.ContentDomain;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
import de.anomic.server.serverObjects;
@ -79,13 +79,13 @@ public class index {
//global = global && indexDistributeGranted && indexReceiveGranted;
// search domain
int contentdom = QueryParams.CONTENTDOM_TEXT;
ContentDomain contentdom = ContentDomain.TEXT;
final String cds = (post == null) ? "text" : post.get("contentdom", "text");
if (cds.equals("text")) contentdom = QueryParams.CONTENTDOM_TEXT;
if (cds.equals("audio")) contentdom = QueryParams.CONTENTDOM_AUDIO;
if (cds.equals("video")) contentdom = QueryParams.CONTENTDOM_VIDEO;
if (cds.equals("image")) contentdom = QueryParams.CONTENTDOM_IMAGE;
if (cds.equals("app")) contentdom = QueryParams.CONTENTDOM_APP;
if (cds.equals("text")) contentdom = ContentDomain.TEXT;
if (cds.equals("audio")) contentdom = ContentDomain.AUDIO;
if (cds.equals("video")) contentdom = ContentDomain.VIDEO;
if (cds.equals("image")) contentdom = ContentDomain.IMAGE;
if (cds.equals("app")) contentdom = ContentDomain.APP;
// we create empty entries for template strings
String promoteSearchPageGreeting = env.getConfig(SwitchboardConstants.GREETING, "");
@ -120,11 +120,11 @@ public class index {
prop.put("display", display);
prop.putHTML("constraint", constraint);
prop.put("searchoptions_display", display);
prop.put("contentdomCheckText", (contentdom == QueryParams.CONTENTDOM_TEXT) ? "1" : "0");
prop.put("contentdomCheckAudio", (contentdom == QueryParams.CONTENTDOM_AUDIO) ? "1" : "0");
prop.put("contentdomCheckVideo", (contentdom == QueryParams.CONTENTDOM_VIDEO) ? "1" : "0");
prop.put("contentdomCheckImage", (contentdom == QueryParams.CONTENTDOM_IMAGE) ? "1" : "0");
prop.put("contentdomCheckApp", (contentdom == QueryParams.CONTENTDOM_APP) ? "1" : "0");
prop.put("contentdomCheckText", (contentdom == ContentDomain.TEXT) ? "1" : "0");
prop.put("contentdomCheckAudio", (contentdom == ContentDomain.AUDIO) ? "1" : "0");
prop.put("contentdomCheckVideo", (contentdom == ContentDomain.VIDEO) ? "1" : "0");
prop.put("contentdomCheckImage", (contentdom == ContentDomain.IMAGE) ? "1" : "0");
prop.put("contentdomCheckApp", (contentdom == ContentDomain.APP) ? "1" : "0");
// online caution timing
sb.localSearchLastAccess = System.currentTimeMillis();

@ -48,6 +48,7 @@ import net.yacy.kelondro.util.ISO639;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.net.natLib;
import de.anomic.search.ContentDomain;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent;
@ -177,7 +178,7 @@ public final class search {
final long timestamp = System.currentTimeMillis();
// prepare a search profile
final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(QueryParams.contentdomParser(contentdom)) : new RankingProfile("", profile);
final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);
// prepare an abstract result
final StringBuilder indexabstract = new StringBuilder(6000);
@ -197,7 +198,7 @@ public final class search {
rankingProfile,
maxdist,
prefer,
QueryParams.contentdomParser(contentdom),
ContentDomain.contentdomParser(contentdom),
language,
"", // no navigation
false,
@ -249,8 +250,7 @@ public final class search {
rankingProfile,
maxdist,
prefer,
QueryParams.
contentdomParser(contentdom),
ContentDomain.contentdomParser(contentdom),
language,
"", // no navigation
false,

@ -55,6 +55,7 @@ import de.anomic.data.DidYouMean;
import de.anomic.data.LibraryProvider;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.ContentDomain;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
import de.anomic.search.SearchEvent;
@ -201,10 +202,10 @@ public class yacysearch {
if (clustersearch) global = true; // switches search on, but search target is limited to cluster nodes
// find search domain
final int contentdomCode = QueryParams.contentdomParser((post == null ? "text" : post.get("contentdom", "text")));
final ContentDomain contentdomCode = ContentDomain.contentdomParser((post == null ? "text" : post.get("contentdom", "text")));
// patch until better search profiles are available
if ((contentdomCode != QueryParams.CONTENTDOM_TEXT) && (itemsPerPage <= 32)) itemsPerPage = 32;
if ((contentdomCode != ContentDomain.TEXT) && (itemsPerPage <= 32)) itemsPerPage = 64;
// check the search tracker
TreeSet<Long> trackerHandles = sb.localSearchTracker.get(client);
@ -619,7 +620,7 @@ public class yacysearch {
prop.put("results_" + i + "_display", display);
}
prop.put("results", theQuery.displayResults());
prop.put("resultTable", (contentdomCode <= 1) ? "0" : "1");
prop.put("resultTable", (contentdomCode == ContentDomain.APP || contentdomCode == ContentDomain.AUDIO || contentdomCode == ContentDomain.VIDEO) ? 1 : 0);
prop.put("eventID", theQuery.id(false)); // for bottomline
// process result of search
@ -663,11 +664,11 @@ public class yacysearch {
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());
prop.put("verify", (fetchSnippets) ? "true" : "false");
prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text")));
prop.put("contentdomCheckText", (contentdomCode == QueryParams.CONTENTDOM_TEXT) ? "1" : "0");
prop.put("contentdomCheckAudio", (contentdomCode == QueryParams.CONTENTDOM_AUDIO) ? "1" : "0");
prop.put("contentdomCheckVideo", (contentdomCode == QueryParams.CONTENTDOM_VIDEO) ? "1" : "0");
prop.put("contentdomCheckImage", (contentdomCode == QueryParams.CONTENTDOM_IMAGE) ? "1" : "0");
prop.put("contentdomCheckApp", (contentdomCode == QueryParams.CONTENTDOM_APP) ? "1" : "0");
prop.put("contentdomCheckText", (contentdomCode == ContentDomain.TEXT) ? "1" : "0");
prop.put("contentdomCheckAudio", (contentdomCode == ContentDomain.AUDIO) ? "1" : "0");
prop.put("contentdomCheckVideo", (contentdomCode == ContentDomain.VIDEO) ? "1" : "0");
prop.put("contentdomCheckImage", (contentdomCode == ContentDomain.IMAGE) ? "1" : "0");
prop.put("contentdomCheckApp", (contentdomCode == ContentDomain.APP) ? "1" : "0");
// for RSS: don't HTML encode some elements
prop.putXML("rss_query", originalquerystring);

@ -26,7 +26,9 @@
<img src="/ViewImage.png?maxwidth=96&amp;maxheight=96&amp;code=#[code]#" alt="#[name]#" />
</a>
<div class="highslide-caption"><a href="#[href]#">#[name]#</a><br \><a href="#[source]#">#[sourcedom]#</a></div>
<!--
<div class="TableCellDark"><a href="#[href]#">#[name]#</a><br \>#[attr]#</div>
-->
</div>
#{/items}#
::

@ -36,6 +36,7 @@ import net.yacy.kelondro.util.MemoryTracker;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.ContentDomain;
import de.anomic.search.MediaSnippet;
import de.anomic.search.QueryParams;
import de.anomic.search.SearchEvent;
@ -94,7 +95,7 @@ public class yacysearchitem {
prop.put("remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.getRankingResult().getRemotePeerCount(), true));
if (theQuery.contentdom == QueryParams.CONTENTDOM_TEXT) {
if (theQuery.contentdom == ContentDomain.TEXT) {
// text search
// generate result object
@ -156,10 +157,10 @@ public class yacysearchitem {
return prop;
}
if (theQuery.contentdom == QueryParams.CONTENTDOM_IMAGE) {
if (theQuery.contentdom == ContentDomain.IMAGE) {
// image search; shows thumbnails
prop.put("content", theQuery.contentdom + 1); // switch on specific content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
final MediaSnippet ms = theSearch.result().oneImage(item);
if (ms == null) {
prop.put("content_items", "0");
@ -176,16 +177,16 @@ public class yacysearchitem {
return prop;
}
if ((theQuery.contentdom == QueryParams.CONTENTDOM_AUDIO) ||
(theQuery.contentdom == QueryParams.CONTENTDOM_VIDEO) ||
(theQuery.contentdom == QueryParams.CONTENTDOM_APP)) {
if ((theQuery.contentdom == ContentDomain.AUDIO) ||
(theQuery.contentdom == ContentDomain.VIDEO) ||
(theQuery.contentdom == ContentDomain.APP)) {
// any other media content
// generate result object
final ResultEntry result = theSearch.oneResult(item);
if (result == null) return prop; // no content
prop.put("content", theQuery.contentdom + 1); // switch on specific content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
final ArrayList<MediaSnippet> media = result.mediaSnippets();
if (item == 0) col = true;
if (media != null) {

@ -0,0 +1,39 @@
package de.anomic.search;
/**
 * The content domain a search is restricted to (text, image, audio, video,
 * application) plus the wildcard {@link #ALL}.
 *
 * Each constant carries a numeric code that can be read with {@link #getCode()}.
 * The external (string) names are the lower-case words "text", "image",
 * "audio", "video" and "app"; {@link #ALL} has no external name of its own.
 */
public enum ContentDomain {

    ALL(-1),
    TEXT(0),
    IMAGE(1),
    AUDIO(2),
    VIDEO(3),
    APP(4);

    /** numeric code of this domain; fixed at construction time */
    private final int code;

    ContentDomain(final int code) {
        this.code = code;
    }

    /**
     * @return the numeric code of this content domain
     */
    public int getCode() {
        return this.code;
    }

    /**
     * Parse the external name of a content domain.
     *
     * @param dom one of "text", "image", "audio", "video", "app"; may be null
     * @return the matching content domain; {@link #TEXT} if the name is null
     *         or not recognized
     */
    public static ContentDomain contentdomParser(final String dom) {
        // constant-first comparison: a null name falls through to the default
        // like any other unknown name instead of raising a NullPointerException
        if ("text".equals(dom)) return TEXT;
        if ("image".equals(dom)) return IMAGE;
        if ("audio".equals(dom)) return AUDIO;
        if ("video".equals(dom)) return VIDEO;
        if ("app".equals(dom)) return APP;
        return TEXT;
    }

    /**
     * @return the external name of this content domain as accepted by
     *         {@link #contentdomParser(String)}; {@link #ALL} is reported as
     *         "text" because it has no external name
     */
    @Override
    public String toString() {
        if (this == TEXT) return "text";
        if (this == IMAGE) return "image";
        if (this == AUDIO) return "audio";
        if (this == VIDEO) return "video";
        if (this == APP) return "app";
        return "text";
    }
}

@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log;
*/
public class DocumentIndex extends Segment {
private static final RankingProfile textRankingDefault = new RankingProfile(QueryParams.CONTENTDOM_TEXT);
private static final RankingProfile textRankingDefault = new RankingProfile(ContentDomain.TEXT);
//private Bitfield zeroConstraint = new Bitfield(4);
private final static File poison = new File(".");

@ -37,12 +37,12 @@ import net.yacy.repository.LoaderDispatcher;
public class MediaSnippet {
public int type;
public ContentDomain type;
public DigestURI href, source;
public String name, attr;
public int ranking;
public MediaSnippet(final int type, final DigestURI href, final String name, final String attr, final int ranking, final DigestURI source) {
public MediaSnippet(final ContentDomain type, final DigestURI href, final String name, final String attr, final int ranking, final DigestURI source) {
this.type = type;
this.href = href;
this.source = source; // the web page where the media resource appeared
@ -58,7 +58,7 @@ public class MediaSnippet {
return href.hashCode();
}
public static ArrayList<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final TreeSet<byte[]> queryhashes, final int mediatype, final boolean fetchOnline, final int timeout, final boolean reindexing) {
public static ArrayList<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final TreeSet<byte[]> queryhashes, final ContentDomain mediatype, final boolean fetchOnline, final int timeout, final boolean reindexing) {
if (queryhashes.size() == 0) {
Log.logFine("snippet fetch", "no query hashes given for url " + url);
return new ArrayList<MediaSnippet>();
@ -67,21 +67,21 @@ public class MediaSnippet {
final Document document = LoaderDispatcher.retrieveDocument(url, fetchOnline, timeout, false, reindexing);
final ArrayList<MediaSnippet> a = new ArrayList<MediaSnippet>();
if (document != null) {
if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_AUDIO)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_AUDIO));
if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_VIDEO)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_VIDEO));
if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_APP)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_APP));
if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_IMAGE)) a.addAll(computeImageSnippets(document, queryhashes));
if ((mediatype == ContentDomain.ALL) || (mediatype == ContentDomain.AUDIO)) a.addAll(computeMediaSnippets(document, queryhashes, ContentDomain.AUDIO));
if ((mediatype == ContentDomain.ALL) || (mediatype == ContentDomain.VIDEO)) a.addAll(computeMediaSnippets(document, queryhashes, ContentDomain.VIDEO));
if ((mediatype == ContentDomain.ALL) || (mediatype == ContentDomain.APP)) a.addAll(computeMediaSnippets(document, queryhashes, ContentDomain.APP));
if ((mediatype == ContentDomain.ALL) || (mediatype == ContentDomain.IMAGE)) a.addAll(computeImageSnippets(document, queryhashes));
}
return a;
}
public static ArrayList<MediaSnippet> computeMediaSnippets(final Document document, final TreeSet<byte[]> queryhashes, final int mediatype) {
public static ArrayList<MediaSnippet> computeMediaSnippets(final Document document, final TreeSet<byte[]> queryhashes, final ContentDomain mediatype) {
if (document == null) return new ArrayList<MediaSnippet>();
Map<DigestURI, String> media = null;
if (mediatype == QueryParams.CONTENTDOM_AUDIO) media = document.getAudiolinks();
else if (mediatype == QueryParams.CONTENTDOM_VIDEO) media = document.getVideolinks();
else if (mediatype == QueryParams.CONTENTDOM_APP) media = document.getApplinks();
if (mediatype == ContentDomain.AUDIO) media = document.getAudiolinks();
else if (mediatype == ContentDomain.VIDEO) media = document.getVideolinks();
else if (mediatype == ContentDomain.APP) media = document.getApplinks();
if (media == null) return null;
final Iterator<Map.Entry<DigestURI, String>> i = media.entrySet().iterator();
@ -124,18 +124,15 @@ public class MediaSnippet {
ientry = i.next();
url = ientry.url();
desc = ientry.alt();
int appcount = 0;
s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes);
if (s.size() == 0) {
final int ranking = ientry.hashCode();
result.add(new MediaSnippet(QueryParams.CONTENTDOM_IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source()));
continue;
}
appcount += queryhashes.size() - s.size();
// if the resulting set is empty, then _all_ words from the query appeared in the url
s = TextSnippet.removeAppearanceHashes(desc, s);
if (s.size() == 0) {
final int ranking = ientry.hashCode();
result.add(new MediaSnippet(QueryParams.CONTENTDOM_IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source()));
continue;
}
appcount += queryhashes.size() - s.size();
// if the resulting set is empty, then _all_ search words appeared in the description
final int ranking = (ientry.hashCode() / queryhashes.size() / 2) * appcount;
result.add(new MediaSnippet(ContentDomain.IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source()));
}
return result;
}

@ -50,13 +50,6 @@ public final class QueryParams {
public static final int SEARCHDOM_GLOBALDHT = 3;
public static final int SEARCHDOM_GLOBALALL = 4;
public static final int CONTENTDOM_ALL = -1;
public static final int CONTENTDOM_TEXT = 0;
public static final int CONTENTDOM_IMAGE = 1;
public static final int CONTENTDOM_AUDIO = 2;
public static final int CONTENTDOM_VIDEO = 3;
public static final int CONTENTDOM_APP = 4;
public static enum FetchMode {
NO_FETCH_NO_VERIFY,
FETCH_BUT_ACCEPT_OFFLINE_OR_USE_CACHE,
@ -70,7 +63,7 @@ public final class QueryParams {
public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes;
public int itemsPerPage, offset;
public String prefer;
public int contentdom;
public ContentDomain contentdom;
public String urlMask;
public String targetlang;
public String navigators;
@ -113,7 +106,7 @@ public final class QueryParams {
this.tenant = null;
this.maxDistance = Integer.MAX_VALUE;
this.prefer = "";
this.contentdom = CONTENTDOM_ALL;
this.contentdom = ContentDomain.ALL;
this.itemsPerPage = itemsPerPage;
this.offset = 0;
this.urlMask = ".*";
@ -139,7 +132,7 @@ public final class QueryParams {
final TreeSet<byte[]> fullqueryHashes,
final String tenant,
final RankingProfile ranking,
final int maxDistance, final String prefer, final int contentdom,
final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language,
final String navigators,
final boolean onlineSnippetFetch,
@ -194,22 +187,8 @@ public final class QueryParams {
this.offset = newOffset;
}
public static int contentdomParser(final String dom) {
if (dom.equals("text")) return CONTENTDOM_TEXT;
else if (dom.equals("image")) return CONTENTDOM_IMAGE;
else if (dom.equals("audio")) return CONTENTDOM_AUDIO;
else if (dom.equals("video")) return CONTENTDOM_VIDEO;
else if (dom.equals("app")) return CONTENTDOM_APP;
return CONTENTDOM_TEXT;
}
public String contentdom() {
if (this.contentdom == CONTENTDOM_TEXT) return "text";
else if (this.contentdom == CONTENTDOM_IMAGE) return "image";
else if (this.contentdom == CONTENTDOM_AUDIO) return "audio";
else if (this.contentdom == CONTENTDOM_VIDEO) return "video";
else if (this.contentdom == CONTENTDOM_APP) return "app";
return "text";
return this.contentdom.toString();
}
public boolean isGlobal() {

@ -199,11 +199,11 @@ public final class RankingProcess extends Thread {
if (!testFlags(iEntry)) continue;
// check document domain
if (query.contentdom != QueryParams.CONTENTDOM_TEXT) {
if ((query.contentdom == QueryParams.CONTENTDOM_AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue;
if ((query.contentdom == QueryParams.CONTENTDOM_VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue;
if ((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue;
if ((query.contentdom == QueryParams.CONTENTDOM_APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue;
if (query.contentdom != ContentDomain.TEXT) {
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue;
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue;
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue;
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue;
}
// check tld domain
@ -417,10 +417,10 @@ public final class RankingProcess extends Thread {
}
// check content domain
if ((query.contentdom == QueryParams.CONTENTDOM_AUDIO && page.laudio() == 0) ||
(query.contentdom == QueryParams.CONTENTDOM_VIDEO && page.lvideo() == 0) ||
(query.contentdom == QueryParams.CONTENTDOM_IMAGE && page.limage() == 0) ||
(query.contentdom == QueryParams.CONTENTDOM_APP && page.lapp() == 0)) {
if ((query.contentdom == ContentDomain.AUDIO && page.laudio() == 0) ||
(query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) ||
(query.contentdom == ContentDomain.IMAGE && page.limage() == 0) ||
(query.contentdom == ContentDomain.APP && page.lapp() == 0)) {
continue;
}

@ -81,7 +81,7 @@ public class RankingProfile {
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer,
coeff_termfrequency, coeff_language;
public RankingProfile(final int mediatype) {
public RankingProfile(final ContentDomain mediatype) {
// set default-values
coeff_domlength = 11;
coeff_ybr = 9;
@ -105,11 +105,11 @@ public class RankingProfile {
coeff_app_dc_subject = 13;
coeff_app_dc_description = 13;
coeff_appemph = 10;
coeff_catindexof = (mediatype == QueryParams.CONTENTDOM_TEXT) ? 0 : 15;
coeff_cathasimage = (mediatype == QueryParams.CONTENTDOM_IMAGE) ? 15 : 0;
coeff_cathasaudio = (mediatype == QueryParams.CONTENTDOM_AUDIO) ? 15 : 0;
coeff_cathasvideo = (mediatype == QueryParams.CONTENTDOM_VIDEO) ? 15 : 0;
coeff_cathasapp = (mediatype == QueryParams.CONTENTDOM_APP) ? 15 : 0;
coeff_catindexof = (mediatype == ContentDomain.TEXT) ? 0 : 15;
coeff_cathasimage = (mediatype == ContentDomain.IMAGE) ? 15 : 0;
coeff_cathasaudio = (mediatype == ContentDomain.AUDIO) ? 15 : 0;
coeff_cathasvideo = (mediatype == ContentDomain.VIDEO) ? 15 : 0;
coeff_cathasapp = (mediatype == ContentDomain.APP) ? 15 : 0;
coeff_termfrequency = 14;
coeff_urlcompintoplist = 3;
coeff_descrcompintoplist = 2;
@ -118,7 +118,7 @@ public class RankingProfile {
}
public RankingProfile(final String prefix, final String profile) {
this(QueryParams.CONTENTDOM_TEXT); // set defaults
this(ContentDomain.TEXT); // set defaults
if ((profile != null) && (profile.length() > 0)) {
//parse external form
final HashMap<String, Integer> coeff = new HashMap<String, Integer>();

@ -50,8 +50,6 @@ import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.graphics.ProfilingGraph;
public class ResultFetcher {
protected final static int workerThreadCount = 10;
// input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
@ -99,14 +97,14 @@ public class ResultFetcher {
// start worker threads to fetch urls and snippets
this.workerThreads = null;
deployWorker(10);
deployWorker(query.itemsPerPage, query.neededResults());
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false);
}
public void deployWorker(int neededResults) {
public void deployWorker(int deployCount, int neededResults) {
if (anyWorkerAlive()) return;
this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
this.workerThreads = new Worker[(query.onlineSnippetFetch) ? deployCount : 1];
for (int i = 0; i < workerThreads.length; i++) {
this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0, neededResults);
this.workerThreads[i].start();
@ -152,16 +150,16 @@ public class ResultFetcher {
// start fetching urls and snippets
URIMetadataRow page;
final int fetchAhead = snippetMode == 0 ? 0 : 10;
//final int fetchAhead = snippetMode == 0 ? 0 : 10;
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try {
while (System.currentTimeMillis() < this.timeout) {
if (result.size() >= neededResults) break;
if (result.size() > neededResults) break;
this.lastLifeSign = System.currentTimeMillis();
// check if we have enough
if ((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() >= query.neededResults() + fetchAhead)) break;
if ((query.contentdom != QueryParams.CONTENTDOM_IMAGE) && (result.size() >= query.neededResults() + fetchAhead)) break;
//if ((query.contentdom == ContentDomain.IMAGE) && (images.size() >= query.neededResults() + fetchAhead)) break;
//if ((query.contentdom != ContentDomain.IMAGE) && (result.size() >= query.neededResults() + fetchAhead)) break;
// get next entry
page = rankedCache.takeURL(true, taketimeout);
@ -217,7 +215,7 @@ public class ResultFetcher {
}
// load snippet
if (query.contentdom == QueryParams.CONTENTDOM_TEXT) {
if (query.contentdom == ContentDomain.TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
final TextSnippet snippet = TextSnippet.retrieveTextSnippet(metadata, snippetFetchWordHashes, (snippetMode == 2), ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))), 180, (snippetMode == 2) ? Integer.MAX_VALUE : 30000, query.isGlobal());
@ -300,13 +298,13 @@ public class ResultFetcher {
System.out.println("query.neededResults() = " + query.neededResults());
*/
if ((!anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) ||
(((query.contentdom == ContentDomain.IMAGE) && (images.size() + 30 < query.neededResults())) ||
(this.result.size() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.size())
) {
// start worker threads to fetch urls and snippets
deployWorker(query.neededResults());
deployWorker(query.itemsPerPage, query.neededResults());
}
// finally wait until enough results are there produced from the
@ -373,10 +371,10 @@ public class ResultFetcher {
long r = 0;
// for media search: prefer pages with many links
if (query.contentdom == QueryParams.CONTENTDOM_IMAGE) r += rentry.limage() << query.ranking.coeff_cathasimage;
if (query.contentdom == QueryParams.CONTENTDOM_AUDIO) r += rentry.laudio() << query.ranking.coeff_cathasaudio;
if (query.contentdom == QueryParams.CONTENTDOM_VIDEO) r += rentry.lvideo() << query.ranking.coeff_cathasvideo;
if (query.contentdom == QueryParams.CONTENTDOM_APP ) r += rentry.lapp() << query.ranking.coeff_cathasapp;
if (query.contentdom == ContentDomain.IMAGE) r += rentry.limage() << query.ranking.coeff_cathasimage;
if (query.contentdom == ContentDomain.AUDIO) r += rentry.laudio() << query.ranking.coeff_cathasaudio;
if (query.contentdom == ContentDomain.VIDEO) r += rentry.lvideo() << query.ranking.coeff_cathasvideo;
if (query.contentdom == ContentDomain.APP ) r += rentry.lapp() << query.ranking.coeff_cathasapp;
// prefer hit with 'prefer' pattern
if (rentry.url().toNormalform(true, true).matches(query.prefer)) r += 256 << query.ranking.coeff_prefer;

@ -1039,7 +1039,7 @@ public final class Switchboard extends serverSwitch {
public RankingProfile getRanking() {
return (getConfig("rankingProfile", "").length() == 0) ?
new RankingProfile(QueryParams.CONTENTDOM_TEXT) :
new RankingProfile(ContentDomain.TEXT) :
new RankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
}

@ -114,6 +114,12 @@ public class TextSnippet {
return snippetsCache.get(key);
}
/**
* removes all word hashes that can be computed as tokens from a given sentence from a given hash set
* @param sentence
* @param queryhashes
* @return the given hash set minus the hashes from the tokenization of the given sentence
*/
public static TreeSet<byte[]> removeAppearanceHashes(final String sentence, final TreeSet<byte[]> queryhashes) {
// remove all hashes that appear in the sentence
if (sentence == null) return queryhashes;

@ -469,8 +469,12 @@ public final class Condenser {
return true;
}
/**
* tokenize the given sentence and generate a word-wordPos mapping
* @param sentence the sentence to be tokenized
* @return an ordered map containing word hashes as keys and positions as values. The map is ordered by the hash ordering
*/
public static TreeMap<byte[], Integer> hashSentence(final String sentence) {
// generates a word-wordPos mapping
final TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
final Enumeration<StringBuilder> words = wordTokenizer(sentence, "UTF-8");
int pos = 0;

@ -38,8 +38,8 @@ public class SortStack<E> {
// objects pushed on the stack must implement the hashCode() method to provide a handle
// for a double-check.
protected TreeMap<Long, E> onstack; // object within the stack
protected HashSet<Integer> instack; // keeps track which element has been on the stack or is now in the offstack
private TreeMap<Long, E> onstack; // object within the stack
private HashSet<Integer> instack; // keeps track which element has been on the stack or is now in the offstack
protected int maxsize;
public SortStack(final int maxsize) {

@ -29,12 +29,13 @@ package net.yacy.kelondro.util;
import java.util.ArrayList;
import java.util.Iterator;
/**
* extends the sortStack in such a way that it adds a list where objects, that had
* been pulled from the stack with pop are listed. Provides access methods to address
* specific elements in the list.
* @param <E>
*/
public class SortStore<E> extends SortStack<E> {
// extends the sortStack in such a way that it adds a list where objects, that had
// been pulled from the stack with pop are listed. Provides access methods to address
// specific elements in the list.
private final ArrayList<stackElement> offstack; // objects that had been on the stack but had been removed
@ -44,7 +45,7 @@ public class SortStore<E> extends SortStack<E> {
}
public int size() {
return super.onstack.size() + this.offstack.size();
return super.size() + this.offstack.size();
}
public int sizeStore() {
@ -54,59 +55,54 @@ public class SortStore<E> extends SortStack<E> {
public synchronized void push(final E element, final Long weight) {
super.push(element, weight);
if (this.maxsize <= 0) return;
while ((this.onstack.size() > 0) && (super.onstack.size() + this.offstack.size() > this.maxsize)) {
this.onstack.remove(this.onstack.lastKey());
while ((super.size() > 0) && (super.size() + this.offstack.size() > this.maxsize)) {
super.pop();
}
}
/**
* return the element that is currently on top of the stack
* it is removed and added to the offstack list
* this is exactly the same as element(offstack.size())
*/
public synchronized stackElement pop() {
// returns the element that is currently on top of the stack
// it is removed and added to the offstack list
// this is exactly the same as element(offstack.size())
final stackElement se = super.pop();
if (se == null) return null;
this.offstack.add(se);
return se;
}
/**
* return an element from a specific position. It is either taken from the offstack,
* or removed from the onstack.
* The offstack will grow if elements are not from the offstack and present at the onstack.
* @param position
* @return
*/
public synchronized stackElement element(final int position) {
// returns an element from a specific position. It is either taken from the offstack,
// or removed from the onstack.
// The offstack will grow if elements are not from the offstack and present at the onstack.
if (position < this.offstack.size()) {
return this.offstack.get(position);
}
if (position >= size()) return null; // we don't have that element
while (position >= this.offstack.size()) {
final Long w = this.onstack.firstKey();
final E element = this.onstack.remove(w);
final stackElement se = new stackElement(element, w);
this.offstack.add(se);
}
while (position >= this.offstack.size()) this.offstack.add(super.pop());
return this.offstack.get(position);
}
/**
* return the specific amount of entries. If they are not yet present in the offstack, they are shifted there from the onstack
* if count is < 0 then all elements are taken
* the returned list is not cloned from the internal list and shall not be modified in any way (read-only)
* @param count
* @return
*/
public ArrayList<stackElement> list(final int count) {
// returns the specific amount of entries. If they are not yet present in the offstack, they are shifted there from the onstack
// if count is < 0 then all elements are taken
// the returned list is not cloned from the internal list and shall not be modified in any way (read-only)
if (count < 0) {
// shift all elements
while (this.onstack.size() > 0) {
final Long w = this.onstack.firstKey();
final E element = this.onstack.remove(w);
final stackElement se = new stackElement(element, w);
this.offstack.add(se);
}
while (super.size() > 0) this.offstack.add(super.pop());
return this.offstack;
}
if (size() < count) throw new RuntimeException("list(" + count + ") exceeded avaiable number of elements (" + size() + ")");
while (this.offstack.size() < count) {
final Long w = this.onstack.firstKey();
final E element = this.onstack.remove(w);
final stackElement se = new stackElement(element, w);
this.offstack.add(se);
}
while (this.offstack.size() < count) this.offstack.add(super.pop());
return this.offstack;
}

Loading…
Cancel
Save