From 156cf0270336db824066edad030c905e0e3c4a35 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 31 Mar 2011 09:41:30 +0000 Subject: [PATCH] - added an index constraint 'has location' to the condenser - added evaluation of the 'has location' constraint to search using the /location operator git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7633 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Crawler_p.java | 2 +- htroot/yacysearch.java | 7 ++++++- source/de/anomic/search/RankingProcess.java | 9 ++++++++- source/net/yacy/cora/protocol/http/HTTPClient.java | 3 ++- source/net/yacy/document/Condenser.java | 3 ++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 70870670d..ac625d08a 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -136,7 +136,7 @@ public class Crawler_p { // normalize URL DigestURI crawlingStartURL = null; - try {crawlingStartURL = new DigestURI(crawlingStart);} catch (final MalformedURLException e1) {} + try {crawlingStartURL = new DigestURI(crawlingStart);} catch (final MalformedURLException e1) {Log.logException(e1);} crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true); // set new properties diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 0780bc1f2..3e951cd5c 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -289,9 +289,14 @@ public class yacysearch { ranking.coeff_worddistance = RankingProfile.COEFF_MAX; } if (querystring.indexOf("/date") >= 0) { - querystring = querystring.replace("/date", ""); + querystring = querystring.replace("/date", ""); ranking.coeff_date = RankingProfile.COEFF_MAX; } + if (querystring.indexOf("/location") >= 0) { + querystring = querystring.replace("/location", ""); + if (constraint == null) constraint = new Bitfield(4); + constraint.set(Condenser.flag_cat_haslocation, true); + } int lrp = querystring.indexOf("/language/"); String lr = ""; if (lrp >= 0) { diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 90ac93475..e43fbd025 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -441,7 +441,7 @@ public final class RankingProcess extends Thread { (QueryParams.anymatch(pageauthor.toLowerCase(), query.excludeHashes))) { continue; } - + // check index-of constraint if ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof)) && @@ -452,6 +452,13 @@ public final class RankingProcess extends Thread { } continue; } + + // check location constraint + if ((query.constraint != null) && + (query.constraint.get(Condenser.flag_cat_haslocation)) && + (metadata.lat() == 0.0f || metadata.lon() == 0.0f)) { + continue; + } // check content domain if ((query.contentdom == ContentDomain.AUDIO && page.laudio() == 0) || diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index 7fab68642..978747379 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -266,7 +266,8 @@ public class HTTPClient { public byte[] GETbytes(final String uri, long maxBytes) throws IOException { final MultiProtocolURI url = new MultiProtocolURI(uri); boolean localhost = url.getHost().equals("localhost"); - final HttpGet httpGet = new HttpGet(url.toNormalform(true, false, !localhost, false)); + String urix = url.toNormalform(true, false, !localhost, false); + final HttpGet httpGet = new HttpGet(urix); if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service return getContentBytes(httpGet, maxBytes); } diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index 620bf762d..0bb17dc1f 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -76,7 +76,7 @@ public final class Condenser { public static final int flag_cat_linux = 16; // pages about linux software public static final int flag_cat_macos = 17; // pages about macintosh, apple computers and the mac os public static final int flag_cat_windows = 18; // pages about windows os and software - public static final int flag_cat_osreserve = 19; // reserve + public static final int flag_cat_haslocation = 19; // the page has a location metadata attached public static final int flag_cat_hasimage = 20; // the page refers to (at least one) images public static final int flag_cat_hasaudio = 21; // the page refers to (at least one) audio file public static final int flag_cat_hasvideo = 22; // the page refers to (at least one) videos @@ -116,6 +116,7 @@ public final class Condenser { if (!document.getAudiolinks().isEmpty()) RESULT_FLAGS.set(flag_cat_hasaudio, true); if (!document.getVideolinks().isEmpty()) RESULT_FLAGS.set(flag_cat_hasvideo, true); if (!document.getApplinks().isEmpty()) RESULT_FLAGS.set(flag_cat_hasapp, true); + if (document.lat() != 0.0f && document.lon() != 0.0f) RESULT_FLAGS.set(flag_cat_haslocation, true); this.languageIdentificator = new Identificator();