From d2d5be032dc255b24b74e9ccf0a328d375c62e33 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 14 Jan 2013 12:50:21 +0100 Subject: [PATCH] added an 'inlink' search option according to the suggestion in the YaCy forum at http://forum.yacy-websuche.de/viewtopic.php?f=18&t=4572#p27410 The feature was not called 'haslink' but 'inlink', so that its name is analogous to 'inurl'. You can now search for words in the links of a document, for example: * inlink:yacy finds all documents which link to pages that have 'yacy' in the URL. --- htroot/index.html | 2 ++ htroot/yacy/search.java | 4 ++-- htroot/yacysearch.java | 22 ++++++++++++++----- source/net/yacy/search/query/QueryParams.java | 14 ++++++++++-- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/htroot/index.html b/htroot/index.html index fe1e398c2..4d7d26c35 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -126,6 +126,8 @@
inurl:<phrase>
only urls with the <phrase> in the url
+
inlink:<phrase>
+
only urls with the <phrase> within outbound links of the document
filetype:<ext>
only urls with extension <ext>
site:<host>
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 6d073072b..51c22e3a1 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -228,7 +228,7 @@ public final class search { null, // no snippet computation count, 0, - filter, null, null, null, + filter, null, null, null, null, QueryParams.Searchdom.LOCAL, -1, null, @@ -290,7 +290,7 @@ public final class search { null, // no snippet computation count, 0, - filter, null, null, null, + filter, null, null, null, null, QueryParams.Searchdom.LOCAL, -1, constraint, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 008c0d607..e5395e6bd 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -349,6 +349,7 @@ public class yacysearch { String protocol = null; String tld = null; String ext = null; + String inlink = null; // check available memory and clean up if necessary if ( !MemoryControl.request(8000000L, false) ) { @@ -421,13 +422,13 @@ public class yacysearch { modifier.append("/language/").append(language).append(' '); } - final int inurl = querystring.indexOf("inurl:", 0); - if ( inurl >= 0 ) { - int ftb = querystring.indexOf(' ', inurl); + final int inurlp = querystring.indexOf("inurl:", 0); + if ( inurlp >= 0 ) { + int ftb = querystring.indexOf(' ', inurlp); if ( ftb == -1 ) { ftb = querystring.length(); } - final String urlstr = querystring.substring(inurl + 6, ftb); + final String urlstr = querystring.substring(inurlp + 6, ftb); querystring = querystring.replace("inurl:" + urlstr, ""); if ( !urlstr.isEmpty() ) { urlmask = urlmask == null ? 
".*" + urlstr + ".*" : urlmask + urlstr + ".*"; @@ -435,6 +436,17 @@ public class yacysearch { modifier.append("inurl:").append(urlstr).append(' '); } + final int inlinkp = querystring.indexOf("inlink:", 0); + if ( inlinkp >= 0 ) { + int ftb = querystring.indexOf(' ', inlinkp); + if ( ftb == -1 ) { + ftb = querystring.length(); + } + inlink = querystring.substring(inlinkp + 7, ftb); + querystring = querystring.replace("inlink:" + inlink, ""); + modifier.append("inlink:").append(inlink).append(' '); + } + final int filetype = querystring.indexOf("filetype:", 0); if ( filetype >= 0 ) { int ftb = querystring.indexOf(' ', filetype); @@ -712,7 +724,7 @@ public class yacysearch { snippetFetchStrategy, itemsPerPage, startRecord, - urlmask, protocol, tld, ext, + urlmask, protocol, tld, ext, inlink, clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted ? QueryParams.Searchdom.GLOBAL : QueryParams.Searchdom.LOCAL), diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 62197ab44..9751eaf7b 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -114,7 +114,7 @@ public final class QueryParams { public Pattern urlMask; public final Pattern prefer; - public final String protocol, tld, ext; + public final String protocol, tld, ext, inlink; boolean urlMask_isCatchall; public final Classification.ContentDomain contentdom; public final String targetlang; @@ -172,6 +172,7 @@ public final class QueryParams { this.protocol = null; this.tld = null; this.ext = null; + this.inlink = null; this.prefer = matchnothing_pattern; this.contentdom = ContentDomain.ALL; this.itemsPerPage = itemsPerPage; @@ -221,7 +222,7 @@ public final class QueryParams { final Collection metatags, final CacheStrategy snippetCacheStrategy, final int itemsPerPage, final int offset, - final String urlMask, final String protocol, final String tld, final String ext, + 
final String urlMask, final String protocol, final String tld, final String ext, final String inlink, final Searchdom domType, final int domMaxTargets, final Bitfield constraint, final boolean allofconstraint, final String nav_sitehash, @@ -267,6 +268,7 @@ public final class QueryParams { this.protocol = protocol; this.tld = tld; this.ext = ext; + this.inlink = inlink; try { this.prefer = Pattern.compile(prefer); } catch (final PatternSyntaxException ex) { @@ -477,6 +479,10 @@ public final class QueryParams { fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.ext).append('\"'); } + if (this.inlink != null) { + fq.append(" AND ").append(YaCySchema.outboundlinks_urlstub_txt.getSolrFieldName()).append(":\"").append(this.inlink).append('\"'); + } + if (!this.urlMask_isCatchall) { // add a filter query on urls String urlMaskPattern = this.urlMask.pattern(); @@ -576,6 +582,10 @@ public final class QueryParams { context.append(this.constraint).append(asterisk); context.append(this.maxDistance).append(asterisk); context.append(this.modifier.s).append(asterisk); + context.append(this.protocol).append(asterisk); + context.append(this.tld).append(asterisk); + context.append(this.ext).append(asterisk); + context.append(this.inlink).append(asterisk); context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk); context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); String result = context.toString();