added an 'inlink' search option according to the suggestion in the YaCy

forum at 
http://forum.yacy-websuche.de/viewtopic.php?f=18&t=4572#p27410

The feature is called 'inlink' rather than 'haslink' so that its name is
analogous to 'inurl'. With it you can now search for
words in the outbound links of a document, for example:
* inlink:yacy
finds all documents that link to pages which have 'yacy' in their
URL.
pull/1/head
Michael Peter Christen 12 years ago
parent 76e1e91b11
commit d2d5be032d

@ -126,6 +126,8 @@
<dl style="width:500px">
<dt>inurl:&lt;phrase&gt;</dt>
<dd>only urls with the &lt;phrase&gt; in the url</dd>
<dt>inlink:&lt;phrase&gt;</dt>
<dd>only urls with the &lt;phrase&gt; within outbound links of the document</dd>
<dt>filetype:&lt;ext&gt;</dt>
<dd>only urls with extension &lt;ext&gt;</dd>
<dt>site:&lt;host&gt;</dt>

@ -228,7 +228,7 @@ public final class search {
null, // no snippet computation
count,
0,
filter, null, null, null,
filter, null, null, null, null,
QueryParams.Searchdom.LOCAL,
-1,
null,
@ -290,7 +290,7 @@ public final class search {
null, // no snippet computation
count,
0,
filter, null, null, null,
filter, null, null, null, null,
QueryParams.Searchdom.LOCAL,
-1,
constraint,

@ -349,6 +349,7 @@ public class yacysearch {
String protocol = null;
String tld = null;
String ext = null;
String inlink = null;
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
@ -421,13 +422,13 @@ public class yacysearch {
modifier.append("/language/").append(language).append(' ');
}
final int inurl = querystring.indexOf("inurl:", 0);
if ( inurl >= 0 ) {
int ftb = querystring.indexOf(' ', inurl);
final int inurlp = querystring.indexOf("inurl:", 0);
if ( inurlp >= 0 ) {
int ftb = querystring.indexOf(' ', inurlp);
if ( ftb == -1 ) {
ftb = querystring.length();
}
final String urlstr = querystring.substring(inurl + 6, ftb);
final String urlstr = querystring.substring(inurlp + 6, ftb);
querystring = querystring.replace("inurl:" + urlstr, "");
if ( !urlstr.isEmpty() ) {
urlmask = urlmask == null ? ".*" + urlstr + ".*" : urlmask + urlstr + ".*";
@ -435,6 +436,17 @@ public class yacysearch {
modifier.append("inurl:").append(urlstr).append(' ');
}
final int inlinkp = querystring.indexOf("inlink:", 0);
if ( inlinkp >= 0 ) {
int ftb = querystring.indexOf(' ', inlinkp);
if ( ftb == -1 ) {
ftb = querystring.length();
}
inlink = querystring.substring(inlinkp + 7, ftb);
querystring = querystring.replace("inlink:" + inlink, "");
modifier.append("inlink:").append(inlink).append(' ');
}
final int filetype = querystring.indexOf("filetype:", 0);
if ( filetype >= 0 ) {
int ftb = querystring.indexOf(' ', filetype);
@ -712,7 +724,7 @@ public class yacysearch {
snippetFetchStrategy,
itemsPerPage,
startRecord,
urlmask, protocol, tld, ext,
urlmask, protocol, tld, ext, inlink,
clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted
? QueryParams.Searchdom.GLOBAL
: QueryParams.Searchdom.LOCAL),

@ -114,7 +114,7 @@ public final class QueryParams {
public Pattern urlMask;
public final Pattern prefer;
public final String protocol, tld, ext;
public final String protocol, tld, ext, inlink;
boolean urlMask_isCatchall;
public final Classification.ContentDomain contentdom;
public final String targetlang;
@ -172,6 +172,7 @@ public final class QueryParams {
this.protocol = null;
this.tld = null;
this.ext = null;
this.inlink = null;
this.prefer = matchnothing_pattern;
this.contentdom = ContentDomain.ALL;
this.itemsPerPage = itemsPerPage;
@ -221,7 +222,7 @@ public final class QueryParams {
final Collection<Tagging.Metatag> metatags,
final CacheStrategy snippetCacheStrategy,
final int itemsPerPage, final int offset,
final String urlMask, final String protocol, final String tld, final String ext,
final String urlMask, final String protocol, final String tld, final String ext, final String inlink,
final Searchdom domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String nav_sitehash,
@ -267,6 +268,7 @@ public final class QueryParams {
this.protocol = protocol;
this.tld = tld;
this.ext = ext;
this.inlink = inlink;
try {
this.prefer = Pattern.compile(prefer);
} catch (final PatternSyntaxException ex) {
@ -477,6 +479,10 @@ public final class QueryParams {
fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(":\"").append(this.ext).append('\"');
}
if (this.inlink != null) {
fq.append(" AND ").append(YaCySchema.outboundlinks_urlstub_txt.getSolrFieldName()).append(":\"").append(this.inlink).append('\"');
}
if (!this.urlMask_isCatchall) {
// add a filter query on urls
String urlMaskPattern = this.urlMask.pattern();
@ -576,6 +582,10 @@ public final class QueryParams {
context.append(this.constraint).append(asterisk);
context.append(this.maxDistance).append(asterisk);
context.append(this.modifier.s).append(asterisk);
context.append(this.protocol).append(asterisk);
context.append(this.tld).append(asterisk);
context.append(this.ext).append(asterisk);
context.append(this.inlink).append(asterisk);
context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk);
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());
String result = context.toString();

Loading…
Cancel
Save