|
|
@ -346,6 +346,9 @@ public class yacysearch {
|
|
|
|
|
|
|
|
|
|
|
|
if ( !block && (post == null || post.get("cat", "href").equals("href")) ) {
|
|
|
|
if ( !block && (post == null || post.get("cat", "href").equals("href")) ) {
|
|
|
|
String urlmask = null;
|
|
|
|
String urlmask = null;
|
|
|
|
|
|
|
|
String protocol = null;
|
|
|
|
|
|
|
|
String tld = null;
|
|
|
|
|
|
|
|
String ext = null;
|
|
|
|
|
|
|
|
|
|
|
|
// check available memory and clean up if necessary
|
|
|
|
// check available memory and clean up if necessary
|
|
|
|
if ( !MemoryControl.request(8000000L, false) ) {
|
|
|
|
if ( !MemoryControl.request(8000000L, false) ) {
|
|
|
@ -374,27 +377,27 @@ public class yacysearch {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( querystring.indexOf("/https", 0) >= 0 ) {
|
|
|
|
if ( querystring.indexOf("/https", 0) >= 0 ) {
|
|
|
|
querystring = querystring.replace("/https", "");
|
|
|
|
querystring = querystring.replace("/https", "");
|
|
|
|
urlmask = "https://.*";
|
|
|
|
protocol = "https";
|
|
|
|
modifier.append("/https ");
|
|
|
|
modifier.append("/https ");
|
|
|
|
} else if ( querystring.indexOf("/http", 0) >= 0 ) {
|
|
|
|
} else if ( querystring.indexOf("/http", 0) >= 0 ) {
|
|
|
|
querystring = querystring.replace("/http", "");
|
|
|
|
querystring = querystring.replace("/http", "");
|
|
|
|
urlmask = "http://.*";
|
|
|
|
protocol = "http";
|
|
|
|
modifier.append("/http ");
|
|
|
|
modifier.append("/http ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( querystring.indexOf("/ftp", 0) >= 0 ) {
|
|
|
|
if ( querystring.indexOf("/ftp", 0) >= 0 ) {
|
|
|
|
querystring = querystring.replace("/ftp", "");
|
|
|
|
querystring = querystring.replace("/ftp", "");
|
|
|
|
urlmask = "ftp://.*";
|
|
|
|
protocol = "ftp";
|
|
|
|
modifier.append("/ftp ");
|
|
|
|
modifier.append("/ftp ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( querystring.indexOf("/smb", 0) >= 0 ) {
|
|
|
|
if ( querystring.indexOf("/smb", 0) >= 0 ) {
|
|
|
|
querystring = querystring.replace("/smb", "");
|
|
|
|
querystring = querystring.replace("/smb", "");
|
|
|
|
urlmask = "smb://.*";
|
|
|
|
protocol = "smb";
|
|
|
|
modifier.append("/smb ");
|
|
|
|
modifier.append("/smb ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if ( querystring.indexOf("/file", 0) >= 0 ) {
|
|
|
|
if ( querystring.indexOf("/file", 0) >= 0 ) {
|
|
|
|
querystring = querystring.replace("/file", "");
|
|
|
|
querystring = querystring.replace("/file", "");
|
|
|
|
urlmask = "file://.*";
|
|
|
|
protocol = "file";
|
|
|
|
modifier.append("/file ");
|
|
|
|
modifier.append("/file ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -438,19 +441,13 @@ public class yacysearch {
|
|
|
|
if ( ftb == -1 ) {
|
|
|
|
if ( ftb == -1 ) {
|
|
|
|
ftb = querystring.length();
|
|
|
|
ftb = querystring.length();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
String ft = querystring.substring(filetype + 9, ftb);
|
|
|
|
ext = querystring.substring(filetype + 9, ftb);
|
|
|
|
querystring = querystring.replace("filetype:" + ft, "");
|
|
|
|
querystring = querystring.replace("filetype:" + ext, "");
|
|
|
|
while ( !ft.isEmpty() && ft.charAt(0) == '.' ) {
|
|
|
|
while ( !ext.isEmpty() && ext.charAt(0) == '.' ) {
|
|
|
|
ft = ft.substring(1);
|
|
|
|
ext = ext.substring(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( !ft.isEmpty() ) {
|
|
|
|
modifier.append("filetype:").append(ext).append(' ');
|
|
|
|
if ( urlmask == null ) {
|
|
|
|
if (ext.isEmpty()) ext = null;
|
|
|
|
urlmask = ".*\\." + ft + "(\\?.*)?";
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
urlmask = urlmask + ".*\\." + ft + "(\\?.*)?";
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
modifier.append("filetype:").append(ft).append(' ');
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int voc = 0;
|
|
|
|
int voc = 0;
|
|
|
@ -537,9 +534,7 @@ public class yacysearch {
|
|
|
|
final boolean quotes = (querystring.charAt(authori + 7) == '(');
|
|
|
|
final boolean quotes = (querystring.charAt(authori + 7) == '(');
|
|
|
|
if ( quotes ) {
|
|
|
|
if ( quotes ) {
|
|
|
|
int ftb = querystring.indexOf(')', authori + 8);
|
|
|
|
int ftb = querystring.indexOf(')', authori + 8);
|
|
|
|
if ( ftb == -1 ) {
|
|
|
|
if (ftb == -1) ftb = querystring.length() + 1;
|
|
|
|
ftb = querystring.length() + 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
author = querystring.substring(authori + 8, ftb);
|
|
|
|
author = querystring.substring(authori + 8, ftb);
|
|
|
|
querystring = querystring.replace("author:(" + author + ")", "");
|
|
|
|
querystring = querystring.replace("author:(" + author + ")", "");
|
|
|
|
modifier.append("author:(").append(author).append(") ");
|
|
|
|
modifier.append("author:(").append(author).append(") ");
|
|
|
@ -554,28 +549,19 @@ public class yacysearch {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
final int tld = querystring.indexOf("tld:", 0);
|
|
|
|
final int tldp = querystring.indexOf("tld:", 0);
|
|
|
|
if ( tld >= 0 ) {
|
|
|
|
if (tldp >= 0) {
|
|
|
|
int ftb = querystring.indexOf(' ', tld);
|
|
|
|
int ftb = querystring.indexOf(' ', tldp);
|
|
|
|
if ( ftb == -1 ) {
|
|
|
|
if (ftb == -1) ftb = querystring.length();
|
|
|
|
ftb = querystring.length();
|
|
|
|
tld = querystring.substring(tldp + 4, ftb);
|
|
|
|
}
|
|
|
|
querystring = querystring.replace("tld:" + tld, "");
|
|
|
|
String domain = querystring.substring(tld + 4, ftb);
|
|
|
|
modifier.append("tld:").append(tld).append(' ');
|
|
|
|
querystring = querystring.replace("tld:" + domain, "");
|
|
|
|
while ( tld.length() > 0 && tld.charAt(0) == '.' ) {
|
|
|
|
modifier.append("tld:").append(domain).append(' ');
|
|
|
|
tld = tld.substring(1);
|
|
|
|
while ( domain.length() > 0 && domain.charAt(0) == '.' ) {
|
|
|
|
|
|
|
|
domain = domain.substring(1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( domain.indexOf('.', 0) < 0 ) {
|
|
|
|
|
|
|
|
domain = "\\." + domain;
|
|
|
|
|
|
|
|
} // is tld
|
|
|
|
|
|
|
|
if ( domain.length() > 0 ) {
|
|
|
|
|
|
|
|
urlmask = "[a-zA-Z]*://[^/]*" + domain + "/.*" + ((urlmask != null) ? urlmask : "");
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tld.length() == 0) tld = null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( urlmask == null || urlmask.isEmpty() ) {
|
|
|
|
if (urlmask == null || urlmask.isEmpty()) urlmask = ".*"; //if no urlmask was given
|
|
|
|
urlmask = ".*";
|
|
|
|
|
|
|
|
} //if no urlmask was given
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// read the language from the language-restrict option 'lr'
|
|
|
|
// read the language from the language-restrict option 'lr'
|
|
|
|
// if no one is given, use the user agent or the system language as default
|
|
|
|
// if no one is given, use the user agent or the system language as default
|
|
|
@ -726,7 +712,7 @@ public class yacysearch {
|
|
|
|
snippetFetchStrategy,
|
|
|
|
snippetFetchStrategy,
|
|
|
|
itemsPerPage,
|
|
|
|
itemsPerPage,
|
|
|
|
startRecord,
|
|
|
|
startRecord,
|
|
|
|
urlmask,
|
|
|
|
urlmask, protocol, tld, ext,
|
|
|
|
clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted
|
|
|
|
clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted
|
|
|
|
? QueryParams.Searchdom.GLOBAL
|
|
|
|
? QueryParams.Searchdom.GLOBAL
|
|
|
|
: QueryParams.Searchdom.LOCAL),
|
|
|
|
: QueryParams.Searchdom.LOCAL),
|
|
|
|