- fixed opensearch bugs

- added 'full domain' button to expert crawl start
- removed the non-working 'only one domain' button; its regex allowed crawling of other domains

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4125 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 7404f2c35c
commit b183bf6f42

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.546
releaseVersion=0.547
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

@ -72,13 +72,13 @@
<tr valign="top" class="TableCellDark">
<td><label for="crawlingFilter">Crawling Filter</label>:</td>
<td>
<input name="crawlingFilter" id="crawlingFilter" type="text" size="20" maxlength="100" value="#[crawlingFilter]#" />
<input type="button" value="only one domain" onclick='var domain=prompt("please enter domain without \"www.\" (i.e. \"yacy.net\").");if(domain!=null){document.getElementById("crawlingFilter").value="(^|.*\\.)"+domain.replace("\.", "\\.")+"/.*"}' />
<input name="crawlingFilter" id="crawlingFilter" type="text" size="20" maxlength="100" value="#[crawlingFilter]#" />
<input type="radio" name="range" value="wide" checked="checked" />Use filter&nbsp;&nbsp;|&nbsp;&nbsp;
<input type="radio" name="range" value="domain" />Restrict to start domain
</td>
<td>
This is an emacs-like regular expression that must match with the URLs which are used to be crawled.
Use this i.e. to crawl a single domain. If you set this filter it makes sense to increase
the crawling depth.
The filter is an emacs-like regular expression that must match the URLs that are to be crawled; the default is 'catch all'.
You can also use an automatic domain-restriction to fully crawl a single domain.
</td>
</tr>
<tr valign="top" class="TableCellLight">

@ -104,15 +104,15 @@ public class WatchCrawler_p {
boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start
String newcrawlingfilter = post.get("crawlingFilter", ".*");
if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
env.setConfig("crawlingFilter", newcrawlingfilter);
if (fullDomain) try {
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*";
} catch (MalformedURLException e) {}
if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
env.setConfig("crawlingFilter", newcrawlingfilter);
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8"));
if (fullDomain) newcrawlingdepth = 8;
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
if ((fullDomain) && (newcrawlingdepth < 8)) newcrawlingdepth = 8;
boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>YaCy/#[clientname]#</ShortName>
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>

@ -152,7 +152,7 @@ public class yacysearch {
}
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
int count = post.getInt("count", 10);
int itemsPerPage = post.getInt("count", 10);
int offset = post.getInt("offset", 0);
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
final boolean indexof = post.get("indexof","").equals("on");
@ -186,7 +186,7 @@ public class yacysearch {
int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text"));
// patch until better search profiles are available
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 30)) count = 30;
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (itemsPerPage <= 30)) itemsPerPage = 30;
serverObjects prop = new serverObjects();
if (post.get("cat", "href").equals("href")) {
@ -257,7 +257,7 @@ public class yacysearch {
prefermask,
contentdomCode,
true,
count,
itemsPerPage,
offset,
searchtime,
urlmask,
@ -319,8 +319,9 @@ public class yacysearch {
prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount());
prop.put("num-results_globalresults", 1);
prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount());
prop.put("num-results_offset", 0);
prop.put("num-results_offset", offset);
prop.put("num-results_linkcount", 0);
prop.put("num-results_itemsPerPage", itemsPerPage);
// compose page navigation
StringBuffer resnav = new StringBuffer();
@ -391,7 +392,7 @@ public class yacysearch {
prop.putASIS("input_promoteSearchPageGreeting", promoteSearchPageGreeting);
prop.put("input_former", querystring);
prop.put("former", post.get("search", ""));
prop.put("input_count", count);
prop.put("input_count", itemsPerPage);
prop.put("input_offset", offset);
prop.put("input_resource", (global) ? "global" : "local");
prop.put("input_time", searchtime / 1000);

@ -11,9 +11,9 @@
<url>#[rssYacyImageURL]#</url>
<title>Search for #[former]#</title>
</image>
<opensearch:totalResults>#[results]#</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>#[results]#</opensearch:itemsPerPage>
<opensearch:totalResults>#[num-results_totalcount]#</opensearch:totalResults>
<opensearch:startIndex>#[num-results_offset]#</opensearch:startIndex>
<opensearch:itemsPerPage>#[num-results_itemsPerPage]#</opensearch:itemsPerPage>
<opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/>
<opensearch:Query role="request" searchTerms="#[former]#" />

Loading…
Cancel
Save