added crawl settings for three new per-crawl filters:

must-match filter for IPs (the IPs known after DNS resolution of each URL in the crawl queue)
must-not-match filter for IPs
must-match filter against a list of country codes (allows loading only from hosts located in the given countries)

note: this commit adds the settings and the input fields, but the values are not yet evaluated (a sketch of a possible evaluation follows below the commit metadata)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7976 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 14 years ago
parent 47a8c69745
commit 5ad7f9612b
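Since the new values are not yet evaluated anywhere in this commit, here is a minimal, self-contained sketch of how a resolved IP and its country code could later be checked against the three filters. The class and method names (HostFilterSketch, accept) are hypothetical and not part of this commit; only the filter semantics (regex on the IP, comma-separated country whitelist) come from the commit itself.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.regex.Pattern;

    public final class HostFilterSketch {

        /**
         * Checks a resolved IP address and its country code against the three new filters.
         * Returns true if the host may be loaded.
         */
        public static boolean accept(final String ip, final String countryCode,
                                     final Pattern ipMustMatch, final Pattern ipMustNotMatch,
                                     final String countryMustMatch) {
            if (!ipMustMatch.matcher(ip).matches()) return false;    // IP must-match filter
            if (ipMustNotMatch.matcher(ip).matches()) return false;  // IP must-not-match filter
            if (countryMustMatch.length() == 0) return true;         // empty list: no country restriction
            final Set<String> allowed =
                new HashSet<String>(Arrays.asList(countryMustMatch.split(",")));
            return allowed.contains(countryCode.toUpperCase());      // country-code whitelist
        }
    }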

@@ -557,6 +557,12 @@ xpstopw=true
 # Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
 filterOutStopwordsFromTopwords=true
+# crawling steering: must-match/must-not-match
+crawlingIPMustMatch=.*
+crawlingIPMustNotMatch=
+# the default country codes are all codes for countries in Europe
+crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
 # performance-settings
 # delay-times for permanent loops (milliseconds)
 # the idlesleep is the pause that an proces sleeps if the last call to the
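As an illustration of how these keys could be used once the values are evaluated (example values only, not defaults from this commit), an operator might narrow a crawl to a private subnet and to German-speaking countries:

    crawlingIPMustMatch=192\.168\.1\..*
    crawlingIPMustNotMatch=
    crawlingCountryMustMatch=DE,AT,CH

The IP filters are Java regular expressions (they are compiled with Pattern.compile elsewhere in this commit); the country filter is a plain comma-separated list.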

@@ -86,8 +86,8 @@ public class CrawlProfileEditor_p {
 static {
 labels.add(new eentry(CrawlProfile.NAME, "Name", true, eentry.STRING));
 labels.add(new eentry(CrawlProfile.START_URL, "Start URL", true, eentry.STRING));
-labels.add(new eentry(CrawlProfile.FILTER_MUSTMATCH, "Must-Match Filter", false, eentry.STRING));
-labels.add(new eentry(CrawlProfile.FILTER_MUSTNOTMATCH, "Must-Not-Match Filter", false, eentry.STRING));
+labels.add(new eentry(CrawlProfile.FILTER_URL_MUSTMATCH, "Must-Match Filter", false, eentry.STRING));
+labels.add(new eentry(CrawlProfile.FILTER_URL_MUSTNOTMATCH, "Must-Not-Match Filter", false, eentry.STRING));
 labels.add(new eentry(CrawlProfile.DEPTH, "Crawl Depth", false, eentry.INTEGER));
 labels.add(new eentry(CrawlProfile.RECRAWL_IF_OLDER, "Recrawl If Older", false, eentry.INTEGER));
 labels.add(new eentry(CrawlProfile.DOM_MAX_PAGES, "Domain Max. Pages", false, eentry.INTEGER));
@@ -159,8 +159,8 @@ public class CrawlProfileEditor_p {
 if ((post != null) && (selentry != null)) {
 if (post.containsKey("submit")) {
 try {
-Pattern.compile(post.get(CrawlProfile.FILTER_MUSTMATCH, CrawlProfile.MATCH_ALL));
-Pattern.compile(post.get(CrawlProfile.FILTER_MUSTNOTMATCH, CrawlProfile.MATCH_NEVER));
+Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTMATCH, CrawlProfile.MATCH_ALL));
+Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH, CrawlProfile.MATCH_NEVER));
 final Iterator<eentry> lit = labels.iterator();
 eentry tee;
 while (lit.hasNext()) {

@@ -136,7 +136,7 @@
 </td>
 </tr>
 <tr valign="top" class="TableCellLight">
-<td><label for="mustmatch">Must-Match Filter</label>:</td>
+<td><label for="mustmatch">Must-Match Filter for URLs</label>:</td>
 <td>
 <input type="radio" name="range" id="rangeWide" value="wide" checked="checked" />Use filter&nbsp;&nbsp;
 <input name="mustmatch" id="mustmatch" type="text" size="60" maxlength="100" value="#[mustmatch]#" /><br />
@@ -151,7 +151,7 @@
 </td>
 </tr>
 <tr valign="top" class="TableCellDark">
-<td><label for="mustnotmatch">Must-Not-Match Filter</label>:</td>
+<td><label for="mustnotmatch">Must-Not-Match Filter for URLs</label>:</td>
 <td>
 <input name="mustnotmatch" id="mustnotmatch" type="text" size="60" maxlength="100" value="#[mustnotmatch]#" />
 </td>
@@ -162,6 +162,37 @@
 If you don't know what this means, please leave this field empty.
 </td>
 </tr>
+<tr valign="top" class="TableCellLight">
+<td><label for="ipMustmatch">Must-Match Filter for IPs</label>:</td>
+<td>
+<input name="ipMustmatch" id="ipMustmatch" type="text" size="60" maxlength="100" value="#[ipMustmatch]#" />
+</td>
+<td>
+Like the Must-Match Filter for URLs, this filter must match, but it is applied to the IP of the host.
+YaCy performs a DNS lookup for each host, and this filter restricts the crawl to specific IPs.
+</td>
+</tr>
+<tr valign="top" class="TableCellDark">
+<td><label for="ipMustnotmatch">Must-Not-Match Filter for IPs</label>:</td>
+<td>
+<input name="ipMustnotmatch" id="ipMustnotmatch" type="text" size="60" maxlength="100" value="#[ipMustnotmatch]#" />
+</td>
+<td>
+This filter must not match the IP of the crawled host.
+</td>
+</tr>
+<tr valign="top" class="TableCellLight">
+<td><label for="crawlingCountryMustMatch">Must-Match List for Country Codes</label>:</td>
+<td>
+<input type="radio" name="countryMustMatchSwitch" id="countryMustMatchSwitch" value="true" />Use filter&nbsp;&nbsp;
+<input name="crawlingCountryMustMatch" id="crawlingCountryMustMatch" type="text" size="60" maxlength="100" value="#[crawlingCountryMustMatch]#" />
+<input type="radio" name="countryMustMatchSwitch" id="countryMustMatchSwitch" value="false" checked="checked" />no country code restriction
+</td>
+<td>
+Crawls can be restricted to specific countries. This uses the country code that can be computed from
+the IP of the server that hosts the page. The filter is not a regular expression but a comma-separated list of country codes.
+</td>
+</tr>
 <tr valign="top" class="TableCellDark">
 <td>Maximum Pages per Domain:</td>
 <td>

@@ -9,7 +9,7 @@
 // $LastChangedBy: orbiter $
 //
 // LICENSE
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
@@ -25,32 +25,36 @@
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 public class CrawlStartExpert_p {
 public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
 // return variable that accumulates replacements
-//final Switchboard sb = (Switchboard) env;
+final Switchboard sb = (Switchboard) env;
 final serverObjects prop = new serverObjects();
 // define visible variables
 prop.put("starturl", /*(intranet) ? repository :*/ "http://");
 prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
 prop.put("crawlingDepth", Math.min(3, env.getConfigLong("crawlingDepth", 0)));
 prop.put("mustmatch", /*(intranet) ? repository + ".*" :*/ CrawlProfile.MATCH_ALL);
 prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER);
+prop.put("ipMustmatch", sb.getConfig("crawlingIPMustMatch", CrawlProfile.MATCH_ALL));
+prop.put("ipMustnotmatch", sb.getConfig("crawlingIPMustNotMatch", CrawlProfile.MATCH_NEVER));
+prop.put("crawlingCountryMustMatch", sb.getConfig("crawlingCountryMustMatch", ""));
 prop.put("crawlingIfOlderCheck", "0");
 prop.put("crawlingIfOlderUnitYearCheck", "0");
 prop.put("crawlingIfOlderUnitMonthCheck", "0");
 prop.put("crawlingIfOlderUnitDayCheck", "1");
 prop.put("crawlingIfOlderUnitHourCheck", "0");
 prop.put("crawlingIfOlderNumber", "7");
 final int crawlingDomFilterDepth = env.getConfigInt("crawlingDomFilterDepth", -1);
 prop.put("crawlingDomFilterCheck", (crawlingDomFilterDepth == -1) ? "0" : "1");
 prop.put("crawlingDomFilterDepth", (crawlingDomFilterDepth == -1) ? 1 : crawlingDomFilterDepth);
@@ -62,18 +66,18 @@ public class CrawlStartExpert_p {
 prop.put("indexingTextChecked", env.getConfigBool("indexText", true) ? "1" : "0");
 prop.put("indexingMediaChecked", env.getConfigBool("indexMedia", true) ? "1" : "0");
 prop.put("crawlOrderChecked", env.getConfigBool("crawlOrder", true) ? "1" : "0");
 final long LCbusySleep = env.getConfigLong(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, 100L);
 final int LCppm = (LCbusySleep == 0) ? 1000 : (int) (60000L / LCbusySleep);
 prop.put("crawlingSpeedMaxChecked", (LCppm >= 1000) ? "1" : "0");
 prop.put("crawlingSpeedCustChecked", ((LCppm > 10) && (LCppm < 1000)) ? "1" : "0");
 prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? "1" : "0");
 prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : "");
 prop.put("xsstopwChecked", env.getConfigBool("xsstopw", true) ? "1" : "0");
 prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0");
 prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0");
 // return rewrite properties
 return prop;
 }

@@ -156,6 +156,14 @@ public class Crawler_p {
 String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
 final String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
 if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted
+String ipMustMatch = post.get("ipMustmatch", CrawlProfile.MATCH_ALL);
+final String ipMustNotMatch = post.get("ipMustnotmatch", CrawlProfile.MATCH_NEVER);
+if (ipMustMatch.length() < 2) ipMustMatch = CrawlProfile.MATCH_ALL;
+final String countryMustMatch = post.getBoolean("countryMustMatchSwitch", false) ? post.get("countryMustMatchList", "") : "";
+sb.setConfig("crawlingIPMustMatch", ipMustMatch);
+sb.setConfig("crawlingIPMustNotMatch", ipMustNotMatch);
+if (countryMustMatch.length() > 0) sb.setConfig("crawlingCountryMustMatch", countryMustMatch);
 // special cases:
 if (crawlingStartURL!= null && fullDomain) {
 if (crawlingStartURL.isFile()) {
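The submitted filter values are stored in the configuration as-is. A hypothetical helper, not part of this commit, that trims, upper-cases, and de-duplicates the comma-separated country list before it is stored might look roughly like this:

    // Hypothetical helper (assumption, not in this commit): normalize a comma-separated
    // country list such as " de, AT,de " to "DE,AT" before storing it in the configuration.
    private static String normalizeCountryList(final String list) {
        final StringBuilder sb = new StringBuilder();
        for (final String code : list.split(",")) {
            final String c = code.trim().toUpperCase();
            // keep only two-letter codes and skip duplicates
            if (c.length() == 2 && sb.indexOf(c) < 0) {
                if (sb.length() > 0) sb.append(',');
                sb.append(c);
            }
        }
        return sb.toString();
    }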
@@ -249,7 +257,10 @@ public class Crawler_p {
 crawlingStart,
 crawlingStartURL,
 newcrawlingMustMatch,
-CrawlProfile.MATCH_NEVER,
+newcrawlingMustNotMatch,
+ipMustMatch,
+ipMustNotMatch,
+countryMustMatch,
 newcrawlingdepth,
 crawlingIfOlder,
 crawlingDomMaxPages,
@@ -306,6 +317,9 @@ public class Crawler_p {
 crawlingStartURL,
 newcrawlingMustMatch,
 newcrawlingMustNotMatch,
+ipMustMatch,
+ipMustNotMatch,
+countryMustMatch,
 newcrawlingdepth,
 crawlingIfOlder,
 crawlingDomMaxPages,
@@ -426,6 +440,9 @@ public class Crawler_p {
 crawlURL,
 newcrawlingMustMatch,
 CrawlProfile.MATCH_NEVER,
+ipMustMatch,
+ipMustNotMatch,
+countryMustMatch,
 newcrawlingdepth,
 crawlingIfOlder,
 crawlingDomMaxPages,
@@ -463,6 +480,9 @@ public class Crawler_p {
 sitemapURL,
 CrawlProfile.MATCH_ALL,
 CrawlProfile.MATCH_NEVER,
+ipMustMatch,
+ipMustNotMatch,
+countryMustMatch,
 0,
 crawlingIfOlder,
 crawlingDomMaxPages,
@@ -504,6 +524,9 @@ public class Crawler_p {
 sitelistURL,
 newcrawlingMustMatch,
 CrawlProfile.MATCH_NEVER,
+ipMustMatch,
+ipMustNotMatch,
+countryMustMatch,
 newcrawlingdepth,
 crawlingIfOlder,
 crawlingDomMaxPages,

@@ -149,6 +149,9 @@ public class QuickCrawlLink_p {
 crawlingStartURL.getHost(),
 crawlingStartURL,
 crawlingMustMatch,
+CrawlProfile.MATCH_ALL,
+CrawlProfile.MATCH_NEVER,
+"",
 crawlingMustNotMatch,
 CrawlingDepth,
 60 * 24 * 30, // recrawlIfOlder (minutes); here: one month

@@ -48,8 +48,6 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 public static final String HANDLE = "handle";
 public static final String NAME = "name";
 public static final String START_URL = "startURL";
-public static final String FILTER_MUSTMATCH = "generalFilter";
-public static final String FILTER_MUSTNOTMATCH = "nevermatch";
 public static final String DEPTH = "generalDepth";
 public static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
 public static final String DOM_MAX_PAGES = "domMaxPages";
@@ -63,6 +61,11 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 public static final String XDSTOPW = "xdstopw";
 public static final String XPSTOPW = "xpstopw";
 public static final String CACHE_STRAGEGY = "cacheStrategy";
+public static final String FILTER_URL_MUSTMATCH = "generalFilter"; // for URLs
+public static final String FILTER_URL_MUSTNOTMATCH = "nevermatch"; // for URLs
+public static final String FILTER_IP_MUSTMATCH = "crawlingIPMustMatch";
+public static final String FILTER_IP_MUSTNOTMATCH = "crawlingIPMustNotMatch";
+public static final String FILTER_COUNTRY_MUSTMATCH = "crawlingCountryMustMatch";
 private Pattern mustmatch = null, mustnotmatch = null;
@@ -70,8 +73,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 * Constructor which creates CrawlPofile from parameters.
 * @param name name of the crawl profile
 * @param startURL root URL of the crawl
-* @param mustmatch URLs which do not match this regex will be ignored
-* @param mustnotmatch URLs which match this regex will be ignored
+* @param urlMustMatch URLs which do not match this regex will be ignored
+* @param urlMustNotMatch URLs which match this regex will be ignored
 * @param depth height of the tree which will be created by the crawler
 * @param recrawlIfOlder documents which have been indexed in the past will
 * be indexed again if they are older than the time (ms) in this parameter
@@ -89,8 +92,11 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 public CrawlProfile(
 final String name,
 final DigestURI startURL,
-final String mustmatch,
-final String mustnotmatch,
+final String urlMustMatch,
+final String urlMustNotMatch,
+final String ipMustMatch,
+final String ipMustNotMatch,
+final String countryMustMatch,
 final int depth,
 final long recrawlIfOlder /*date*/,
 final int domMaxPages,
@@ -107,14 +113,17 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 if (name == null || name.isEmpty()) {
 throw new NullPointerException("name must not be null or empty");
 }
 final String handle = (startURL == null)
 ? Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength)
 : ASCII.String(startURL.hash());
 put(HANDLE, handle);
 put(NAME, name);
 put(START_URL, (startURL == null) ? "" : startURL.toNormalform(true, false));
-put(FILTER_MUSTMATCH, (mustmatch == null) ? CrawlProfile.MATCH_ALL : mustmatch);
-put(FILTER_MUSTNOTMATCH, (mustnotmatch == null) ? CrawlProfile.MATCH_NEVER : mustnotmatch);
+put(FILTER_URL_MUSTMATCH, (urlMustMatch == null) ? CrawlProfile.MATCH_ALL : urlMustMatch);
+put(FILTER_URL_MUSTNOTMATCH, (urlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER : urlMustNotMatch);
+put(FILTER_IP_MUSTMATCH, (ipMustMatch == null) ? CrawlProfile.MATCH_ALL : ipMustMatch);
+put(FILTER_IP_MUSTNOTMATCH, (ipMustNotMatch == null) ? CrawlProfile.MATCH_NEVER : ipMustNotMatch);
+put(FILTER_COUNTRY_MUSTMATCH, (countryMustMatch == null) ? "" : countryMustMatch);
 put(DEPTH, depth);
 put(RECRAWL_IF_OLDER, recrawlIfOlder);
 put(DOM_MAX_PAGES, domMaxPages);
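The commit stores the new keys in the constructor but does not yet add accessors for them. Accessors analogous to the existing mustMatchPattern()/mustNotMatchPattern() shown further below could later look roughly like this (the method names are assumptions, and the lazy caching used by the URL variants is omitted for brevity):

    // Hypothetical accessors (not part of this commit) for the new IP filter keys.
    public Pattern ipMustMatchPattern() {
        String r = get(FILTER_IP_MUSTMATCH);
        if (r == null) r = CrawlProfile.MATCH_ALL;   // default: match every IP
        return Pattern.compile(r);
    }

    public Pattern ipMustNotMatchPattern() {
        String r = get(FILTER_IP_MUSTNOTMATCH);
        if (r == null) r = CrawlProfile.MATCH_NEVER; // default: exclude nothing
        return Pattern.compile(r);
    }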
@@ -137,7 +146,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 super(ext == null ? 1 : ext.size());
 if (ext != null) putAll(ext);
 }
 /**
 * Adds a parameter to CrawlProfile.
 * @param key name of the parameter
@@ -174,7 +183,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 //if (r == null) return null;
 return r;
 }
 /**
 * Gets the name of the CrawlProfile.
 * @return name of the profile
@@ -184,7 +193,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 if (r == null) return "";
 return r;
 }
 /**
 * Gets the root URL of the crawl job.
 * @return root URL
@@ -193,35 +202,35 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 final String r = get(START_URL);
 return r;
 }
 /**
 * Gets the regex which must be matched by URLs in order to be crawled.
 * @return regex which must be matched
 */
 public Pattern mustMatchPattern() {
 if (this.mustmatch == null) {
-String r = get(FILTER_MUSTMATCH);
+String r = get(FILTER_URL_MUSTMATCH);
 if (r == null) r = CrawlProfile.MATCH_ALL;
 this.mustmatch = Pattern.compile(r);
 }
 return this.mustmatch;
 }
 /**
 * Gets the regex which must not be matched by URLs in order to be crawled.
 * @return regex which must not be matched
 */
 public Pattern mustNotMatchPattern() {
 if (this.mustnotmatch == null) {
-String r = get(FILTER_MUSTNOTMATCH);
+String r = get(FILTER_URL_MUSTNOTMATCH);
 if (r == null) r = CrawlProfile.MATCH_NEVER;
 this.mustnotmatch = Pattern.compile(r);
 }
 return this.mustnotmatch;
 }
 /**
 * Gets depth of crawl job (or height of the tree which will be
 * created by the crawler).
 * @return depth of crawl job
 */
@@ -235,7 +244,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 return 0;
 }
 }
 public CacheStrategy cacheStrategy() {
 final String r = get(CACHE_STRAGEGY);
 if (r == null) return CacheStrategy.IFEXIST;
@@ -246,11 +255,11 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 return CacheStrategy.IFEXIST;
 }
 }
 public void setCacheStrategy(final CacheStrategy newStrategy) {
 put(CACHE_STRAGEGY, newStrategy.toString());
 }
 /**
 * Gets the minimum age that an entry must have to be re-crawled.
 * @return time in ms
@@ -268,7 +277,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 return 0L;
 }
 }
 public int domMaxPages() {
 // this is the maximum number of pages that are crawled for a single domain
 // if -1, this means no limit
@@ -283,31 +292,31 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 return Integer.MAX_VALUE;
 }
 }
 public boolean crawlingQ() {
 final String r = get(CRAWLING_Q);
 if (r == null) return false;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean pushSolr() {
 final String r = get(PUSH_SOLR);
 if (r == null) return true;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean indexText() {
 final String r = get(INDEX_TEXT);
 if (r == null) return true;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean indexMedia() {
 final String r = get(INDEX_MEDIA);
 if (r == null) return true;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean storeHTCache() {
 final String r = get(STORE_HTCACHE);
 if (r == null) return false;
@@ -318,19 +327,19 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 if (r == null) return false;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean excludeStaticStopwords() {
 final String r = get(XSSTOPW);
 if (r == null) return false;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean excludeDynamicStopwords() {
 final String r = get(XDSTOPW);
 if (r == null) return false;
 return (r.equals(Boolean.TRUE.toString()));
 }
 public boolean excludeParentStopwords() {
 final String r = get(XPSTOPW);
 if (r == null) return false;

@@ -63,7 +63,8 @@ public final class CrawlSwitchboard {
 public static final long CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE = 60L * 24L * 30L;
 private final Log log;
-private Map<byte[], Map<String, String>> profilesActiveCrawls, profilesPassiveCrawls, profilesInvalidCrawls;
+private Map<byte[], Map<String, String>> profilesActiveCrawls;
+private final Map<byte[], Map<String, String>> profilesPassiveCrawls, profilesInvalidCrawls;
 public CrawlProfile defaultProxyProfile;
 public CrawlProfile defaultRemoteProfile;
 public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
@@ -91,28 +92,28 @@ public final class CrawlSwitchboard {
 final File profilesInvalidFile = new File(queuesRoot, DBFILE_INVALID_CRAWL_PROFILES);
 this.profilesInvalidCrawls = loadFromDB(profilesInvalidFile);
 final File profilesActiveFile = new File(queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
 this.profilesActiveCrawls = loadFromDB(profilesActiveFile);
 for (final byte[] handle : this.profilesActiveCrawls.keySet()) {
 final CrawlProfile p;
 p = new CrawlProfile(this.profilesActiveCrawls.get(handle));
-if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTMATCH))) {
-this.removeActive(handle);
-this.putInvalid(handle, p);
+if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTMATCH))) {
+removeActive(handle);
+putInvalid(handle, p);
 Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
-+ " from active crawls since " + CrawlProfile.FILTER_MUSTMATCH
-+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTMATCH));
-} else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_MUSTNOTMATCH))) {
-this.putInvalid(handle, p);
-this.removeActive(handle);
++ " from active crawls since " + CrawlProfile.FILTER_URL_MUSTMATCH
++ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTMATCH));
+} else if (!RegexHelper.isValidRegex(p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH))) {
+putInvalid(handle, p);
+removeActive(handle);
 Log.logWarning("CrawlProfiles", "removed Profile " + p.handle() + ": " + p.name()
-+ " from active crawls since " + CrawlProfile.FILTER_MUSTNOTMATCH
-+ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_MUSTNOTMATCH));
++ " from active crawls since " + CrawlProfile.FILTER_URL_MUSTNOTMATCH
++ " is no valid regular expression: " + p.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH));
 } else {
 Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
 }
 }
 initActiveCrawlProfiles();
 log.logInfo("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries");
@@ -134,7 +135,7 @@ public final class CrawlSwitchboard {
 if (m == null) return null;
 return new CrawlProfile(m);
 }
 public CrawlProfile getInvalid(final byte[] profileKey) {
 if (profileKey == null) return null;
 final Map<String, String> m = this.profilesInvalidCrawls.get(profileKey);
@@ -152,7 +153,7 @@ public final class CrawlSwitchboard {
 public Set<byte[]> getActive() {
 return this.profilesActiveCrawls.keySet();
 }
 public Set<byte[]> getInvalid() {
 return this.profilesInvalidCrawls.keySet();
 }
@@ -165,7 +166,7 @@ public final class CrawlSwitchboard {
 if (profileKey == null) return;
 this.profilesActiveCrawls.remove(profileKey);
 }
 public void removeInvalid(final byte[] profileKey) {
 if (profileKey == null) return;
 this.profilesInvalidCrawls.remove(profileKey);
@@ -179,7 +180,7 @@ public final class CrawlSwitchboard {
 public void putActive(final byte[] profileKey, final CrawlProfile profile) {
 this.profilesActiveCrawls.put(profileKey, profile);
 }
 public void putInvalid(final byte[] profileKey, final CrawlProfile profile) {
 this.profilesInvalidCrawls.put(profileKey, profile);
 }
@@ -227,7 +228,10 @@ public final class CrawlSwitchboard {
 if (this.defaultProxyProfile == null) {
 // generate new default entry for proxy crawling
 this.defaultProxyProfile = new CrawlProfile(
-"proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
+"proxy", null,
+CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
+CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
+"",
 0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false,
 true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
@@ -239,38 +243,38 @@ public final class CrawlSwitchboard {
 }
 if (this.defaultRemoteProfile == null) {
 // generate new default entry for remote crawling
-this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", CrawlProfile.MATCH_NEVER, 0,
 -1, -1, true, true, true, false, false, true, true, false, CacheStrategy.IFFRESH);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultRemoteProfile.handle()), this.defaultRemoteProfile);
 }
 if (this.defaultTextSnippetLocalProfile == null) {
 // generate new default entry for snippet fetch and optional crawling
-this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), this.defaultTextSnippetLocalProfile);
 }
 if (this.defaultTextSnippetGlobalProfile == null) {
 // generate new default entry for snippet fetch and optional crawling
-this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CacheStrategy.IFEXIST);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), this.defaultTextSnippetGlobalProfile);
 }
 this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
 if (this.defaultMediaSnippetLocalProfile == null) {
 // generate new default entry for snippet fetch and optional crawling
-this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile);
 }
 if (this.defaultMediaSnippetGlobalProfile == null) {
 // generate new default entry for snippet fetch and optional crawling
-this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CacheStrategy.IFEXIST);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultMediaSnippetGlobalProfile);
 }
 if (this.defaultSurrogateProfile == null) {
 // generate new default entry for surrogate parsing
-this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
+this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
 CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CacheStrategy.NOCACHE);
 this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultSurrogateProfile.handle()), this.defaultSurrogateProfile);
 }
@@ -324,8 +328,8 @@ public final class CrawlSwitchboard {
 ((MapHeap) this.profilesInvalidCrawls).close();
 ((MapHeap) this.profilesPassiveCrawls).close();
 }
 /**
 * Loads crawl profiles from a DB file.
 * @param file DB file
