If a crawl profile has a collection assigned, use the

collection name as the profile's display name in the web interface. This should
prevent overly long profile names from making the interface unusable.
pull/1/head
Michael Peter Christen 12 years ago
parent 0fe8be7981
commit 4a14122ba7

@ -114,7 +114,7 @@ public class CrawlProfileEditor_p {
for (final byte[] h : sb.crawler.getActive()) {
selentry = sb.crawler.getActive(h);
if (selentry != null && !CrawlProfile.ignoreNames.contains(selentry.name())) {
orderdHandles.put(selentry.name(), selentry.handle());
orderdHandles.put(selentry.collectionName(), selentry.handle());
}
}
@ -184,7 +184,7 @@ public class CrawlProfileEditor_p {
prop.put("edit", "0");
} else {
prop.put("edit", "1");
prop.put("edit_name", selentry.name());
prop.put("edit_name", selentry.collectionName());
prop.put("edit_handle", selentry.handle());
final Iterator<eentry> lit = labels.iterator();
count = 0;

@ -63,7 +63,7 @@ public class IndexCreateQueues_p {
final Pattern compiledPattern = Pattern.compile(deletepattern);
if (option == PROFILE) {
// search and delete the crawl profile (_much_ faster, independant of queue size)
// search and delete the crawl profile (_much_ faster, independent of queue size)
CrawlProfile entry;
for (final byte[] handle: sb.crawler.getActive()) {
entry = sb.crawler.getActive(handle);
@ -143,7 +143,7 @@ public class IndexCreateQueues_p {
profileHandle = request.profileHandle();
profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
prop.putHTML("crawler_host_" + hc + "_list_" + count + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler_host_" + hc + "_list_" + count + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler_host_" + hc + "_list_" + count + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.collectionName()));
prop.put("crawler_host_" + hc + "_list_" + count + "_depth", request.depth());
prop.put("crawler_host_" + hc + "_list_" + count + "_modified", daydate(request.appdate()) );
prop.putHTML("crawler_host_" + hc + "_list_" + count + "_anchor", request.name());

@ -482,12 +482,12 @@ public final class CrawlStacker {
if (maxAllowedPagesPerDomain < Integer.MAX_VALUE && maxAllowedPagesPerDomain > 0) {
final AtomicInteger dp = profile.getCount(url.getHost());
if (dp != null && dp.get() >= maxAllowedPagesPerDomain) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed.");
return "crawl stack domain counter exceeded";
}
if (ResultURLs.domainCount(EventOrigin.LOCAL_CRAWLING, url.getHost()) >= maxAllowedPagesPerDomain) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + profile.domMaxPages() + " is allowed.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed.");
return "result stack domain counter exceeded";
}
}

@ -121,7 +121,7 @@ public final class CrawlSwitchboard {
CrawlProfile p;
try {
p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.name());
Log.logInfo("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.collectionName());
} catch ( final IOException e ) {
continue;
} catch ( final SpaceExceededException e ) {

@ -259,6 +259,15 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return r;
}
/**
 * Chooses the label for this profile: the value stored under COLLECTIONS is
 * used unless it is missing, empty, or equal to "user"; in those cases the
 * regular profile name is used instead.
 * @return the COLLECTIONS value, or the result of name() when that value is
 *         null, empty or "user"
 */
public String collectionName() {
    final String collection = get(COLLECTIONS);
    // fall back to the plain profile name when no distinguishing collection is set
    if (collection == null || collection.isEmpty() || "user".equals(collection)) {
        return name();
    }
    return collection;
}
/**
* Gets the regex which must be matched by URLs in order to be crawled.
* @return regex which must be matched
@ -521,7 +530,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
final int domlistlength) {
prop.put(CRAWL_PROFILE_PREFIX + count + "_dark", dark ? "1" : "0");
prop.put(CRAWL_PROFILE_PREFIX + count + "_name", this.name());
prop.put(CRAWL_PROFILE_PREFIX + count + "_name", this.collectionName());
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton", (!active || ignoreNames.contains(this.name())) ? "0" : "1");
prop.put(CRAWL_PROFILE_PREFIX + count + "_terminateButton_handle", this.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_deleteButton", (active) ? "0" : "1");

@ -538,7 +538,7 @@ public class Response {
// check profile
if (!profile().indexText() && !profile().indexMedia()) {
return "indexing not allowed - indexText and indexMedia not set (for proxy = " + this.profile.name()+ ")";
return "indexing not allowed - indexText and indexMedia not set (for proxy = " + this.profile.collectionName()+ ")";
}
// -CGI access in request
@ -683,7 +683,7 @@ public class Response {
// check profile
if (!profile().indexText() && !profile().indexMedia()) {
return "indexing not allowed - indexText and indexMedia not set (for crawler = " + this.profile.name() + ")";
return "indexing not allowed - indexText and indexMedia not set (for crawler = " + this.profile.collectionName() + ")";
}
// -CGI access in request

@ -1988,7 +1988,7 @@ public final class Switchboard extends serverSwitch
CrawlProfile selentry;
for ( final byte[] handle : this.crawler.getActive() ) {
selentry = this.crawler.getActive(handle);
assert selentry.handle() != null : "profile.name = " + selentry.name();
assert selentry.handle() != null : "profile.name = " + selentry.collectionName();
if ( selentry.handle() == null ) {
this.crawler.removeActive(handle);
continue;
@ -2583,7 +2583,7 @@ public final class Switchboard extends serverSwitch
"denied by profile rule, process case="
+ processCase
+ ", profile name = "
+ queueEntry.profile().name());
+ queueEntry.profile().collectionName());
return;
}

Loading…
Cancel
Save