delete crawl queue on init exception

(happens occasionally on a path name violation and will never get resolved)
pull/51/head
reger 9 years ago
parent f781b9dd47
commit 7789c32c82

@ -87,7 +87,7 @@ public class HostBalancer implements Balancer {
}
/**
* fills the queue with by scanning the hostsPath directory in a thread to
* fills the queue by scanning the hostsPath directory in a thread to
* return immediately (as large unfinished crawls may take longer to load)
*/
private void init() {
@ -106,7 +106,9 @@ public class HostBalancer implements Balancer {
queues.put(DigestURL.hosthash(queue.getHost(), queue.getPort()), queue);
}
} catch (MalformedURLException | RuntimeException e) {
log.warn("init error for " + hostsPath.getName() + " host=" + hoststr + " " + e.getLocalizedMessage());
log.warn("delete queue due to init error for " + hostsPath.getName() + " host=" + hoststr + " " + e.getLocalizedMessage());
// if exception thrown we can't init the queue, maybe due to name violation. That won't get better, delete it.
FileUtils.deletedelete(new File(hostsPath, hoststr));
}
}
}

@ -101,7 +101,11 @@ public class HostQueue implements Balancer {
this.port = Integer.parseInt(filename.substring(p + 1)); // consider "host.com" contains dot but no required port -> will throw exception
init();
}
/**
* Opens and initializes the host queue
* @throws MalformedURLException if directory for the host could not be created
*/
private final void init() throws MalformedURLException {
try {
if (this.hostName == null)

Loading…
Cancel
Save