Crawl from local file: faster task end when manually terminating a crawl.

pull/88/head
luccioman 8 years ago
parent 4c67ed3f8d
commit db3b9db9c2

@@ -376,13 +376,13 @@ public class yacysearchitem {
      * is null
      */
     private static String processFaviconURL(final boolean authenticated, DigestURL faviconURL) {
-        final String iconUrlExt = MultiProtocolURL.getFileExtension(faviconURL.getFileName());
-        /* Image format output for ViewFavicon servlet : default is png, except with gif and svg icons */
-        final String viewFaviconExt = !iconUrlExt.isEmpty() && ImageViewer.isBrowserRendered(iconUrlExt) ? iconUrlExt : "png";
         /* Only use licence code for non-authenticated users. For authenticated users the licence would never be released and would unnecessarily fill URLLicense.permissions. */
         StringBuilder contentFaviconURL = new StringBuilder();
         if (faviconURL != null) {
+            final String iconUrlExt = MultiProtocolURL.getFileExtension(faviconURL.getFileName());
+            /* Image format output for ViewFavicon servlet : default is png, except with gif and svg icons */
+            final String viewFaviconExt = !iconUrlExt.isEmpty() && ImageViewer.isBrowserRendered(iconUrlExt) ? iconUrlExt : "png";
             contentFaviconURL.append("ViewFavicon.").append(viewFaviconExt).append("?maxwidth=16&maxheight=16&isStatic=true&quadratic");
             if (authenticated) {
                 contentFaviconURL.append("&url=").append(faviconURL.toNormalform(true));
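Note on the yacysearchitem hunk: the extension was previously computed from faviconURL before the null check, so a search result without a known favicon could raise a NullPointerException. A minimal standalone sketch of the guarded pattern, with simplified, hypothetical names in place of the YaCy helpers (not the actual YaCy code):

    import java.net.URI;

    public class FaviconQuerySketch {
        /** Builds the ViewFavicon query string only when a favicon URL is known. */
        static String faviconQuery(final URI faviconURL) {
            final StringBuilder query = new StringBuilder();
            if (faviconURL != null) {
                // derive the extension only after the null check, as in the fix above
                final String path = faviconURL.getPath() == null ? "" : faviconURL.getPath();
                final int dot = path.lastIndexOf('.');
                final String ext = dot >= 0 ? path.substring(dot + 1) : "";
                // browsers render gif and svg directly; everything else defaults to png
                final String viewExt = ext.equals("gif") || ext.equals("svg") ? ext : "png";
                query.append("ViewFavicon.").append(viewExt).append("?maxwidth=16&maxheight=16");
            }
            return query.toString(); // empty when no favicon URL was provided
        }
    }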

@@ -172,6 +172,7 @@ public final class CrawlStacker {
      * @param hyperlinks crawl starting points links to stack
      * @param replace Specify whether old indexed entries should be replaced
      * @param timezoneOffset local time-zone offset
+     * @throws IllegalCrawlProfileException when the crawl profile is not active
      */
     public void enqueueEntries(
             final byte[] initiator,
@@ -189,8 +190,9 @@
         } else {
             error = "Rejected " + hyperlinks.size() + " crawl entries. Reason : LOST STACKER PROFILE HANDLE '" + profileHandle + "'";
         }
-        CrawlStacker.log.info(error); // this is NOT an error but a normal effect when terminating a crawl queue
-        return;
+        CrawlStacker.log.info(error); // this is NOT an error but a normal behavior when terminating a crawl queue
+        /* Throw an exception to signal the caller that it can stop stacking URLs using this crawl profile */
+        throw new IllegalCrawlProfileException("Profile " + profileHandle + " is no more active");
     }
     if (replace) {
         // delete old entries, if they exist, to force a re-load of the url (that's wanted here)
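The effect of the new throw is caller-side: code that stacks several batches of links against the same profile can now stop at the first batch that hits a deleted profile, instead of silently rejecting every remaining batch. A hedged sketch of that caller-side effect, assuming the enqueueEntries shape suggested by the Javadoc above (the Stacker interface, batch structure, and names are hypothetical):

    import java.util.List;
    import net.yacy.crawler.IllegalCrawlProfileException;

    public class StopStackingSketch {

        /** Hypothetical stand-in for CrawlStacker.enqueueEntries: throws once the profile is gone. */
        interface Stacker {
            void enqueueEntries(List<String> hyperlinks) throws IllegalCrawlProfileException;
        }

        /** Stacks batches until the profile disappears, then stops instead of looping on. */
        static void stackAll(final Stacker stacker, final List<List<String>> batches) {
            try {
                for (final List<String> batch : batches) {
                    stacker.enqueueEntries(batch); // throws when the crawl profile was deleted
                }
            } catch (final IllegalCrawlProfileException e) {
                // crawl terminated manually: stop stacking the remaining batches at once
            }
        }
    }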

@@ -156,9 +156,17 @@ public class FileCrawlStarterTask extends Thread {
             writer.close();
         } catch (IOException e) {
             log.severe("Error parsing the crawlingFile " + this.crawlingFile.getAbsolutePath(), e);
-        } catch (Throwable t) {
-            /* Other errors are likely to occur when the crawl is interrupted : still log this at warning level to avoid polluting regular error log level */
-            log.warn("Error parsing the crawlingFile " + this.crawlingFile.getAbsolutePath(), t);
+        } catch (IllegalCrawlProfileException e) {
+            /* We should get here when the crawl is stopped manually before termination */
+            log.info("Parsing crawlingFile " + this.crawlingFile.getAbsolutePath() + " terminated. Crawl profile "
+                    + this.profile.handle() + " is no more active.");
+        } catch (Exception e) {
+            /*
+             * Other errors are likely to occur when the crawl is interrupted :
+             * still log these at warning level to avoid polluting the regular
+             * error log level
+             */
+            log.warn("Error parsing the crawlingFile " + this.crawlingFile.getAbsolutePath(), e);
         } finally {
             if (inStream != null) {
                 try {
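A side note on the catch order above: since IllegalCrawlProfileException extends RuntimeException, a broad catch (Exception e) placed first would capture it, and the compiler would reject the more specific handler afterwards as already caught; the dedicated branch therefore has to come before the Exception one. A compact sketch of the ordering and the log-level split (parseFile() is a hypothetical stand-in for the parsing above; IllegalCrawlProfileException is the class added below):

    import java.io.IOException;
    import net.yacy.crawler.IllegalCrawlProfileException;

    public class CatchOrderSketch {
        /** Hypothetical stand-in for the crawlingFile parsing shown above. */
        static void parseFile() throws IOException {
            throw new IllegalCrawlProfileException("profile deleted while parsing");
        }

        public static void main(String[] args) {
            try {
                parseFile();
            } catch (IOException e) {
                System.err.println("severe: " + e.getMessage()); // genuine parsing failure
            } catch (IllegalCrawlProfileException e) {
                System.out.println("info: " + e.getMessage()); // expected on manual termination
            } catch (Exception e) {
                System.err.println("warn: " + e.getMessage()); // other interruption side effects
            }
        }
    }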

@@ -0,0 +1,50 @@
+// IllegalCrawlProfileException.java
+// Copyright 2016 by luccioman; https://github.com/luccioman
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+package net.yacy.crawler;
+
+import net.yacy.crawler.data.CrawlProfile;
+
+/**
+ * Exception used to signal that an operation is trying to use an inactive or deleted {@link CrawlProfile}.
+ * @author luccioman
+ *
+ */
+public class IllegalCrawlProfileException extends RuntimeException {
+
+    /** Generated serial ID */
+    private static final long serialVersionUID = 8482302347823257958L;
+
+    /**
+     * Default constructor : use a generic message
+     */
+    public IllegalCrawlProfileException() {
+        super("Crawl profile can not be used");
+    }
+
+    /**
+     * @param message detail message
+     */
+    public IllegalCrawlProfileException(String message) {
+        super(message);
+    }
+
+}
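For illustration, a typical guard using the new type mirrors the CrawlStacker change above: resolve the profile handle and fail fast when it no longer maps to an active profile. A hedged sketch, assuming some Map-based registry of active profiles (the lookup and helper are hypothetical; only the exception type comes from this patch):

    import java.util.Map;
    import net.yacy.crawler.IllegalCrawlProfileException;
    import net.yacy.crawler.data.CrawlProfile;

    public class ProfileGuardSketch {
        /** Returns the active profile for a handle, or throws as CrawlStacker.enqueueEntries now does. */
        static CrawlProfile activeProfileOrThrow(final Map<String, CrawlProfile> profiles, final String handle) {
            final CrawlProfile profile = profiles.get(handle);
            if (profile == null) {
                // same message pattern as the throw added in CrawlStacker above
                throw new IllegalCrawlProfileException("Profile " + handle + " is no more active");
            }
            return profile;
        }
    }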