*) crawler/ftp/CrawlWorker.java: better error handling

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2503 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 7d7f30139c
commit b44514242a

@ -133,6 +133,8 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
String fullPath = this.url.getPath(); String fullPath = this.url.getPath();
int port = this.url.getPort(); int port = this.url.getPort();
plasmaHTCache.Entry htCache = null;
try {
// open a connection to the ftp server // open a connection to the ftp server
if (port == -1) { if (port == -1) {
ftpClient.exec("open " + host, false); ftpClient.exec("open " + host, false);
@ -140,22 +142,25 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
ftpClient.exec("open " + host + " " + port, false); ftpClient.exec("open " + host + " " + port, false);
} }
if (berr.size() > 0) { if (berr.size() > 0) {
this.log.logInfo("Unable to connect to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString()); this.log.logWarning("Unable to connect to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_CONNECTION_ERROR); addURLtoErrorDB(plasmaCrawlEURL.DENIED_CONNECTION_ERROR);
return null;
} }
// login to the server // login to the server
ftpClient.exec("user " + userName + " " + userPwd, false); ftpClient.exec("user " + userName + " " + userPwd, false);
if (berr.size() > 0) { if (berr.size() > 0) {
this.log.logInfo("Unable to login to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString()); this.log.logWarning("Unable to login to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_LOGIN_FAILED); addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_LOGIN_FAILED);
return null;
} }
// change transfer mode to binary // change transfer mode to binary
ftpClient.exec("binary", false); ftpClient.exec("binary", false);
if (berr.size() > 0) { if (berr.size() > 0) {
this.log.logInfo("Unable to set the file transfer mode to binary for URL " + this.url.toString() + "\nErrorlog: " + berr.toString()); this.log.logWarning("Unable to set the file transfer mode to binary for URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_TRASFER_MODE_PROBLEM); addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_TRASFER_MODE_PROBLEM);
return null;
} }
// determine filename and path // determine filename and path
@ -205,7 +210,6 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
String mimeType; String mimeType;
Date fileDate; Date fileDate;
plasmaHTCache.Entry htCache = null;
if (file.length() == 0) { if (file.length() == 0) {
// getting the dirlist // getting the dirlist
mimeType = "text/html"; mimeType = "text/html";
@ -217,11 +221,16 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
// generate the dirlist // generate the dirlist
StringBuffer dirList = ftpClient.dirhtml(fullPath); StringBuffer dirList = ftpClient.dirhtml(fullPath);
if (dirList != null && dirList.length() > 0) try {
// write it into a file // write it into a file
PrintWriter writer = new PrintWriter(new FileOutputStream(cacheFile),false); PrintWriter writer = new PrintWriter(new FileOutputStream(cacheFile),false);
writer.write(dirList.toString()); writer.write(dirList.toString());
writer.flush(); writer.flush();
writer.close(); writer.close();
} catch (Exception e) {
this.log.logInfo("Unable to write dirlist for URL " + this.url.toString());
htCache = null;
}
} else { } else {
// determine the mimetype of the resource // determine the mimetype of the resource
String extension = plasmaParser.getFileExt(this.url); String extension = plasmaParser.getFileExt(this.url);
@ -245,17 +254,14 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
// if the response has not the right file type then reject file // if the response has not the right file type then reject file
this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + mimeType + " for URL " + this.url.toString()); this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + mimeType + " for URL " + this.url.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT); addURLtoErrorDB(plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT);
return null;
} }
} }
// closing connection
ftpClient.exec("close", false);
ftpClient.exec("exit", false);
// pass the downloaded resource to the cache manager // pass the downloaded resource to the cache manager
if (berr.size() > 0 || htCache == null) { if (berr.size() > 0 || htCache == null) {
// the download failed or no cache entry was created: report the error // the download failed or no cache entry was created: report the error
this.log.logInfo("Unable to download URL " + this.url.toString() + "\nErrorlog: " + berr.toString()); this.log.logWarning("Unable to download URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR); addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
// an error has occurred. cleanup // an error has occurred. cleanup
@ -271,6 +277,11 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
} }
return htCache; return htCache;
} finally {
// closing connection
ftpClient.exec("close", false);
ftpClient.exec("exit", false);
}
} }

Loading…
Cancel
Save