*) crawler/ftp/CrawlWorker.java: better error handling

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2503 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent 7d7f30139c
commit b44514242a

@ -133,6 +133,8 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
String fullPath = this.url.getPath();
int port = this.url.getPort();
plasmaHTCache.Entry htCache = null;
try {
// open a connection to the ftp server
if (port == -1) {
ftpClient.exec("open " + host, false);
@ -140,22 +142,25 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
ftpClient.exec("open " + host + " " + port, false);
}
if (berr.size() > 0) {
this.log.logInfo("Unable to connect to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
this.log.logWarning("Unable to connect to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_CONNECTION_ERROR);
return null;
}
// login to the server
ftpClient.exec("user " + userName + " " + userPwd, false);
if (berr.size() > 0) {
this.log.logInfo("Unable to login to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
this.log.logWarning("Unable to login to ftp server " + this.url.getHost() + " hosting URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_LOGIN_FAILED);
return null;
}
// change transfer mode to binary
ftpClient.exec("binary", false);
if (berr.size() > 0) {
this.log.logInfo("Unable to set the file transfer mode to binary for URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
this.log.logWarning("Unable to set the file transfer mode to binary for URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_TRASFER_MODE_PROBLEM);
return null;
}
// determine filename and path
@ -205,7 +210,6 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
String mimeType;
Date fileDate;
plasmaHTCache.Entry htCache = null;
if (file.length() == 0) {
// getting the dirlist
mimeType = "text/html";
@ -217,11 +221,16 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
// generate the dirlist
StringBuffer dirList = ftpClient.dirhtml(fullPath);
if (dirList != null && dirList.length() > 0) try {
// write it into a file
PrintWriter writer = new PrintWriter(new FileOutputStream(cacheFile),false);
writer.write(dirList.toString());
writer.flush();
writer.close();
} catch (Exception e) {
this.log.logInfo("Unable to write dirlist for URL " + this.url.toString());
htCache = null;
}
} else {
// determine the mimetype of the resource
String extension = plasmaParser.getFileExt(this.url);
@ -245,17 +254,14 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
// if the response has not the right file type then reject file
this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + mimeType + " for URL " + this.url.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT);
return null;
}
}
// closing connection
ftpClient.exec("close", false);
ftpClient.exec("exit", false);
// pass the downloaded resource to the cache manager
if (berr.size() > 0 || htCache == null) {
// the download reported errors or produced no cache entry: reject the file
this.log.logInfo("Unable to download URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
this.log.logWarning("Unable to download URL " + this.url.toString() + "\nErrorlog: " + berr.toString());
addURLtoErrorDB(plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
// an error has occurred. cleanup
@ -271,6 +277,11 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke
}
return htCache;
} finally {
// closing connection
ftpClient.exec("close", false);
ftpClient.exec("exit", false);
}
}

Loading…
Cancel
Save