From f51bad8ae560358f3ed2f1fb52a8fda49f385c79 Mon Sep 17 00:00:00 2001 From: danielr Date: Sat, 15 Mar 2008 21:57:55 +0000 Subject: [PATCH] FTP: - report connection status (to break if no connection possible) - fixed isFolder() - additional error output - fixed paths with encoded symbols (ie. a%20file.txt) - refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4567 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/net/ftpc.java | 149 +++++++++++------ .../plasma/crawler/plasmaFTPLoader.java | 155 +++++++++++------- 2 files changed, 187 insertions(+), 117 deletions(-) diff --git a/source/de/anomic/net/ftpc.java b/source/de/anomic/net/ftpc.java index 79bca8a59..e9dbe8e2d 100644 --- a/source/de/anomic/net/ftpc.java +++ b/source/de/anomic/net/ftpc.java @@ -249,11 +249,11 @@ public class ftpc { .booleanValue()); } catch (final InvocationTargetException e) { if (e.getMessage() == null) { - } else if (ControlSocket == null) { + } else if (notConnected()) { // the error was probably caused because there is no // connection errPrintln("not connected. no effect."); - e.printStackTrace(); + e.printStackTrace(err); return ret; } else { errPrintln("ftp internal exception: target exception " + e); @@ -266,7 +266,7 @@ public class ftpc { // consider first that the user attempted to execute a java // command from // the current path; either local or remote - if (ControlSocket == null) { + if (notConnected()) { // try a local exec try { javaexec(cmd); @@ -470,7 +470,7 @@ public class ftpc { errPrintln("Syntax: CD "); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LCD(); } try { @@ -531,7 +531,7 @@ public class ftpc { errPrintln("Syntax: DEL "); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LDEL(); } try { @@ -551,7 +551,7 @@ public class ftpc { errPrintln("Syntax: DIR [|]"); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LDIR(); } try { @@ -575,18 +575,21 @@ public class ftpc { } catch (final IOException e) { errPrintln("Connection to server lost."); } - ControlSocket = null; - DataSocketActive = null; - DataSocketPassive = null; - clientInput = null; - clientOutput = null; + try { + closeConnection(); + } catch (final IOException e) { + ControlSocket = null; + DataSocketActive = null; + DataSocketPassive = null; + clientInput = null; + clientOutput = null; + } prompt = "ftp [local]>"; return true; } private String quit() throws IOException { - // send delete command send("QUIT"); // read status reply @@ -595,24 +598,7 @@ public class ftpc { throw new IOException(reply); } - // cleanup - if (ControlSocket != null) { - clientOutput.close(); - clientInput.close(); - ControlSocket.close(); - ControlSocket = null; - } - - if (DataSocketActive != null) { - DataSocketActive.close(); - DataSocketActive = null; - } - if (DataSocketPassive != null) { - DataSocketPassive.close(); - DataSocketPassive = null; // "Once a socket has been closed, it is - // not available for further networking - // use" - } + closeConnection(); return reply; } @@ -633,8 +619,8 @@ public class ftpc { final File local = absoluteLocalFile(localFilename); if (local.exists()) { - errPrintln("Error: local file " + local.toString() + " already exists."); - errPrintln(logPrefix + " File " + remote + " not retrieved. Local file unchanged."); + errPrintln("Error: local file " + local.toString() + " already exists.\n" + " File " + remote + + " not retrieved. Local file unchanged."); } else { if (withoutLocalFile) { retrieveFilesRecursively(remote, false); @@ -732,7 +718,8 @@ public class ftpc { } // check if we actually changed into the folder final String changedPath = pwd(); - if (!(changedPath.equals(path) || changedPath.equals(currentFolder + "/" + path))) { + if (!(changedPath.equals(path) || changedPath.equals(currentFolder + + (currentFolder.endsWith("/") ? "" : "/") + path))) { throw new IOException("folder is '" + changedPath + "' should be '" + path + "'"); } // return to last folder @@ -1172,7 +1159,7 @@ public class ftpc { final String dateString = tokens.group(3) + " " + tokens.group(4) + " " + year + " " + time; try { date = lsDateFormat.parse(dateString); - } catch (ParseException e) { + } catch (final ParseException e) { errPrintln(logPrefix + "---- Error: not ls date-format '" + dateString + "': " + e.getMessage()); date = new Date(); } @@ -1391,7 +1378,7 @@ public class ftpc { errPrintln("Syntax: LS [|]"); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LLS(); } try { @@ -1415,7 +1402,6 @@ public class ftpc { outPrintln("---- v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v"); for (final String element : list) { outPrintln(element); - outPrintln("--> " + parseListData(element)); } outPrintln("---- ^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^"); } @@ -1477,7 +1463,7 @@ public class ftpc { errPrintln("Syntax: MKDIR "); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LMKDIR(); } try { @@ -1545,7 +1531,7 @@ public class ftpc { errPrintln("Syntax: MV "); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LMV(); } try { @@ -1604,7 +1590,7 @@ public class ftpc { outPrintln("---- Connection to " + cmd[1] + " established."); prompt = "ftp [" + cmd[1] + "]>"; } catch (final IOException e) { - errPrintln("Error: connecting " + cmd[1] + " on port " + port + " failed."); + errPrintln("Error: connecting " + cmd[1] + " on port " + port + " failed: " + e.getMessage()); } return true; } @@ -1614,17 +1600,56 @@ public class ftpc { exec("close", false); // close any existing connections first } - ControlSocket = new Socket(host, port); - ControlSocket.setSoTimeout(getTimeout()); - clientInput = new BufferedReader(new InputStreamReader(ControlSocket.getInputStream())); - clientOutput = new DataOutputStream(new BufferedOutputStream(ControlSocket.getOutputStream())); + try { + ControlSocket = new Socket(host, port); + ControlSocket.setSoTimeout(getTimeout()); + clientInput = new BufferedReader(new InputStreamReader(ControlSocket.getInputStream())); + clientOutput = new DataOutputStream(new BufferedOutputStream(ControlSocket.getOutputStream())); + + // read and return server message + this.host = host; + this.port = port; + remotemessage = receive(); + if ((remotemessage != null) && (remotemessage.length() > 3)) { + remotemessage = remotemessage.substring(4); + } + } catch (final IOException e) { + // if a connection was opened, it should not be used + closeConnection(); + throw new IOException(e); + } + } + + /** + * @return + */ + public boolean notConnected() { + return ControlSocket == null; + } - // read and return server message - this.host = host; - this.port = port; - remotemessage = receive(); - if ((remotemessage != null) && (remotemessage.length() > 3)) { - remotemessage = remotemessage.substring(4); + /** + * close all sockets + * + * @throws IOException + */ + private void closeConnection() throws IOException { + // cleanup + if (ControlSocket != null) { + clientOutput.close(); + clientInput.close(); + ControlSocket.close(); + ControlSocket = null; + } + + if (DataSocketActive != null) { + DataSocketActive.close(); + DataSocketActive = null; + } + if (DataSocketPassive != null) { + DataSocketPassive.close(); + DataSocketPassive = null; // "Once a socket has been closed, it is + // not available for further networking + // use" } } @@ -1658,7 +1683,7 @@ public class ftpc { errPrintln("Syntax: PWD (no parameter)"); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LPWD(); } try { @@ -1701,7 +1726,7 @@ public class ftpc { errPrintln("Syntax: RMDIR "); return true; } - if (ControlSocket == null) { + if (notConnected()) { return LRMDIR(); } try { @@ -1713,7 +1738,7 @@ public class ftpc { } public boolean QUIT() { - if (ControlSocket != null) { + if (!notConnected()) { exec("close", false); } return false; @@ -1780,7 +1805,7 @@ public class ftpc { login(cmd[1], cmd[2]); outPrintln("---- Granted access for user " + cmd[1] + "."); } catch (final IOException e) { - errPrintln("Error: authorization of user " + cmd[1] + " failed."); + errPrintln("Error: authorization of user " + cmd[1] + " failed: " + e.getMessage()); } return true; } @@ -2354,6 +2379,7 @@ public class ftpc { * @throws IOException */ private void login(final String account, final String password) throws IOException { + unsetLoginData(); // send user name send("USER " + account); @@ -2380,6 +2406,15 @@ public class ftpc { setLoginData(account, password, reply); } + /** + * we are authorized to use the server + * + * @return + */ + public boolean isLoggedIn() { + return (account != null && password != null && remotegreeting != null); + } + /** * remember username and password which were used to login * @@ -2394,6 +2429,12 @@ public class ftpc { remotegreeting = reply; } + private void unsetLoginData() { + account = null; + password = null; + remotegreeting = null; + } + public void sys() throws IOException { // send system command send("SYST"); @@ -2451,7 +2492,7 @@ public class ftpc { * @param timeout * in seconds, 0 = infinite */ - public void setDataSocketTimeout(int timeout) { + public void setDataSocketTimeout(final int timeout) { DataSocketTimeout = timeout; try { diff --git a/source/de/anomic/plasma/crawler/plasmaFTPLoader.java b/source/de/anomic/plasma/crawler/plasmaFTPLoader.java index 7ff74f6e9..e798ec639 100644 --- a/source/de/anomic/plasma/crawler/plasmaFTPLoader.java +++ b/source/de/anomic/plasma/crawler/plasmaFTPLoader.java @@ -90,7 +90,11 @@ public class plasmaFTPLoader { */ public plasmaHTCache.Entry load(final plasmaCrawlEntry entry) { final yacyURL entryUrl = entry.url(); - final String fullPath = entryUrl.getPath(); + final String fullPath = getPath(entryUrl); + final File cacheFile = createCachefile(entryUrl); + + // the return value + plasmaHTCache.Entry htCache = null; // determine filename and path String file, path; @@ -113,72 +117,78 @@ public class plasmaFTPLoader { final ByteArrayOutputStream berr = new ByteArrayOutputStream(); final ftpc ftpClient = createFTPClient(berr); - plasmaHTCache.Entry htCache = null; - try { - openConnection(ftpClient, entryUrl); - - // testing if the specified file is a directory - if (file.length() > 0) { - ftpClient.exec("cd \"" + path + "\"", false); - - // testing if the current name is a directoy - final boolean isFolder = ftpClient.isFolder(file); - if (isFolder) { - path = fullPath + "/"; - file = ""; + if (openConnection(ftpClient, entryUrl)) { + // ftp stuff + try { + // testing if the specified file is a directory + if (file.length() > 0) { + ftpClient.exec("cd \"" + path + "\"", false); + + final boolean isFolder = ftpClient.isFolder(file); + if (isFolder) { + path = fullPath + "/"; + file = ""; + } } - } - - // creating a cache file object - final File cacheFile = plasmaHTCache.getCachePath(entryUrl); - - // TODO: invalid file path check - - // testing if the file already exists - if (cacheFile.isFile()) { - // delete the file if it already exists - plasmaHTCache.deleteURLfromCache(entryUrl); - } else { - // create parent directories - cacheFile.getParentFile().mkdirs(); - } - if (file.length() == 0) { - // directory -> get list of files - // create a htcache entry - htCache = createCacheEntry(entry, "text/html", new Date()); - if (!generateDirlist(ftpClient, entry, path, cacheFile)) { - htCache = null; - } - } else { - // file -> download - try { - htCache = getFile(ftpClient, entry, cacheFile); - } catch (final Exception e) { + if (file.length() == 0) { + // directory -> get list of files + // create a htcache entry + htCache = createCacheEntry(entry, "text/html", new Date()); + if (!generateDirlist(ftpClient, entry, path, cacheFile)) { + htCache = null; + } + } else { + // file -> download + try { + htCache = getFile(ftpClient, entry, cacheFile); + } catch (final Exception e) { + // add message to errorLog + (new PrintStream(berr)).print(e.getMessage()); + } } + } finally { + closeConnection(ftpClient); } + } - // pass the downloaded resource to the cache manager - if (berr.size() > 0 || htCache == null) { - // some error logging - final String detail = (berr.size() > 0) ? "\n Errorlog: " + berr.toString() : ""; - log.logWarning("Unable to download URL " + entry.url().toString() + detail); - sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1, - plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR); + // pass the downloaded resource to the cache manager + if (berr.size() > 0 || htCache == null) { + // some error logging + final String detail = (berr.size() > 0) ? "\n Errorlog: " + berr.toString() : ""; + log.logWarning("Unable to download URL " + entry.url().toString() + detail); + sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1, plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR); - // an error has occured. cleanup - if (cacheFile.exists()) { - cacheFile.delete(); - } - } else { - // announce the file - plasmaHTCache.writeFileAnnouncement(cacheFile); + // an error has occured. cleanup + if (cacheFile.exists()) { + cacheFile.delete(); } + } else { + // announce the file + plasmaHTCache.writeFileAnnouncement(cacheFile); + } - return htCache; - } finally { - closeConnection(ftpClient); + return htCache; + } + + /** + * creating a cache file object + * + * @param entryUrl + * @return + */ + private File createCachefile(final yacyURL entryUrl) { + final File cacheFile = plasmaHTCache.getCachePath(entryUrl); + + // testing if the file already exists + if (cacheFile.isFile()) { + // delete the file if it already exists + plasmaHTCache.deleteURLfromCache(entryUrl); + } else { + // create parent directories + cacheFile.getParentFile().mkdirs(); } + return cacheFile; } /** @@ -196,8 +206,9 @@ public class plasmaFTPLoader { * @param ftpClient * @param host * @param port + * @return success */ - private void openConnection(final ftpc ftpClient, final yacyURL entryUrl) { + private boolean openConnection(final ftpc ftpClient, final yacyURL entryUrl) { // get username and password final String userInfo = entryUrl.getUserInfo(); String userName = "anonymous", userPwd = "anonymous"; @@ -218,12 +229,20 @@ public class plasmaFTPLoader { } else { ftpClient.exec("open " + host + " " + port, false); } + if (ftpClient.notConnected()) { + return false; + } // login to the server ftpClient.exec("user " + userName + " " + userPwd, false); - // change transfer mode to binary - ftpClient.exec("binary", false); + if (ftpClient.isLoggedIn()) { + // change transfer mode to binary + ftpClient.exec("binary", false); + } else { + return false; + } + return true; } /** @@ -240,7 +259,7 @@ public class plasmaFTPLoader { final yacyURL entryUrl = entry.url(); final String extension = plasmaParser.getFileExt(entryUrl); final String mimeType = plasmaParser.getMimeTypeByFileExt(extension); - final String path = entryUrl.getPath(); + final String path = getPath(entryUrl); // if the mimetype and file extension is supported we start to download // the file @@ -264,7 +283,7 @@ public class plasmaFTPLoader { log.logInfo("REJECTED TOO BIG FILE with size " + size + " Bytes for URL " + entry.url().toString()); sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1, plasmaCrawlEURL.DENIED_FILESIZE_LIMIT_EXCEEDED); - throw new Exception("filesize too big: " + size + " bytes"); + throw new Exception("file size exceeds limit"); } } else { // if the response has not the right file type then reject file @@ -275,6 +294,16 @@ public class plasmaFTPLoader { return htCache; } + /** + * gets path suitable for FTP (url-decoded, double-quotes escaped) + * + * @param entryUrl + * @return + */ + private String getPath(final yacyURL entryUrl) { + return yacyURL.unescape(entryUrl.getPath()).replace("\"", "\"\""); + } + /** * @param ftpClient * @param entry