*) URL.java: userinfo was not carried over when generating a new URL from a base URL and a relative path

*) CrawlWorker.java: now uses the new dirhtml function of ftpc

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2492 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent 17ba468165
commit 7930839594

@ -116,6 +116,7 @@ public class URL {
this.protocol = baseURL.protocol;
this.host = baseURL.host;
this.port = baseURL.port;
this.userInfo = baseURL.userInfo;
if (relPath.toLowerCase().startsWith("javascript:")) {
this.path = baseURL.path;
} else if (relPath.startsWith("/")) {

@ -51,7 +51,9 @@ import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
import de.anomic.net.URL;
import de.anomic.net.ftpc;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
@ -82,8 +84,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements
public Entry load() throws IOException {
File cacheFile = cacheManager.getCachePath(url);
cacheFile.getParentFile().mkdirs();
ByteArrayOutputStream bout = new ByteArrayOutputStream();
PrintStream out = new PrintStream(bout);
@ -104,27 +105,61 @@ public class CrawlWorker extends AbstractCrawlWorker implements
userPwd = "anonymous";
}
String host = this.url.getHost();
String fullPath = this.url.getPath();
int port = this.url.getPort();
if (port == -1) {
ftpClient.exec("open " + this.url.getHost(), false);
} else {
ftpClient.exec("open " + this.url.getHost() + " " + port, false);
}
ftpClient.exec("user " + userName + " " + userPwd, false);
ftpClient.exec("binary", false);
// cd
String file = "";
String path = this.url.getPath();
int pos = path.lastIndexOf("/");
String file, path;
if (fullPath.endsWith("/")) {
file = "";
path = fullPath;
} else {
int pos = fullPath.lastIndexOf("/");
if (pos == -1) {
file = path;
file = fullPath;
path = "/";
} else {
file = path.substring(pos+1);
path = path.substring(0,pos);
path = fullPath.substring(0,pos+1);
file = fullPath.substring(pos+1);
}
}
if (file.length() > 0) {
ftpClient.exec("cd \"" + path + "\"", false);
if (ftpClient.isFolder(file)) {
ftpClient.exec("cd \"" + file + "\"", false);
// testing if the current name is a directory
boolean isFolder = ftpClient.isFolder(file);
if (isFolder) {
fullPath = fullPath + "/";
file = "";
this.url = new URL(this.url,fullPath);
}
}
// creating a cache file object
File cacheFile = cacheManager.getCachePath(this.url);
cacheFile.getParentFile().mkdirs();
ftpClient.exec("cd \"" + fullPath + "\"", false);
if (file.length() == 0) {
// TODO: dirlist
// getting the dirlist
StringBuffer dirList = ftpClient.dirhtml(host, (port==-1)?21:port, fullPath, userName, userPwd);
// write it into a file
PrintWriter writer = new PrintWriter(cacheFile);
writer.write(dirList.toString());
writer.close();
} else {
// download the remote file
ftpClient.exec("get \"" + file + "\" \"" + cacheFile.getAbsolutePath() + "\"", false);
@ -133,6 +168,8 @@ public class CrawlWorker extends AbstractCrawlWorker implements
ftpClient.exec("close", false);
ftpClient.exec("exit", false);
// TODO: do mimetype detection based on file extension
// TODO: create a new htCache entry ....
return null;

Loading…
Cancel
Save