diff --git a/source/de/anomic/crawler/retrieval/SMBLoader.java b/source/de/anomic/crawler/retrieval/SMBLoader.java index d136461e6..51979b267 100644 --- a/source/de/anomic/crawler/retrieval/SMBLoader.java +++ b/source/de/anomic/crawler/retrieval/SMBLoader.java @@ -29,10 +29,16 @@ package de.anomic.crawler.retrieval; import java.io.IOException; import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Date; import java.util.List; +import jcifs.smb.SmbException; +import jcifs.smb.SmbFile; +import jcifs.smb.SmbFileInputStream; + import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; import de.anomic.http.server.ResponseHeader; @@ -72,8 +78,6 @@ public class SMBLoader { // process directories: transform them to html with meta robots=noindex (using the ftpc lib) if (url.isDirectory()) { - List list = new ArrayList(); - String u = url.toNormalform(true, true); String[] l = url.list(); if (l == null) { // this can only happen if there is no connection or the directory does not exist @@ -81,7 +85,16 @@ public class SMBLoader { sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, "directory listing not available. URL = " + request.url().toString()); throw new IOException("directory listing not available. URL = " + request.url().toString()); } - for (String s: l) list.add(u + s); + String u = url.toNormalform(true, true); + List list = new ArrayList(); + for (String s: l) { + if (!s.endsWith("/") && !s.endsWith("\\")) { + // check if this is a directory + SmbFile sf = new SmbFile(u + s); + if (sf.isDirectory()) s = s + "/"; + } + list.add(u + s); + } StringBuilder content = ftpc.dirhtml(u, null, null, null, list, true); @@ -147,5 +160,32 @@ public class SMBLoader { b); return response; } - + + public static void main(String[] args) { + //jcifs.Config.setProperty( "jcifs.netbios.wins", "192.168.1.220" ); + //NtlmPasswordAuthentication auth = new NtlmPasswordAuthentication("domain", "username", "password"); + SmbFileInputStream in; + try { + SmbFile sf = new SmbFile(args[0]); + if (sf.isDirectory()) { + String[] s = sf.list(); + for (String t: s) System.out.println(t); + } else { + in = new SmbFileInputStream(sf); + byte[] b = new byte[8192]; + int n; + while(( n = in.read( b )) > 0 ) { + System.out.write( b, 0, n ); + } + } + } catch (SmbException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (UnknownHostException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } } diff --git a/source/de/anomic/net/ftpc.java b/source/de/anomic/net/ftpc.java index 81da73818..bf7b637aa 100644 --- a/source/de/anomic/net/ftpc.java +++ b/source/de/anomic/net/ftpc.java @@ -2665,7 +2665,7 @@ public class ftpc { entryInfo info; for (final String line : list) { info = parseListData(line); - if(info != null) { + if (info != null) { // with link nameStart = line.indexOf(info.name); page.append(line.substring(0, nameStart)); @@ -2674,9 +2674,11 @@ public class ftpc { if (line.length() > nameEnd) { page.append(line.substring(nameEnd)); } + } else if (line.startsWith("http://") || line.startsWith("ftp://") || line.startsWith("smb://")) { + page.append("" + line + ""); } else { - // raw - page.append(line); + // raw + page.append(line); } page.append('\n'); } diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 5b748bda7..ca1be28a0 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1749,7 +1749,7 @@ public final class Switchboard extends serverSwitch { // process the next hyperlink nextUrl = nextEntry.getKey(); String u = nextUrl.toNormalform(true, true, true); - if (!(u.startsWith("http") || u.startsWith("ftp"))) continue; + if (!(u.startsWith("http") || u.startsWith("ftp") || u.startsWith("smb"))) continue; // enqueue the hyperlink into the pre-notice-url db try { crawlStacker.enqueueEntry(new Request( @@ -1829,13 +1829,13 @@ public final class Switchboard extends serverSwitch { if (condenser == null || document.indexingDenied()) { if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase); - addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, "unknown indexing process case" + processCase); + addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, "denied by rule in document"); return; } if (!queueEntry.profile().indexText() && !queueEntry.profile().indexMedia()) { if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase); - addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, "unknown indexing process case" + processCase); + addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, "denied by profile rule"); return; } diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index e20844b33..c3a4c4583 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -1037,7 +1037,8 @@ public class DigestURI implements Serializable { */ public SmbFile getSmbFile() throws MalformedURLException { if (!isSMB()) throw new UnsupportedOperationException(); - return new SmbFile(this.toNormalform(false, true)); + String url = this.toNormalform(false, true); + return new SmbFile(url); } // some methods that let the DigestURI look like a java.io.File object @@ -1162,10 +1163,13 @@ public class DigestURI implements Serializable { public String[] list() { if (isFile()) return getFSFile().list(); if (isSMB()) try { - return getSmbFile().list(); - } catch (SmbException e) { - Log.logWarning("DigestURI", "SMB.list SmbException for " + this.toString() + ": " + e.getMessage()); - return null; + SmbFile sf = getSmbFile(); + try { + return sf.list(); + } catch (SmbException e) { + Log.logWarning("DigestURI", "SMB.list SmbException for " + sf.toString() + ": " + e.getMessage()); + return null; + } } catch (MalformedURLException e) { Log.logWarning("DigestURI", "SMB.list MalformedURLException for " + this.toString() + ": " + e.getMessage()); return null;