allow/recognize host in file: protocol crawl target

This is useful for intranet indexing when crawling an intranet file server that is accessed via its hostname but that, e.g. under Windows, is mapped to different drive letters on individual clients.
With this change you can crawl e.g. file://fileserver/documents, which is a valid URI throughout that intranet environment (whereas e.g. P:/documents might be client-dependent).
pull/1/head
reger 10 years ago
parent 77851fa53c
commit eda0aeaf26

@ -223,7 +223,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
// no host given
this.path = h.substring(2); // "/path" or "/c:/path"
} else if (h.startsWith("//")) { // "//host/path" or "//host/c:/path"
this.path = h.substring(2); // "/path" or "/c:/path"
int q = h.indexOf('/', 2);
if (q < 0) {
this.path = h.substring(2); // "path" or "c:/path"
} else {
this.host = h.substring(2, q ); // TODO: handle "c:" ?
if (this.host.equalsIgnoreCase(Domains.LOCALHOST)) this.host = null;
this.path = h.substring(q ); // "/path"
}
} else if (h.startsWith("/")) { // "/host/path" or "/host/c:/path"
this.path = h;
}
@ -1985,7 +1992,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
*/
public java.io.File getFSFile() throws MalformedURLException {
// Reject anything for which isFile() is false; the caller gets a MalformedURLException without a message.
if (!isFile()) throw new MalformedURLException();
// NOTE(review): the two consecutive returns below look like the pre-change and post-change
// line of this diff hunk shown together (the +/- markers are not visible here); as plain Java
// the second return would be unreachable. Confirm against the actual file before relying on either.
// Presumably the pre-change line: drops the first 7 characters of the normal form (the length of "file://").
return new java.io.File(this.toNormalform(true).substring(7));
// Presumably the post-change line: drops only the first 5 characters (the length of "file:"),
// so whatever follows the scheme, including a leading "//host" if toNormalform emits one,
// is passed on to java.io.File. TODO confirm the exact shape of toNormalform(true) output.
return new java.io.File(this.toNormalform(true).substring(5));
}
/**

Loading…
Cancel
Save