fix for file URLs

pull/1/head
orbiter 11 years ago
parent 08409ec680
commit 4b06adb751

@@ -225,12 +225,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
             // no host given
             this.path = h.substring(2); // "/path" or "/c:/path"
         } else if (h.startsWith("//")) { // "//host/path" or "//host/c:/path"
-            int q = url.indexOf('/', p + 3);
-            if (q < 0) {
-                this.path = "/";
-            } else {
-                this.path = url.substring(q);
-            }
+            this.path = h.substring(2); // "/path" or "/c:/path"
         } else if (h.startsWith("/")) { // "/host/path" or "/host/c:/path"
             this.path = h;
         }
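The removed branch re-scanned the raw URL for a slash behind the authority (url.indexOf('/', p + 3)) and fell back to "/" when none was found, so a Windows drive URL such as file://Z:\ lost its path entirely; the fix takes the path straight from the already-isolated fragment h. A minimal standalone sketch of the new branch logic (hypothetical helper, not the YaCy class; h stands for the part of the URL after "file:", and the "///" condition of the first branch is inferred from its comment):

    // Illustrative sketch only; the names pathOf/h are made up, and only the
    // h.substring(2) handling mirrors what this commit actually changes.
    public class FileUrlPathSketch {
        static String pathOf(final String h) {
            if (h.startsWith("///")) {
                return h.substring(2);  // no host: "///c:/path" -> "/c:/path"
            } else if (h.startsWith("//")) {
                return h.substring(2);  // "//Z:\" -> "Z:\" (drive letter, not a host)
            } else if (h.startsWith("/")) {
                return h;               // "/host/path" kept as given
            }
            return h;
        }
        public static void main(final String[] args) {
            System.out.println(pathOf("//Z:\\"));     // Z:\
            System.out.println(pathOf("///c:/path")); // /c:/path
        }
    }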
@@ -2196,6 +2191,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
      */
     public static void main(final String[] args) {
         final String[][] test = new String[][]{
+                new String[]{null, "file://Z:\\"},
                 new String[]{null, "https://www.example.com/shoe/?p=2&ps=75#t={%22san_NaviPaging%22:2}"}, // ugly strange pagination link
                 new String[]{null, "C:WINDOWS\\CMD0.EXE"},
                 new String[]{null, "file://C:WINDOWS\\CMD0.EXE"},
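The new first entry feeds a bare drive-letter file URL through the parsing fixed above. A hedged sketch of how such a {context, url} table is typically walked (the two constructor shapes are an assumption based on this class's usual API; only toNormalform(false) is visible in this commit):

    // Hedged sketch: each pair is { contextUrl, urlString }; a null context
    // means the string is parsed as an absolute URL. Assumes the constructor
    // throws java.net.MalformedURLException, as URL parsers commonly do.
    for (final String[] pair : test) {
        try {
            final MultiProtocolURL u = (pair[0] == null)
                    ? new MultiProtocolURL(pair[1])
                    : new MultiProtocolURL(new MultiProtocolURL(pair[0]), pair[1]);
            System.out.println(pair[1] + " -> " + u.toNormalform(false));
        } catch (final java.net.MalformedURLException e) {
            System.out.println(pair[1] + " -> failed: " + e.getMessage());
        }
    }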

@@ -116,9 +116,9 @@ public class FileLoader {
         // only the metadata is returned
         if (parserError != null) {
-            this.log.info("No parser available in File crawler: '" + parserError + "' for URL " + request.url().toString() + ": parsing only metadata");
+            this.log.info("No parser available in File crawler: '" + parserError + "' for URL " + request.url().toNormalform(false) + ": parsing only metadata");
         } else {
-            this.log.info("Too big file in File crawler with size = " + size + " Bytes for URL " + request.url().toString() + ": parsing only metadata");
+            this.log.info("Too big file in File crawler with size = " + size + " Bytes for URL " + request.url().toNormalform(false) + ": parsing only metadata");
         }
         // create response with metadata only

@@ -164,7 +164,7 @@ public class RobotsTxt {
                 try {
                     response = RobotsTxt.this.loader.load(request, CacheStrategy.NOCACHE, null, agent);
                 } catch (final Throwable e) {
-                    log.info("Trying to download the robots.txt file from URL '" + robotsURL + "' failed - " + e.getMessage());
+                    log.info("Trying to download the robots.txt file from URL '" + robotsURL.toNormalform(false) + "' failed - " + e.getMessage());
                     response = null;
                 }
             }
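The FileLoader and RobotsTxt hunks apply the same pattern: log messages render URLs through toNormalform(false) instead of toString() or implicit string conversion, so file URLs (including the drive-letter forms tested above) appear in one normalized spelling. A hedged usage sketch; the meaning of the boolean flag is an assumption, only the call shape comes from this commit:

    // Hedged sketch; wrap in main()/try-catch as needed. The flag presumably
    // controls whether the #fragment is included; that reading is an
    // assumption, not something this commit states.
    final MultiProtocolURL u = new MultiProtocolURL("file://Z:\\");
    System.out.println("loading " + u.toNormalform(false)); // normalized form, not raw toString()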
