fix for file URLs

pull/1/head
orbiter 11 years ago
parent 08409ec680
commit 4b06adb751

@@ -225,12 +225,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
             // no host given
             this.path = h.substring(2); // "/path" or "/c:/path"
         } else if (h.startsWith("//")) { // "//host/path" or "//host/c:/path"
-            int q = url.indexOf('/', p + 3);
-            if (q < 0) {
-                this.path = "/";
-            } else {
-                this.path = url.substring(q);
-            }
+            this.path = h.substring(2); // "/path" or "/c:/path"
         } else if (h.startsWith("/")) { // "/host/path" or "/host/c:/path"
             this.path = h;
         }
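The removed branch re-scanned the raw URL for a slash behind the authority (url.indexOf('/', p + 3)) and fell back to "/" when none was found, so a Windows drive URL such as file://Z:\ lost its path entirely; the fix takes the path straight from the already-isolated fragment h. A minimal standalone sketch of the new branch logic (hypothetical helper, not the YaCy class; h stands for the part of the URL after "file:", and the "///" condition of the first branch is inferred from its comment):

    // Illustrative sketch only; the names pathOf/h are made up, and only the
    // h.substring(2) handling mirrors what this commit actually changes.
    public class FileUrlPathSketch {
        static String pathOf(final String h) {
            if (h.startsWith("///")) {
                return h.substring(2);  // no host: "///c:/path" -> "/c:/path"
            } else if (h.startsWith("//")) {
                return h.substring(2);  // "//Z:\" -> "Z:\" (drive letter, not a host)
            } else if (h.startsWith("/")) {
                return h;               // "/host/path" kept as given
            }
            return h;
        }
        public static void main(final String[] args) {
            System.out.println(pathOf("//Z:\\"));     // Z:\
            System.out.println(pathOf("///c:/path")); // /c:/path
        }
    }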
@@ -2196,6 +2191,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
      */
     public static void main(final String[] args) {
         final String[][] test = new String[][]{
+                new String[]{null, "file://Z:\\"},
                 new String[]{null, "https://www.example.com/shoe/?p=2&ps=75#t={%22san_NaviPaging%22:2}"}, // ugly strange pagination link
                 new String[]{null, "C:WINDOWS\\CMD0.EXE"},
                 new String[]{null, "file://C:WINDOWS\\CMD0.EXE"},
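The new first entry feeds a bare drive-letter file URL through the parsing fixed above. A hedged sketch of how such a {context, url} table is typically walked (the two constructor shapes are an assumption based on this class's usual API; only toNormalform(false) is visible in this commit):

    // Hedged sketch: each pair is { contextUrl, urlString }; a null context
    // means the string is parsed as an absolute URL. Assumes the constructor
    // throws java.net.MalformedURLException, as URL parsers commonly do.
    for (final String[] pair : test) {
        try {
            final MultiProtocolURL u = (pair[0] == null)
                    ? new MultiProtocolURL(pair[1])
                    : new MultiProtocolURL(new MultiProtocolURL(pair[0]), pair[1]);
            System.out.println(pair[1] + " -> " + u.toNormalform(false));
        } catch (final java.net.MalformedURLException e) {
            System.out.println(pair[1] + " -> failed: " + e.getMessage());
        }
    }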

@@ -116,9 +116,9 @@ public class FileLoader {
         // only the metadata is returned
         if (parserError != null) {
-            this.log.info("No parser available in File crawler: '" + parserError + "' for URL " + request.url().toString() + ": parsing only metadata");
+            this.log.info("No parser available in File crawler: '" + parserError + "' for URL " + request.url().toNormalform(false) + ": parsing only metadata");
         } else {
-            this.log.info("Too big file in File crawler with size = " + size + " Bytes for URL " + request.url().toString() + ": parsing only metadata");
+            this.log.info("Too big file in File crawler with size = " + size + " Bytes for URL " + request.url().toNormalform(false) + ": parsing only metadata");
         }
         // create response with metadata only

@@ -164,7 +164,7 @@ public class RobotsTxt {
                 try {
                     response = RobotsTxt.this.loader.load(request, CacheStrategy.NOCACHE, null, agent);
                 } catch (final Throwable e) {
-                    log.info("Trying to download the robots.txt file from URL '" + robotsURL + "' failed - " + e.getMessage());
+                    log.info("Trying to download the robots.txt file from URL '" + robotsURL.toNormalform(false) + "' failed - " + e.getMessage());
                     response = null;
                 }
             }
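The FileLoader and RobotsTxt hunks apply the same pattern: log messages render URLs through toNormalform(false) instead of toString() or implicit string conversion, so file URLs (including the drive-letter forms tested above) appear in one normalized spelling. A hedged usage sketch; the meaning of the boolean flag is an assumption, only the call shape comes from this commit:

    // Hedged sketch; wrap in main()/try-catch as needed. The flag presumably
    // controls whether the #fragment is included; that reading is an
    // assumption, not something this commit states.
    final MultiProtocolURL u = new MultiProtocolURL("file://Z:\\");
    System.out.println("loading " + u.toNormalform(false)); // normalized form, not raw toString()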
