Adjusted hash computation and toNormalform for file:// protocol to deliver

the same hash for the same file on a Windows filesystem, whether the path is written with forward slashes or backslashes.
Background see http://mantis.tokeek.de/view.php?id=671
+Test case
pull/65/head
reger 9 years ago
parent f0f38a4a94
commit 87fcfc6d78

@ -244,11 +244,12 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
// find rootpath
int rootpathStart = 0;
int rootpathEnd = this.path.length() - 1;
if (!this.path.isEmpty() && this.path.charAt(0) == '/')
if (!this.path.isEmpty() && (this.path.charAt(0) == '/' || this.path.charAt(0) == '\\'))
rootpathStart = 1;
if (this.path.endsWith("/"))
rootpathEnd = this.path.length() - 2;
p = this.path.indexOf('/', rootpathStart);
if (this.isFile() && p < 0) p = this.path.indexOf('\\', rootpathStart); // double-check for windows path (if it's a file url)
String rootpath = "";
if (p > 0 && p < rootpathEnd) {
rootpath = this.path.substring(rootpathStart, p);
@ -264,7 +265,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
final StringBuilder hashs = new StringBuilder(12);
assert hashs.length() == 0;
// form the 'local' part of the hash
final String normalform = toNormalform(true, true);
final String normalform = toNormalform(true, true); // normalizes also Windows backslash in path to '/' for file url
final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
if (b64l.length() < 5) return null;
hashs.append(b64l.substring(0, 5)); // 5 chars

@ -832,7 +832,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
}
/**
* Get extension out of a filename
* Get extension out of a filename in lowercase
* cuts off query part
* @param fileName
* @return extension or ""
@ -1064,8 +1064,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return toNormalform(excludeAnchor, false);
}
/**
* Generates a normal form of the URL.
* For file: URLs it also normalizes the path delimiter to '/' (replacing any Windows '\').
* @param excludeAnchor
* @param removeSessionID
* @return
*/
public String toNormalform(final boolean excludeAnchor, final boolean removeSessionID) {
// generates a normal form of the URL
boolean defaultPort = false;
if (this.protocol.equals("mailto")) {
return this.protocol + ":" + this.userInfo + "@" + this.host;
@ -1096,6 +1102,9 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
u.append(":");
u.append(this.port);
}
if (isFile() && urlPath.indexOf('\\') >= 0) { // normalize windows backslash (important for hash computation)
urlPath = urlPath.replace('\\', '/');
}
u.append(urlPath);
String result = u.toString();

@ -2,6 +2,7 @@ package net.yacy.cora.document.id;
import java.net.MalformedURLException;
import junit.framework.TestCase;
import net.yacy.cora.document.encoding.ASCII;
import org.junit.Test;
public class DigestURLTest extends TestCase {
@ -30,4 +31,23 @@ public class DigestURLTest extends TestCase {
}
}
/**
 * Checks that {@code DigestURL.hash()} produces one and the same hash for a
 * file url, regardless of whether its path is written in Windows notation
 * (backslashes) or in Java notation (forward slashes).
 *
 * @throws MalformedURLException if one of the test urls cannot be parsed
 */
@Test
public void testHash_ForFile() throws MalformedURLException {
    // the same file on a Windows file system, written in both allowed notations
    final DigestURL backslashUrl = new DigestURL("file:///C:\\tmp\\test.html");
    final DigestURL forwardSlashUrl = new DigestURL("file:///C:/tmp/test.html");

    final String backslashHash = ASCII.String(backslashUrl.hash());
    final String forwardSlashHash = ASCII.String(forwardSlashUrl.hash());

    // the forward-slash form serves as the expected reference value
    assertEquals("hash for same file url", forwardSlashHash, backslashHash);
}
}

Loading…
Cancel
Save