improve url hash computation for file path with mixed java & windows

file.separator to compute equal hashes (by normalizing path for computation)
+ expand test case to check mixed java / windows file url notation,
e.g. file:///c:/test/file.html vs. file:///c:\test/file.html
- relates partially to http://mantis.tokeek.de/view.php?id=692
pull/77/head
reger 9 years ago
parent bac302bfe4
commit 6f8c3ccea4

@ -242,17 +242,24 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
}
// find rootpath
final String normalizedPath;
if (this.isFile() && this.path.indexOf('\\') > -1) // for file protocol normalize path to java notation
normalizedPath = this.path.replace('\\','/'); // replace possible Windows pathseparator
else
normalizedPath = this.path;
int rootpathStart = 0;
int rootpathEnd = this.path.length() - 1;
if (!this.path.isEmpty() && (this.path.charAt(0) == '/' || this.path.charAt(0) == '\\'))
int rootpathEnd = normalizedPath.length() - 1;
if (!normalizedPath.isEmpty() && (normalizedPath.charAt(0) == '/'))
rootpathStart = 1;
if (this.path.endsWith("/"))
rootpathEnd = this.path.length() - 2;
p = this.path.indexOf('/', rootpathStart);
if (this.isFile() && p < 0) p = this.path.indexOf('\\', rootpathStart); // double-check for windows path (if it's a file url)
if (normalizedPath.endsWith("/"))
rootpathEnd = normalizedPath.length() - 2;
p = normalizedPath.indexOf('/', rootpathStart);
// following doesn't recognize mixed notation e.g. c:\\tmp/test.html correct -> solved by using normalized path
//if (this.isFile() && p < 0) p = this.path.indexOf('\\', rootpathStart); // double-check for windows path (if it's a file url)
String rootpath = "";
if (p > 0 && p < rootpathEnd) {
rootpath = this.path.substring(rootpathStart, p);
rootpath = normalizedPath.substring(rootpathStart, p);
}
// we collected enough information to compute the fragments that are

@ -1,7 +1,10 @@
package net.yacy.cora.document.id;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Set;
import junit.framework.TestCase;
import static junit.framework.TestCase.assertEquals;
import net.yacy.cora.document.encoding.ASCII;
import org.junit.Test;
@ -37,16 +40,24 @@ public class DigestURLTest extends TestCase {
*/
@Test
public void testHash_ForFile() throws MalformedURLException {
    // Reference: the Java ('/'-only) notation for a file on a Windows file system.
    // Every other notation of the same file is compared against this hash.
    final String javaUrlStr = "file:///C:/tmp/test.html";
    final DigestURL javaUrl = new DigestURL(javaUrlStr);
    final String javaHashResult = ASCII.String(javaUrl.hash());

    // All separator notations of the same file: pure Windows ('\'),
    // both mixed variants, and the pure Java form itself.
    final Set<String> testUrls = new HashSet<String>();
    testUrls.add("file:///C:\\tmp\\test.html");
    testUrls.add("file:///C:/tmp\\test.html");
    testUrls.add("file:///C:\\tmp/test.html");
    testUrls.add("file:///C:/tmp/test.html");

    // Compare each notation's hash to the reference hash; the offending
    // URL string is included in the message so a failure is attributable.
    for (final String str : testUrls) {
        final DigestURL winUrl = new DigestURL(str);
        final String winHashResult = ASCII.String(winUrl.hash());
        assertEquals("hash for same file url " + str, javaHashResult, winHashResult);
    }
}

Loading…
Cancel
Save