More IPv6 bugfixes

pull/1/head
Michael Peter Christen 11 years ago
parent 7817fc50c9
commit 9b1958e8ca

@ -287,7 +287,7 @@ public class HostBrowser {
// get all files for a specific host from the index
StringBuilder q = new StringBuilder();
if (host != null) q.append(CollectionSchema.host_s.getSolrFieldName()).append(':').append(host);
if (host != null) q.append(CollectionSchema.host_s.getSolrFieldName()).append(":\"").append(host).append("\"");
if (pathparts.length > 0 && pathparts[0].length() > 0) {
for (String pe: pathparts) {
if (pe.length() > 0) q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(":\"").append(pe).append('\"');

@ -230,7 +230,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
final int id = Domains.getDomainID(this.host, this.hostAddress); // id=7: tld is local
final boolean isHTTP = isHTTP();
int p = (this.host == null) ? -1 : this.host.lastIndexOf('.');
int p = (this.host == null || this.host.indexOf(':') >= 0) ? -1 : this.host.lastIndexOf('.');
String dom = (p > 0) ? dom = this.host.substring(0, p) : "";
p = dom.lastIndexOf('.'); // locate subdomain
final String subdom;
@ -295,8 +295,11 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
if (host == null) {
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(protocol)).substring(0, 5);
}
boolean isIPv6HostIP = host.indexOf(':') >= 0;
final StringBuilder sb = new StringBuilder(host.length() + 15);
sb.append(protocol).append(':').append(host).append(':').append(Integer.toString(port));
sb.append(protocol).append(':');
if (isIPv6HostIP) {sb.append('[').append(host).append(']');} else sb.append(host);
sb.append(':').append(Integer.toString(port));
return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(sb.toString())).substring(0, 5);
}

@ -2069,7 +2069,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return -1;
}
if (isSMB()) try {
return TimeoutRequest.length(getSmbFile(), SMB_TIMEOUT);
return getSmbFile().length();
//return TimeoutRequest.length(getSmbFile(), SMB_TIMEOUT); // a timeout request is a bad idea: it would create a lot of concurrent threads during crawling
} catch (final Throwable e) {
ConcurrentLog.logException(e);
return -1;
@ -2080,7 +2081,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public long lastModified() throws IOException {
if (isFile()) return getFSFile().lastModified();
if (isSMB()) try {
return TimeoutRequest.lastModified(getSmbFile(), SMB_TIMEOUT);
return getSmbFile().lastModified();
// return TimeoutRequest.lastModified(getSmbFile(), SMB_TIMEOUT); // a timeout request is a bad idea: it would create a lot of concurrent threads during crawling
} catch (final SmbException e) {
throw new IOException("SMB.lastModified SmbException (" + e.getMessage() + ") for " + toNormalform(false));
} catch (final MalformedURLException e) {

@ -335,7 +335,8 @@ public class RobotsTxt {
String host = theURL.getHost();
if (host == null) return null;
StringBuilder sb = new StringBuilder(host.length() + 6);
sb.append(host).append(':').append(Integer.toString(port));
if (host.indexOf(':') >= 0) {sb.append('[').append(host).append(']');} else sb.append(host);
sb.append(':').append(Integer.toString(port));
return sb.toString();
}

Loading…
Cancel
Save