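Fixes for filesystem indexing: handle URLs without a host (CrawlProfile, RobotsTxt) and use a ConcurrentHashMap for the MirrorSolrConnector hit/miss cache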

pull/1/head
orbiter 12 years ago
parent bf42179982
commit 5aa5202adf

@@ -25,6 +25,7 @@ import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.solr.YaCySchema;
@@ -86,7 +87,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
this.hitCacheMax = hitCacheMax; this.hitCacheMax = hitCacheMax;
this.missCacheMax = missCacheMax; this.missCacheMax = missCacheMax;
this.partitions = Runtime.getRuntime().availableProcessors() * 2; this.partitions = Runtime.getRuntime().availableProcessors() * 2;
this.hitMissCache = new HashMap<String, HitMissCache>(); this.hitMissCache = new ConcurrentHashMap<String, HitMissCache>();
this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, this.partitions); this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, this.partitions);
} }

@@ -490,6 +490,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
public static String mustMatchFilterFullDomain(final MultiProtocolURI uri) { public static String mustMatchFilterFullDomain(final MultiProtocolURI uri) {
String host = uri.getHost(); String host = uri.getHost();
if (host == null) return uri.getProtocol() + ".*";
if (host.startsWith("www.")) host = host.substring(4); if (host.startsWith("www.")) host = host.substring(4);
String protocol = uri.getProtocol(); String protocol = uri.getProtocol();
if ("http".equals(protocol) || "https".equals(protocol)) protocol = "https?+"; if ("http".equals(protocol) || "https".equals(protocol)) protocol = "https?+";

@@ -236,6 +236,7 @@ public class RobotsTxt {
public void ensureExist(final MultiProtocolURI theURL, final Set<String> thisAgents, boolean concurrent) { public void ensureExist(final MultiProtocolURI theURL, final Set<String> thisAgents, boolean concurrent) {
final String urlHostPort = getHostPort(theURL); final String urlHostPort = getHostPort(theURL);
if (urlHostPort == null) return;
final BEncodedHeap robotsTable; final BEncodedHeap robotsTable;
try { try {
robotsTable = this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME); robotsTable = this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME);
@@ -371,6 +372,7 @@ public class RobotsTxt {
static final String getHostPort(final MultiProtocolURI theURL) { static final String getHostPort(final MultiProtocolURI theURL) {
final int port = getPort(theURL); final int port = getPort(theURL);
String host = theURL.getHost(); String host = theURL.getHost();
if (host == null) return null;
StringBuilder sb = new StringBuilder(host.length() + 6); StringBuilder sb = new StringBuilder(host.length() + 6);
sb.append(host).append(':').append(Integer.toString(port)); sb.append(host).append(':').append(Integer.toString(port));
return sb.toString(); return sb.toString();

Loading…
Cancel
Save