diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml
index 1234dd0d3..d8044f969 100644
--- a/defaults/solr/solrconfig.xml
+++ b/defaults/solr/solrconfig.xml
@@ -461,19 +461,21 @@
and old cache.
-->
+ size="64"
+ initialSize="64"
+ autowarmCount="4"
+ cleanupThread="true"/>
-
+
-
+
diff --git a/defaults/yacy.init b/defaults/yacy.init
index d23d09bae..b910a354e 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -797,11 +797,6 @@ search.excludehosth=
# the cases of nocache, iffresh and ifexist causes an index deletion
search.verify.delete = true
-# images may be treated either as documents that are shown in search results or as objects
-# that are only visible in special search environments, like image search
-search.excludeintext.image = true
-crawler.load.image = true
-
# remote search details
remotesearch.maxcount = 10
remotesearch.maxtime = 3000
diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html
index ae8c851ce..890ae9845 100644
--- a/htroot/ConfigHTCache_p.html
+++ b/htroot/ConfigHTCache_p.html
@@ -19,7 +19,7 @@
-
#[actualCacheSize]# MB
+
#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average
MB
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java
index 73141e65a..48d4df623 100644
--- a/htroot/ConfigHTCache_p.java
+++ b/htroot/ConfigHTCache_p.java
@@ -77,7 +77,9 @@ public class ConfigHTCache_p {
}
prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
- prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
+ prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024); // the template labels this value "MB"
+ prop.put("actualCacheDocCount", Cache.getActualCacheDocCount()); // the template labels this value "files"
+ prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); // report 0 for an empty cache instead of dividing by a zero document count; the template labels this value "KB / file"
prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
// return rewrite properties
return prop;
diff --git a/htroot/ContentAnalysis_p.java b/htroot/ContentAnalysis_p.java
index 2ba573ab0..eed8455e7 100644
--- a/htroot/ContentAnalysis_p.java
+++ b/htroot/ContentAnalysis_p.java
@@ -34,7 +34,7 @@ public class ContentAnalysis_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
- sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+ sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));
diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java
index 833b77f00..8d53f191f 100644
--- a/htroot/HostBrowser.java
+++ b/htroot/HostBrowser.java
@@ -553,7 +553,6 @@ public class HostBrowser {
}
} catch (final IOException e) {
}
-
}
this.references_external = (rc_external == null || rc_external.intValue() <= 0) ? 0 : rc_external.intValue();
this.references_exthosts = (rc_exthosts == null || rc_exthosts.intValue() <= 0) ? 0 : rc_exthosts.intValue();
@@ -562,7 +561,7 @@ public class HostBrowser {
StringBuilder sbi = new StringBuilder();
int c = 0;
for (String s: references_internal_urls) {
- sbi.append("");
+ sbi.append("");
c++;
if (c % 80 == 0) sbi.append(" ");
}
@@ -570,7 +569,7 @@ public class HostBrowser {
StringBuilder sbe = new StringBuilder();
c = 0;
for (String s: references_external_urls) {
- sbe.append("");
+ sbe.append("");
c++;
if (c % 80 == 0) sbe.append(" ");
}
diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html
index b83d63754..6b96b1b17 100644
--- a/htroot/IndexControlURLs_p.html
+++ b/htroot/IndexControlURLs_p.html
@@ -193,6 +193,9 @@ function updatepage(str) {
URL Filter
+
query
+
+
Export Format
Only Domain:
Plain Text List (domains only)
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java
index 94d46ba0e..48da0982c 100644
--- a/htroot/IndexControlURLs_p.java
+++ b/htroot/IndexControlURLs_p.java
@@ -261,7 +261,8 @@ public class IndexControlURLs_p {
final File f = new File(s);
f.getParentFile().mkdirs();
final String filter = post.get("exportfilter", ".*");
- final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
+ final String query = post.get("exportquery", "*:*");
+ final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
prop.put("lurlexport_exportfile", s);
prop.put("lurlexport_urlcount", running.count());
diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java
index 04784f938..91e543a11 100644
--- a/htroot/RankingSolr_p.java
+++ b/htroot/RankingSolr_p.java
@@ -38,7 +38,7 @@ public class RankingSolr_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
- sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+ sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
int profileNr = 0;
if (post != null) profileNr = post.getInt("profileNr", profileNr);
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 284a0b15e..b79c8061b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -360,7 +360,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
- indexSegment.clearCache();
+ indexSegment.clearCaches();
SearchEventCache.cleanupEvents(false);
}
diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java
index de91810f7..c1b2000bf 100644
--- a/source/net/yacy/cora/document/id/MultiProtocolURL.java
+++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java
@@ -57,6 +57,7 @@ import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.ftp.FTPClient;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
+import net.yacy.document.parser.html.CharacterCoding;
/**
* MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
@@ -66,7 +67,6 @@ public class MultiProtocolURL implements Serializable, Comparable existsByIds(Collection ids) throws IOException {
+ public Set existsByIds(Set ids) throws IOException {
if (ids == null || ids.size() == 0) return new HashSet();
// construct raw query
final SolrQuery params = new SolrQuery();
diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
index c96fe2d33..eaf93603c 100644
--- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
@@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
this.missCache = new ConcurrentARC(missCacheMax, partitions);
}
- public void clearCache() {
+ public void clearCaches() {
this.hitCache.clear();
this.missCache.clear();
this.documentCache.clear();
@@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public synchronized void close() {
+ this.clearCaches(); // empty hitCache, missCache and documentCache first, then close and drop this.solr
if (this.solr != null) this.solr.close();
this.solr = null;
- this.clearCache();
}
/**
@@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public void clear() throws IOException {
- this.clearCache();
+ this.clearCaches(); // renamed from clearCache(); the local caches are emptied before clear() is delegated to this.solr
if (this.solr != null) this.solr.clear();
}
@@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public void deleteByQuery(final String querystring) throws IOException {
- this.clearCache();
+ this.clearCaches(); // all local caches are dropped before the delete is delegated; NOTE(review): unlike clear(), the following call does not null-check this.solr - confirm it cannot run after close()
this.solr.deleteByQuery(querystring);
}
@@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
}
private void addToCache(SolrDocumentList list, boolean doccache) {
- if (MemoryControl.shortStatus()) clearCache();
+ if (MemoryControl.shortStatus()) clearCaches();
for (final SolrDocument solrdoc: list) {
addToCache(solrdoc, doccache);
}
diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
index 8eff5f315..ddbf550ec 100644
--- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
@@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
ensureAliveUpdateHandler();
}
+ @Override
+ public void clearCaches() {
+ this.connector.clearCaches(); // forward the request to the wrapped connector first
+ this.idCache.clear(); // then empty the id cache kept by this class
+ }
+
/**
* used for debugging
*/
@@ -326,10 +332,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
@Override
- public Set existsByIds(Collection ids) throws IOException {
+ public Set existsByIds(Set ids) throws IOException {
HashSet e = new HashSet();
if (ids == null || ids.size() == 0) return e;
- Collection idsC = new HashSet();
+ if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : e;
+ Set idsC = new HashSet();
for (String id: ids) {
if (this.idCache.has(ASCII.getBytes(id))) {cacheSuccessSign(); e.add(id); continue;}
if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); continue;}
diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
index 6348c79a3..10d36a9c9 100644
--- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
@@ -22,7 +22,6 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
-import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
@@ -35,6 +34,7 @@ import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
@@ -48,10 +48,14 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.request.UnInvertedField;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.QueryResultKey;
+import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
@@ -89,6 +93,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
super.init(this.instance.getServer(coreName));
}
+ public void clearCaches() { // NOTE(review): the other connector implementations in this change mark clearCaches() with @Override; the annotation is absent here
+ SolrConfig solrConfig = this.core.getSolrConfig(); // the four cache configurations below are read from the core's SolrConfig
+ @SuppressWarnings("unchecked")
+ SolrCache fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance(); // NOTE(review): newInstance() presumably creates a fresh cache object rather than returning the cache in use by the searcher - confirm that clearing it has any effect (applies to all four caches)
+ if (fieldValueCache != null) fieldValueCache.clear(); // skipped when no fieldValueCache is configured
+ @SuppressWarnings("unchecked")
+ SolrCache filterCache= solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
+ if (filterCache != null) filterCache.clear(); // skipped when no filterCache is configured
+ @SuppressWarnings("unchecked")
+ SolrCache queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
+ if (queryResultCache != null) queryResultCache.clear(); // skipped when no queryResultCache is configured
+ @SuppressWarnings("unchecked")
+ SolrCache documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
+ if (documentCache != null) documentCache.clear(); // skipped when no documentCache is configured
+ }
+
public SolrInstance getInstance() {
return this.instance;
}
@@ -224,9 +244,9 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
@Override
- public Set existsByIds(Collection ids) {
+ public Set existsByIds(Set ids) {
if (ids == null || ids.size() == 0) return new HashSet();
- if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set) ids : new HashSet();
+ if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet();
StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
for (String id: ids) {
sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR ");
diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
index 3f7a1453c..19fa604c5 100644
--- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
@@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
this.solr0 = solr0;
this.solr1 = solr1;
}
+
+ @Override
+ public void clearCaches() {
+ if (this.solr0 != null) this.solr0.clearCaches(); // either connector of the mirror may be null, so each is checked before delegating
+ if (this.solr1 != null) this.solr1.clearCaches();
+ }
public boolean isConnected0() {
return this.solr0 != null;
@@ -347,7 +353,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
@Override
- public Set existsByIds(Collection ids) throws IOException {
+ public Set existsByIds(Set ids) throws IOException {
+ if (ids == null || ids.size() == 0) return new HashSet();
+ if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet();
if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids);
if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids);
Set s = new HashSet();
diff --git a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
index 4e2a9369f..0ab5f8b31 100644
--- a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
@@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
super.close();
}
+ @Override
+ public void clearCaches() {
+ // intentionally a no-op: we do not have direct access to the caches here, thus we simply do nothing.
+ }
+
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
}
System.exit(0);
}
+
}
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
index e7a3dd957..f28d26f09 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
@@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public interface SolrConnector extends Iterable /* Iterable of document IDs */ {
-
+
+ /**
+ * clear all caches: inside solr and outside solr within the implementations of this interface
+ */
+ public void clearCaches();
+
/**
* get the size of the index
* @return number of results if solr is queries with a catch-all pattern
@@ -106,7 +111,7 @@ public interface SolrConnector extends Iterable /* Iterable of document
* @return a collection of a subset of the ids which exist in the index
* @throws IOException
*/
- public Set existsByIds(Collection ids) throws IOException;
+ public Set existsByIds(Set ids) throws IOException;
/**
* check if a given document exists in solr
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
index f12d43950..aec6352f0 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
@@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrServer getServer() {
return this.server;
}
-
+
@Override
public void commit(final boolean softCommit) {
synchronized (this.server) {
diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
index 6b9b7a939..1d49fd537 100644
--- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
+++ b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
@@ -24,7 +24,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@@ -161,9 +160,9 @@ public class InstanceMirror {
return msc;
}
- public void clearCache() {
+ public void clearCaches() {
for (SolrConnector csc: this.connectorCache.values()) {
- if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
+ csc.clearCaches(); // clearCaches() is now declared on the SolrConnector interface, so the former instanceof check and cast are no longer needed
}
for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
}
diff --git a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
index 19125afda..a4d3c38be 100644
--- a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
@@ -1,195 +1,193 @@
-/**
- * HTMLResponseWriter
- * Copyright 2013 by Michael Peter Christen
- * First released 09.06.2013 at http://yacy.net
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program in the file lgpl21.txt
- * If not, see .
- */
-
-package net.yacy.cora.federate.solr.responsewriter;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.Date;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import net.yacy.cora.federate.solr.SolrType;
-import net.yacy.search.schema.CollectionSchema;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexableField;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.XML;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.QueryResponseWriter;
-import org.apache.solr.response.ResultContext;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.TextField;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocList;
-import org.apache.solr.search.SolrIndexSearcher;
-
-public class HTMLResponseWriter implements QueryResponseWriter {
-
- private static final Set DEFAULT_FIELD_LIST = null;
- private static final Pattern dqp = Pattern.compile("\"");
-
- public HTMLResponseWriter() {
- super();
- }
-
- @Override
- public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
- return "text/html";
- }
-
- @Override
- public void init(@SuppressWarnings("rawtypes") NamedList n) {
- }
-
- @Override
- public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
- NamedList> values = rsp.getValues();
- assert values.get("responseHeader") != null;
- assert values.get("response") != null;
-
- writer.write("\n");
- //writer.write("\n");
- writer.write("\n");
- writer.write("\n");
- //writer.write("\n");
- writer.write("\n");
- writer.write("\n");
- NamedList