diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml
index 1234dd0d3..e0f895152 100644
--- a/defaults/solr/solrconfig.xml
+++ b/defaults/solr/solrconfig.xml
@@ -461,19 +461,21 @@
and old cache.
-->
+ size="64"
+ initialSize="64"
+ autowarmCount="0"
+ cleanupThread="true"/>
-
+
-
+
-
+ size="64"
+ autowarmCount="0"
+ showItems="32"
+ cleanupThread="true"/>
diff --git a/defaults/yacy.init b/defaults/yacy.init
index ae70b9162..25769d1a8 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -442,6 +442,7 @@ seedScpPath=
peerCycle=2
# debug flags
+debug.search.profiling=false
debug.search.local.dht.off=false
debug.search.local.solr.off=false
debug.search.remote.dht.off=false
diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html
index ae8c851ce..890ae9845 100644
--- a/htroot/ConfigHTCache_p.html
+++ b/htroot/ConfigHTCache_p.html
@@ -19,7 +19,7 @@
- #[actualCacheSize]# MB
+ #[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average
MB
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java
index 73141e65a..48d4df623 100644
--- a/htroot/ConfigHTCache_p.java
+++ b/htroot/ConfigHTCache_p.java
@@ -77,7 +77,9 @@ public class ConfigHTCache_p {
}
prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
- prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
+ prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024);
+ prop.put("actualCacheDocCount", Cache.getActualCacheDocCount());
+ prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); // average size in KB per cached entry; an empty cache yields 0 instead of a division by zero
prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
// return rewrite properties
return prop;
diff --git a/htroot/ContentAnalysis_p.java b/htroot/ContentAnalysis_p.java
index 2ba573ab0..eed8455e7 100644
--- a/htroot/ContentAnalysis_p.java
+++ b/htroot/ContentAnalysis_p.java
@@ -34,7 +34,7 @@ public class ContentAnalysis_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
- sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+ sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));
diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java
index 04784f938..91e543a11 100644
--- a/htroot/RankingSolr_p.java
+++ b/htroot/RankingSolr_p.java
@@ -38,7 +38,7 @@ public class RankingSolr_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
- sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+ sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
int profileNr = 0;
if (post != null) profileNr = post.getInt("profileNr", profileNr);
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 284a0b15e..b79c8061b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -360,7 +360,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
- indexSegment.clearCache();
+ indexSegment.clearCaches();
SearchEventCache.cleanupEvents(false);
}
diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
index c96fe2d33..eaf93603c 100644
--- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java
@@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
this.missCache = new ConcurrentARC(missCacheMax, partitions);
}
- public void clearCache() {
+ public void clearCaches() {
this.hitCache.clear();
this.missCache.clear();
this.documentCache.clear();
@@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public synchronized void close() {
+ this.clearCaches();
if (this.solr != null) this.solr.close();
this.solr = null;
- this.clearCache();
}
/**
@@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public void clear() throws IOException {
- this.clearCache();
+ this.clearCaches();
if (this.solr != null) this.solr.clear();
}
@@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public void deleteByQuery(final String querystring) throws IOException {
- this.clearCache();
+ this.clearCaches();
this.solr.deleteByQuery(querystring);
}
@@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
}
private void addToCache(SolrDocumentList list, boolean doccache) {
- if (MemoryControl.shortStatus()) clearCache();
+ if (MemoryControl.shortStatus()) clearCaches();
for (final SolrDocument solrdoc: list) {
addToCache(solrdoc, doccache);
}
diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
index 792d921ad..ddbf550ec 100644
--- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java
@@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
ensureAliveUpdateHandler();
}
+ @Override
+ public void clearCaches() {
+ this.connector.clearCaches();
+ this.idCache.clear();
+ }
+
/**
* used for debugging
*/
diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
index 533ecb080..7b5c104d5 100644
--- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java
@@ -30,10 +30,12 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
@@ -47,10 +49,14 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.request.UnInvertedField;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.QueryResultKey;
+import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
@@ -88,6 +94,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
super.init(this.instance.getServer(coreName));
}
+ public void clearCaches() { // calls clear() on a cache instance created from each of the four cache configs of this core's SolrConfig; NOTE(review): presumably implements SolrConnector.clearCaches(), an @Override would match the sibling connectors - confirm
+ SolrConfig solrConfig = this.core.getSolrConfig();
+ @SuppressWarnings("unchecked")
+ SolrCache fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance(); // NOTE(review): newInstance() presumably builds a fresh cache from the config rather than returning the live cache of the current searcher; if so, none of the clear() calls in this method touch cached data - confirm against the Solr API
+ if (fieldValueCache != null) fieldValueCache.clear();
+ @SuppressWarnings("unchecked")
+ SolrCache filterCache= solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance(); // a missing config (null) means there is nothing to clear for this cache type
+ if (filterCache != null) filterCache.clear();
+ @SuppressWarnings("unchecked")
+ SolrCache queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
+ if (queryResultCache != null) queryResultCache.clear();
+ @SuppressWarnings("unchecked")
+ SolrCache documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
+ if (documentCache != null) documentCache.clear();
+ }
+
public SolrInstance getInstance() {
return this.instance;
}
@@ -224,6 +246,17 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
@Override
public Set existsByIds(Set ids) {
+ boolean debug = ids != null && Switchboard.getSwitchboard().getConfigBool("debug.search.profiling", false); // never profile a null id set: the loop below would throw a NullPointerException before the null check further down is reached
+ long debugSingleTime = 0; int debugSingleCount = 0;
+ if (debug) {
+ // run this also with single exist queries which might be faster (but we don't know, that's the reason we test that here)
+ long start = System.currentTimeMillis();
+ Set idsr = new HashSet();
+ for (String id: ids) if (existsById(id)) idsr.add(id);
+ debugSingleTime = System.currentTimeMillis() - start;
+ debugSingleCount = idsr.size();
+ }
+ long start = System.currentTimeMillis();
if (ids == null || ids.size() == 0) return new HashSet();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet();
StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
@@ -246,6 +279,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
} finally {
docListSearcher.close();
}
+ long debugCollectionTime = System.currentTimeMillis() - start;
+ if (debug) {
+ ConcurrentLog.info("EmbeddedSolrConnector", "Comparisment of existsByIds: input=" + ids.size() + " records, output=" + idsr.size() + " records, singleTime=" + debugSingleTime + ", collectionTime=" + debugCollectionTime + ", singleCount=" + debugSingleCount + ", collectionCount=" + idsr.size());
+ }
// construct a new id list from that
return idsr;
}
diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
index c6d51e8ec..19fa604c5 100644
--- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java
@@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
this.solr0 = solr0;
this.solr1 = solr1;
}
+
+ @Override
+ public void clearCaches() {
+ if (this.solr0 != null) this.solr0.clearCaches();
+ if (this.solr1 != null) this.solr1.clearCaches();
+ }
public boolean isConnected0() {
return this.solr0 != null;
diff --git a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
index 4e2a9369f..0ab5f8b31 100644
--- a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java
@@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
super.close();
}
+ @Override
+ public void clearCaches() {
+ // we do not have a direct access to the caches here, thus we simply do nothing.
+ }
+
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
}
System.exit(0);
}
+
}
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
index 8fb31c531..f28d26f09 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java
@@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public interface SolrConnector extends Iterable /* Iterable of document IDs */ {
-
+
+ /**
+ * clear all caches: inside solr and outside solr within the implementations of this interface
+ */
+ public void clearCaches();
+
/**
* get the size of the index
* @return number of results if solr is queries with a catch-all pattern
diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
index f12d43950..aec6352f0 100644
--- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
+++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java
@@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrServer getServer() {
return this.server;
}
-
+
@Override
public void commit(final boolean softCommit) {
synchronized (this.server) {
diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
index 6b9b7a939..1d49fd537 100644
--- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
+++ b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java
@@ -24,7 +24,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@@ -161,9 +160,9 @@ public class InstanceMirror {
return msc;
}
- public void clearCache() {
+ public void clearCaches() {
for (SolrConnector csc: this.connectorCache.values()) {
- if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
+ csc.clearCaches();
}
for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
}
diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java
index f1d72354f..9973f08a0 100644
--- a/source/net/yacy/crawler/data/Cache.java
+++ b/source/net/yacy/crawler/data/Cache.java
@@ -182,6 +182,14 @@ public final class Cache {
public static long getActualCacheSize() {
return fileDBunbuffered.length();
}
+
+ /**
+ * get the number of entries currently stored in the cache database
+ * @return the value of fileDBunbuffered.size()
+ */
+ public static long getActualCacheDocCount() {
+ return fileDBunbuffered.size();
+ }
/**
* close the databases
diff --git a/source/net/yacy/data/BookmarksDB.java b/source/net/yacy/data/BookmarksDB.java
index 1c11b4b15..d9c0140a6 100644
--- a/source/net/yacy/data/BookmarksDB.java
+++ b/source/net/yacy/data/BookmarksDB.java
@@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.MapHeap;
+import net.yacy.kelondro.data.meta.URIMetadataRow;
+import net.yacy.kelondro.index.RowHandleSet;
public class BookmarksDB {
@@ -147,11 +150,6 @@ public class BookmarksDB {
ConcurrentLog.logException(e);
}
}
- public String addBookmark(final Bookmark bookmark){
- saveBookmark(bookmark);
- return bookmark.getUrlHash();
-
- }
public Bookmark getBookmark(final String urlHash) throws IOException {
try {
@@ -214,18 +212,13 @@ public class BookmarksDB {
final TreeSet set=new TreeSet(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);
final Tag tag=getTag(tagHash);
- Set hashes=new HashSet();
- if (tag != null) {
- hashes=getTag(tagHash).getUrlHashes();
- }
+ RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes();
if (priv) {
- set.addAll(hashes);
+ for (byte[] hash: hashes) set.add(ASCII.String(hash));
} else {
- final Iterator it=hashes.iterator();
- Bookmark bm;
- while(it.hasNext()){
+ for (byte[] hash: hashes) {
try {
- bm = getBookmark(it.next());
+ Bookmark bm = getBookmark(ASCII.String(hash));
if (bm != null && bm.getPublic()) {
set.add(bm.getUrlHash());
}
@@ -249,7 +242,7 @@ public class BookmarksDB {
* retrieve an object of type Tag from the the tagCache, if object is not cached return loadTag(hash)
* @param hash an object of type String, containing a tagHash
*/
- public Tag getTag(final String hash){
+ private Tag getTag(final String hash){
return this.tags.get(hash); //null if it does not exists
}
@@ -257,7 +250,7 @@ public class BookmarksDB {
* store a Tag in tagsTable or remove an empty tag
* @param tag an object of type Tag to be stored/removed
*/
- public void putTag(final Tag tag){
+ private void putTag(final Tag tag){
if (tag == null) return;
if (tag.isEmpty()) {
this.tags.remove(tag.getTagHash());
@@ -266,7 +259,7 @@ public class BookmarksDB {
}
}
- public void removeTag(final String hash) {
+ private void removeTag(final String hash) {
this.tags.remove(hash);
}
@@ -301,7 +294,7 @@ public class BookmarksDB {
return set.iterator();
}
- public Iterator getTagIterator(final String tagName, final boolean priv, final int comp) {
+ private Iterator getTagIterator(final String tagName, final boolean priv, final int comp) {
final TreeSet set=new TreeSet((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
Iterator it=null;
final Iterator bit=getBookmarksIterator(tagName, priv);
@@ -347,14 +340,14 @@ public class BookmarksDB {
final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName));
if (oldTag != null) {
- final Set urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
+ final RowHandleSet urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
removeTag(BookmarkHelper.tagHash(oldName)); // remove oldHash from TagsDB
Bookmark bookmark;
Set tagSet = new TreeSet(String.CASE_INSENSITIVE_ORDER);
- for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
+ for (final byte[] urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
try {
- bookmark = getBookmark(urlHash);
+ bookmark = getBookmark(ASCII.String(urlHash));
tagSet = bookmark.getTags();
tagSet.remove(oldName);
bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt
@@ -371,9 +364,9 @@ public class BookmarksDB {
public void addTag(final String selectTag, final String newTag) {
Bookmark bookmark;
- for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
+ for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
try {
- bookmark = getBookmark(urlHash);
+ bookmark = getBookmark(ASCII.String(urlHash));
bookmark.addTag(newTag);
saveBookmark(bookmark);
} catch (final IOException e) {
@@ -389,51 +382,24 @@ public class BookmarksDB {
* Subclass of bookmarksDB, which provides the Tag object-type
*/
public class Tag {
- public static final String URL_HASHES = "urlHashes";
- public static final String TAG_NAME = "tagName";
private final String tagHash;
- private final Map mem;
- private Set urlHashes;
-
- public Tag(final String hash, final Map map){
- this.tagHash = hash;
- this.mem = map;
- if (this.mem.containsKey(URL_HASHES)) {
- this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES));
- } else {
- this.urlHashes = new HashSet();
- }
- }
+ private final String tagName;
+ private RowHandleSet urlHashes;
- public Tag(final String name, final HashSet entries){
+ private Tag(final String name) {
this.tagHash = BookmarkHelper.tagHash(name);
- this.mem = new HashMap();
- //mem.put(URL_HASHES, listManager.arraylist2string(entries));
- this.urlHashes = entries;
- this.mem.put(TAG_NAME, name);
- }
-
- public Tag(final String name){
- this(name, new HashSet());
- }
-
- public Map getMap(){
- this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes));
- return this.mem;
+ this.tagName = name;
+ this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10);
}
/**
* get the lowercase Tagname
*/
public String getTagName(){
- /*if(this.mem.containsKey(TAG_NAME)){
- return (String) this.mem.get(TAG_NAME);
- }
- return "";*/
return getFriendlyName().toLowerCase();
}
- public String getTagHash(){
+ private String getTagHash(){
return this.tagHash;
}
@@ -441,37 +407,33 @@ public class BookmarksDB {
* @return the tag name, with all uppercase chars
*/
public String getFriendlyName(){
- /*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
- return (String) this.mem.get(TAG_FRIENDLY_NAME);
- }
- return getTagName();*/
- if(this.mem.containsKey(TAG_NAME)){
- return this.mem.get(TAG_NAME);
- }
- return "notagname";
+ return this.tagName;
}
- public Set getUrlHashes(){
+ private RowHandleSet getUrlHashes(){
return this.urlHashes;
}
- public boolean hasPublicItems(){
+ private boolean hasPublicItems(){
return getBookmarksIterator(getTagName(), false).hasNext();
}
- public void addUrl(final String urlHash){
- this.urlHashes.add(urlHash);
+ private void addUrl(final String urlHash){
+ try {
+ this.urlHashes.put(ASCII.getBytes(urlHash));
+ } catch (SpaceExceededException e) { ConcurrentLog.logException(e); // the hash could not be stored; report the failure instead of dropping it silently
+ }
}
- public void delete(final String urlHash){
- this.urlHashes.remove(urlHash);
+ private void delete(final String urlHash){
+ this.urlHashes.remove(ASCII.getBytes(urlHash));
}
public int size(){
return this.urlHashes.size();
}
- public boolean isEmpty() {
+ private boolean isEmpty() {
return this.urlHashes.isEmpty();
}
}
@@ -481,27 +443,19 @@ public class BookmarksDB {
*/
public class Bookmark {
- public static final String BOOKMARK_URL = "bookmarkUrl";
+ private static final String BOOKMARK_URL = "bookmarkUrl";
public static final String BOOKMARK_TITLE = "bookmarkTitle";
public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc";
- public static final String BOOKMARK_TAGS = "bookmarkTags";
- public static final String BOOKMARK_PUBLIC = "bookmarkPublic";
- public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
- public static final String BOOKMARK_OWNER = "bookmarkOwner";
- public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
+ private static final String BOOKMARK_TAGS = "bookmarkTags";
+ private static final String BOOKMARK_PUBLIC = "bookmarkPublic";
+ private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
+ private static final String BOOKMARK_OWNER = "bookmarkOwner";
+ private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
private final String urlHash;
private Set tagNames;
private long timestamp;
private final Map entry;
- public Bookmark(final String urlHash, final Map map) {
- this.entry = map;
- this.urlHash = urlHash;
- this.tagNames = new TreeSet(String.CASE_INSENSITIVE_ORDER);
- if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
- loadTimestamp();
- }
-
public Bookmark(final DigestURL url) {
this.entry = new HashMap();
this.urlHash = ASCII.String(url.hash());
@@ -529,11 +483,15 @@ public class BookmarksDB {
this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url));
}
- public Bookmark(final Map map) throws MalformedURLException {
- this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map);
+ private Bookmark(final Map map) throws MalformedURLException {
+ this.entry = map;
+ this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash());
+ this.tagNames = new TreeSet(String.CASE_INSENSITIVE_ORDER);
+ if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
+ loadTimestamp();
}
- Map toMap() {
+ private Map toMap() {
this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames));
this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
return this.entry;
@@ -688,11 +646,11 @@ public class BookmarksDB {
/**
* Subclass of bookmarksDB, which provides the bookmarkIterator object-type
*/
- public class bookmarkIterator implements Iterator {
+ private class bookmarkIterator implements Iterator {
Iterator bookmarkIter;
- public bookmarkIterator(final boolean up) throws IOException {
+ private bookmarkIterator(final boolean up) throws IOException {
//flushBookmarkCache(); //XXX: this will cost performance
this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false);
//this.nextEntry = null;
@@ -722,14 +680,14 @@ public class BookmarksDB {
/**
* Comparator to sort objects of type Bookmark according to their timestamps
*/
- public class bookmarkComparator implements Comparator {
+ private class bookmarkComparator implements Comparator {
private final boolean newestFirst;
/**
* @param newestFirst newest first, or oldest first?
*/
- public bookmarkComparator(final boolean newestFirst){
+ private bookmarkComparator(final boolean newestFirst){
this.newestFirst = newestFirst;
}
@@ -752,13 +710,13 @@ public class BookmarksDB {
}
}
- public static final TagComparator tagComparator = new TagComparator();
- public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
+ private static final TagComparator tagComparator = new TagComparator();
+ private static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
/**
* Comparator to sort objects of type Tag according to their names
*/
- public static class TagComparator implements Comparator, Serializable {
+ private static class TagComparator implements Comparator, Serializable {
/**
* generated serial
@@ -772,7 +730,7 @@ public class BookmarksDB {
}
- public static class TagSizeComparator implements Comparator, Serializable {
+ private static class TagSizeComparator implements Comparator, Serializable {
/**
* generated serial
diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java
index 72181ca7a..d74114180 100644
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@@ -32,27 +32,15 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.Method;
import java.util.Date;
-import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
-import org.apache.pdfbox.pdmodel.font.PDCIDFont;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
-import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
-import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
-import org.apache.pdfbox.pdmodel.font.PDType0Font;
-import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont;
-import org.apache.pdfbox.pdmodel.font.PDType1CFont;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.util.PDFTextStripper;
import net.yacy.cora.document.id.AnchorURL;
@@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser {
false,
docDate)};
}
-
- @SuppressWarnings("static-access")
+
public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() {
// thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever
// AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT!
// Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain.
- PDFont.clearResources();
- COSName.clearResources();
- PDType1Font.clearResources();
- PDTrueTypeFont.clearResources();
- PDType0Font.clearResources();
- PDType1AfmPfbFont.clearResources();
- PDType3Font.clearResources();
- PDType1CFont.clearResources();
- PDCIDFont.clearResources();
- PDCIDFontType0Font.clearResources();
- PDCIDFontType2Font.clearResources();
- PDMMType1Font.clearResources();
- PDSimpleFont.clearResources();
+ ResourceCleaner cl = new ResourceCleaner();
+ cl.clearClassResources("org.apache.pdfbox.cos.COSName");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font");
+ cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont");
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private static class ResourceCleaner { // calls static clearResources() methods by class name through reflection, so the named classes need not be referenced at compile time
+ Method findLoadedClass; // reflective handle to ClassLoader.findLoadedClass(String); null if it could not be obtained
+ private ClassLoader sys; // the system class loader, used as the receiver of findLoadedClass
+ public ResourceCleaner() {
+ try {
+ this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", new Class[] { String.class });
+ this.findLoadedClass.setAccessible(true); // the method is not public, so reflective access has to be enabled first
+ this.sys = ClassLoader.getSystemClassLoader();
+ } catch (Throwable e) {
+ e.printStackTrace();
+ this.findLoadedClass = null; // disables the cleaner, see the guard at the top of clearClassResources
+ this.sys = null;
+ }
+ }
+ public void clearClassResources(String name) { // name: fully qualified class name, e.g. "org.apache.pdfbox.cos.COSName"
+ if (this.findLoadedClass == null) return;
+ try {
+ Object pdfparserpainclass = this.findLoadedClass.invoke(this.sys, name);
+ if (pdfparserpainclass != null) { // null is treated as "class not loaded": nothing to clear then
+ Method clearResources = ((Class) pdfparserpainclass).getDeclaredMethod("clearResources", new Class[] {});
+ if (clearResources != null) clearResources.invoke(null); // invoke(null): clearResources is called as a static method
+ }
+ } catch (Throwable e) {
+ e.printStackTrace(); // best effort: any failure is printed and otherwise ignored
+ }
+ }
+ }
/**
diff --git a/source/net/yacy/search/ResourceObserver.java b/source/net/yacy/search/ResourceObserver.java
index 9cc6a58e7..32e8d2396 100644
--- a/source/net/yacy/search/ResourceObserver.java
+++ b/source/net/yacy/search/ResourceObserver.java
@@ -129,7 +129,7 @@ public class ResourceObserver {
if(MemoryControl.properState()) return Space.HIGH;
// clear some caches - @all: are there more of these, we could clear here?
- this.sb.index.clearCache();
+ this.sb.index.clearCaches();
SearchEventCache.cleanupEvents(true);
this.sb.trail.clear();
Switchboard.urlBlacklist.clearblacklistCache();
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index c22083579..15ed4e3c7 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch {
// clear caches if necessary
if ( !MemoryControl.request(128000000L, false) ) {
- this.index.clearCache();
+ this.index.clearCaches();
SearchEventCache.cleanupEvents(false);
this.trail.clear();
GuiHandler.clear();
diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java
index c74eed18a..cc127ecbe 100644
--- a/source/net/yacy/search/index/Fulltext.java
+++ b/source/net/yacy/search/index/Fulltext.java
@@ -225,10 +225,10 @@ public final class Fulltext {
}
}
- public void clearCache() {
+ public void clearCaches() {
if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache();
if (this.statsDump != null) this.statsDump.clear();
- this.solrInstances.clearCache();
+ this.solrInstances.clearCaches();
this.statsDump = null;
}
@@ -250,7 +250,7 @@ public final class Fulltext {
for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear();
}
this.commit(false);
- this.solrInstances.clearCache();
+ this.solrInstances.clearCaches();
}
}
@@ -260,7 +260,7 @@ public final class Fulltext {
if (instance != null) {
for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear();
}
- this.solrInstances.clearCache();
+ this.solrInstances.clearCaches();
}
}
@@ -400,7 +400,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
- if (MemoryControl.shortStatus()) clearCache();
+ if (MemoryControl.shortStatus()) clearCaches();
}
public void putEdges(final Collection edges) throws IOException {
@@ -412,7 +412,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
- if (MemoryControl.shortStatus()) clearCache();
+ if (MemoryControl.shortStatus()) clearCaches();
}
/**
@@ -432,7 +432,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
- if (MemoryControl.shortStatus()) clearCache();
+ if (MemoryControl.shortStatus()) clearCaches();
}
/**
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index df479736b..617d5269c 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -503,10 +503,10 @@ public class Segment {
}
}
- public void clearCache() {
+ public void clearCaches() {
if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache();
if (this.termIndex != null) this.termIndex.clearCache();
- this.fulltext.clearCache();
+ this.fulltext.clearCaches();
}
public File getLocation() {
diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java
index 745bbb2ac..50861de59 100644
--- a/source/net/yacy/search/query/QueryGoal.java
+++ b/source/net/yacy/search/query/QueryGoal.java
@@ -242,7 +242,8 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
- q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
+ q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); // trailing space is required, otherwise the operator fuses with the next field name ("ORcontent_type")
+ q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");
// parse special requests
if (isCatchall()) return q;