diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml index 1234dd0d3..e0f895152 100644 --- a/defaults/solr/solrconfig.xml +++ b/defaults/solr/solrconfig.xml @@ -461,19 +461,21 @@ and old cache. --> + size="64" + initialSize="64" + autowarmCount="0" + cleanupThread="true"/> - + - + - + size="64" + autowarmCount="0" + showItems="32" + cleanupThread="true"/> diff --git a/defaults/yacy.init b/defaults/yacy.init index ae70b9162..25769d1a8 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -442,6 +442,7 @@ seedScpPath= peerCycle=2 # debug flags +debug.search.profiling=false debug.search.local.dht.off=false debug.search.local.solr.off=false debug.search.remote.dht.off=false diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html index ae8c851ce..890ae9845 100644 --- a/htroot/ConfigHTCache_p.html +++ b/htroot/ConfigHTCache_p.html @@ -19,7 +19,7 @@
-
#[actualCacheSize]# MB
+
#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average
MB
 
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java index 73141e65a..48d4df623 100644 --- a/htroot/ConfigHTCache_p.java +++ b/htroot/ConfigHTCache_p.java @@ -77,7 +77,9 @@ public class ConfigHTCache_p { } prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT)); - prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024)); + prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024); + prop.put("actualCacheDocCount", Cache.getActualCacheDocCount()); + prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); // an empty cache has no average size; report 0 instead of dividing by zero prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64)); // return rewrite properties return prop; diff --git a/htroot/ContentAnalysis_p.java b/htroot/ContentAnalysis_p.java index 2ba573ab0..eed8455e7 100644 --- a/htroot/ContentAnalysis_p.java +++ b/htroot/ContentAnalysis_p.java @@ -34,7 +34,7 @@ public class ContentAnalysis_p { // clean up all search events SearchEventCache.cleanupEvents(true); - sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings + sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings if (post != null && post.containsKey("EnterDoublecheck")) { Ranking.setMinTokenLen(post.getInt("minTokenLen", 3)); diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java index 04784f938..91e543a11 100644 --- a/htroot/RankingSolr_p.java +++ b/htroot/RankingSolr_p.java @@ -38,7 +38,7 @@ public class RankingSolr_p { // clean up all search events SearchEventCache.cleanupEvents(true); - sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings + sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings int profileNr = 0; if (post != null) profileNr = post.getInt("profileNr", profileNr); diff --git a/htroot/yacysearch.java 
b/htroot/yacysearch.java index 284a0b15e..b79c8061b 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -360,7 +360,7 @@ public class yacysearch { // check available memory and clean up if necessary if ( !MemoryControl.request(8000000L, false) ) { - indexSegment.clearCache(); + indexSegment.clearCaches(); SearchEventCache.cleanupEvents(false); } diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java index c96fe2d33..eaf93603c 100644 --- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java @@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo this.missCache = new ConcurrentARC(missCacheMax, partitions); } - public void clearCache() { + public void clearCaches() { this.hitCache.clear(); this.missCache.clear(); this.documentCache.clear(); @@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo @Override public synchronized void close() { + this.clearCaches(); if (this.solr != null) this.solr.close(); this.solr = null; - this.clearCache(); } /** @@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo */ @Override public void clear() throws IOException { - this.clearCache(); + this.clearCaches(); if (this.solr != null) this.solr.clear(); } @@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo @Override public void deleteByQuery(final String querystring) throws IOException { - this.clearCache(); + this.clearCaches(); this.solr.deleteByQuery(querystring); } @@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo } private void addToCache(SolrDocumentList list, boolean doccache) { - if (MemoryControl.shortStatus()) clearCache(); + if 
(MemoryControl.shortStatus()) clearCaches(); for (final SolrDocument solrdoc: list) { addToCache(solrdoc, doccache); } diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 792d921ad..ddbf550ec 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { ensureAliveUpdateHandler(); } + @Override + public void clearCaches() { + this.connector.clearCaches(); + this.idCache.clear(); + } + /** * used for debugging */ diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 533ecb080..7b5c104d5 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -30,10 +30,12 @@ import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.federate.solr.instance.EmbeddedInstance; import net.yacy.cora.federate.solr.instance.SolrInstance; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.search.Switchboard; import net.yacy.search.schema.CollectionSchema; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; @@ -47,10 +49,14 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.request.UnInvertedField; import 
org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.QueryResultKey; +import org.apache.solr.search.SolrCache; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.RefCounted; @@ -88,6 +94,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo super.init(this.instance.getServer(coreName)); } + public void clearCaches() { + SolrConfig solrConfig = this.core.getSolrConfig(); + @SuppressWarnings("unchecked") + SolrCache fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance(); + if (fieldValueCache != null) fieldValueCache.clear(); + @SuppressWarnings("unchecked") + SolrCache filterCache= solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance(); + if (filterCache != null) filterCache.clear(); + @SuppressWarnings("unchecked") + SolrCache queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance(); + if (queryResultCache != null) queryResultCache.clear(); + @SuppressWarnings("unchecked") + SolrCache documentCache = solrConfig.documentCacheConfig == null ? 
null : solrConfig.documentCacheConfig.newInstance(); + if (documentCache != null) documentCache.clear(); + } + public SolrInstance getInstance() { return this.instance; } @@ -224,6 +246,17 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo @Override public Set existsByIds(Set ids) { + boolean debug = Switchboard.getSwitchboard().getConfigBool("debug.search.profiling", false); + long debugSingleTime = 0; int debugSingleCount = 0; + if (debug && ids != null) { + // run this also with single exist queries which might be faster (but we don't know, that's the reason we test that here); skipped for null input, which is handled by the guard below + long start = System.currentTimeMillis(); + Set idsr = new HashSet(); + for (String id: ids) if (existsById(id)) idsr.add(id); + debugSingleTime = System.currentTimeMillis() - start; + debugSingleCount = idsr.size(); + } + long start = System.currentTimeMillis(); if (ids == null || ids.size() == 0) return new HashSet(); if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet(); StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)" @@ -246,6 +279,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo } finally { docListSearcher.close(); } + long debugCollectionTime = System.currentTimeMillis() - start; + if (debug) { + ConcurrentLog.info("EmbeddedSolrConnector", "Comparisment of existsByIds: input=" + ids.size() + " records, output=" + idsr.size() + " records, singleTime=" + debugSingleTime + ", collectionTime=" + debugCollectionTime + ", singleCount=" + debugSingleCount + ", collectionCount=" + idsr.size()); + } // construct a new id list from that return idsr; } diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index c6d51e8ec..19fa604c5 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ 
b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo this.solr0 = solr0; this.solr1 = solr1; } + + @Override + public void clearCaches() { + if (this.solr0 != null) this.solr0.clearCaches(); + if (this.solr1 != null) this.solr1.clearCaches(); + } public boolean isConnected0() { return this.solr0 != null; diff --git a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java index 4e2a9369f..0ab5f8b31 100644 --- a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java @@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn super.close(); } + @Override + public void clearCaches() { + // we do not have a direct access to the caches here, thus we simply do nothing. 
+ } + @Override public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException { // during the solr query we set the thread name to the query string to get more debugging info in thread dumps @@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn } System.exit(0); } + } diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 8fb31c531..f28d26f09 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; public interface SolrConnector extends Iterable /* Iterable of document IDs */ { - + + /** + * clear all caches: inside solr and outside solr within the implementations of this interface + */ + public void clearCaches(); + /** * get the size of the index * @return number of results if solr is queries with a catch-all pattern diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index f12d43950..aec6352f0 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen public SolrServer getServer() { return this.server; } - + @Override public void commit(final boolean softCommit) { synchronized (this.server) { diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java index 6b9b7a939..1d49fd537 100644 --- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java +++ 
b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java @@ -24,7 +24,6 @@ import java.util.Collection; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.federate.solr.connector.CachedSolrConnector; import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.MirrorSolrConnector; @@ -161,9 +160,9 @@ public class InstanceMirror { return msc; } - public void clearCache() { + public void clearCaches() { for (SolrConnector csc: this.connectorCache.values()) { - if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache(); + csc.clearCaches(); } for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true); } diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java index f1d72354f..9973f08a0 100644 --- a/source/net/yacy/crawler/data/Cache.java +++ b/source/net/yacy/crawler/data/Cache.java @@ -182,6 +182,14 @@ public final class Cache { public static long getActualCacheSize() { return fileDBunbuffered.length(); } + + /** + * get the current actual number of documents in the cache + * @return the number of entries in the cache file database + */ + public static long getActualCacheDocCount() { + return fileDBunbuffered.size(); + } /** * close the databases diff --git a/source/net/yacy/data/BookmarksDB.java b/source/net/yacy/data/BookmarksDB.java index 1c11b4b15..d9c0140a6 100644 --- a/source/net/yacy/data/BookmarksDB.java +++ b/source/net/yacy/data/BookmarksDB.java @@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.order.NaturalOrder; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.cora.util.SpaceExceededException; import net.yacy.kelondro.blob.MapHeap; +import net.yacy.kelondro.data.meta.URIMetadataRow; +import net.yacy.kelondro.index.RowHandleSet; public class BookmarksDB { @@ -147,11 +150,6 @@ public class 
BookmarksDB { ConcurrentLog.logException(e); } } - public String addBookmark(final Bookmark bookmark){ - saveBookmark(bookmark); - return bookmark.getUrlHash(); - - } public Bookmark getBookmark(final String urlHash) throws IOException { try { @@ -214,18 +212,13 @@ public class BookmarksDB { final TreeSet set=new TreeSet(new bookmarkComparator(true)); final String tagHash=BookmarkHelper.tagHash(tagName); final Tag tag=getTag(tagHash); - Set hashes=new HashSet(); - if (tag != null) { - hashes=getTag(tagHash).getUrlHashes(); - } + RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes(); if (priv) { - set.addAll(hashes); + for (byte[] hash: hashes) set.add(ASCII.String(hash)); } else { - final Iterator it=hashes.iterator(); - Bookmark bm; - while(it.hasNext()){ + for (byte[] hash: hashes) { try { - bm = getBookmark(it.next()); + Bookmark bm = getBookmark(ASCII.String(hash)); if (bm != null && bm.getPublic()) { set.add(bm.getUrlHash()); } @@ -249,7 +242,7 @@ public class BookmarksDB { * retrieve an object of type Tag from the the tagCache, if object is not cached return loadTag(hash) * @param hash an object of type String, containing a tagHash */ - public Tag getTag(final String hash){ + private Tag getTag(final String hash){ return this.tags.get(hash); //null if it does not exists } @@ -257,7 +250,7 @@ public class BookmarksDB { * store a Tag in tagsTable or remove an empty tag * @param tag an object of type Tag to be stored/removed */ - public void putTag(final Tag tag){ + private void putTag(final Tag tag){ if (tag == null) return; if (tag.isEmpty()) { this.tags.remove(tag.getTagHash()); @@ -266,7 +259,7 @@ public class BookmarksDB { } } - public void removeTag(final String hash) { + private void removeTag(final String hash) { this.tags.remove(hash); } @@ -301,7 +294,7 @@ public class BookmarksDB { return set.iterator(); } - public Iterator getTagIterator(final String 
tagName, final boolean priv, final int comp) { + private Iterator getTagIterator(final String tagName, final boolean priv, final int comp) { final TreeSet set=new TreeSet((comp == SORT_SIZE) ? tagSizeComparator : tagComparator); Iterator it=null; final Iterator bit=getBookmarksIterator(tagName, priv); @@ -347,14 +340,14 @@ public class BookmarksDB { final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName)); if (oldTag != null) { - final Set urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag + final RowHandleSet urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag removeTag(BookmarkHelper.tagHash(oldName)); // remove oldHash from TagsDB Bookmark bookmark; Set tagSet = new TreeSet(String.CASE_INSENSITIVE_ORDER); - for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName + for (final byte[] urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName try { - bookmark = getBookmark(urlHash); + bookmark = getBookmark(ASCII.String(urlHash)); tagSet = bookmark.getTags(); tagSet.remove(oldName); bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt @@ -371,9 +364,9 @@ public class BookmarksDB { public void addTag(final String selectTag, final String newTag) { Bookmark bookmark; - for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag + for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag try { - bookmark = getBookmark(urlHash); + bookmark = getBookmark(ASCII.String(urlHash)); bookmark.addTag(newTag); saveBookmark(bookmark); } catch (final IOException e) { @@ -389,51 +382,24 @@ public class BookmarksDB { * Subclass of bookmarksDB, which provides the Tag object-type */ public class Tag { - public static final String URL_HASHES = "urlHashes"; - public static 
final String TAG_NAME = "tagName"; private final String tagHash; - private final Map mem; - private Set urlHashes; - - public Tag(final String hash, final Map map){ - this.tagHash = hash; - this.mem = map; - if (this.mem.containsKey(URL_HASHES)) { - this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES)); - } else { - this.urlHashes = new HashSet(); - } - } + private final String tagName; + private RowHandleSet urlHashes; - public Tag(final String name, final HashSet entries){ + private Tag(final String name) { this.tagHash = BookmarkHelper.tagHash(name); - this.mem = new HashMap(); - //mem.put(URL_HASHES, listManager.arraylist2string(entries)); - this.urlHashes = entries; - this.mem.put(TAG_NAME, name); - } - - public Tag(final String name){ - this(name, new HashSet()); - } - - public Map getMap(){ - this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes)); - return this.mem; + this.tagName = name; + this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10); } /** * get the lowercase Tagname */ public String getTagName(){ - /*if(this.mem.containsKey(TAG_NAME)){ - return (String) this.mem.get(TAG_NAME); - } - return "";*/ return getFriendlyName().toLowerCase(); } - public String getTagHash(){ + private String getTagHash(){ return this.tagHash; } @@ -441,37 +407,33 @@ public class BookmarksDB { * @return the tag name, with all uppercase chars */ public String getFriendlyName(){ - /*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){ - return (String) this.mem.get(TAG_FRIENDLY_NAME); - } - return getTagName();*/ - if(this.mem.containsKey(TAG_NAME)){ - return this.mem.get(TAG_NAME); - } - return "notagname"; + return this.tagName; } - public Set getUrlHashes(){ + private RowHandleSet getUrlHashes(){ return this.urlHashes; } - public boolean hasPublicItems(){ + private boolean hasPublicItems(){ return getBookmarksIterator(getTagName(), false).hasNext(); } - public void addUrl(final String 
urlHash){ - this.urlHashes.add(urlHash); + private void addUrl(final String urlHash){ + try { + this.urlHashes.put(ASCII.getBytes(urlHash)); + } catch (SpaceExceededException e) { + } } - public void delete(final String urlHash){ - this.urlHashes.remove(urlHash); + private void delete(final String urlHash){ + this.urlHashes.remove(ASCII.getBytes(urlHash)); } public int size(){ return this.urlHashes.size(); } - public boolean isEmpty() { + private boolean isEmpty() { return this.urlHashes.isEmpty(); } } @@ -481,27 +443,19 @@ public class BookmarksDB { */ public class Bookmark { - public static final String BOOKMARK_URL = "bookmarkUrl"; + private static final String BOOKMARK_URL = "bookmarkUrl"; public static final String BOOKMARK_TITLE = "bookmarkTitle"; public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc"; - public static final String BOOKMARK_TAGS = "bookmarkTags"; - public static final String BOOKMARK_PUBLIC = "bookmarkPublic"; - public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp"; - public static final String BOOKMARK_OWNER = "bookmarkOwner"; - public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed"; + private static final String BOOKMARK_TAGS = "bookmarkTags"; + private static final String BOOKMARK_PUBLIC = "bookmarkPublic"; + private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp"; + private static final String BOOKMARK_OWNER = "bookmarkOwner"; + private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed"; private final String urlHash; private Set tagNames; private long timestamp; private final Map entry; - public Bookmark(final String urlHash, final Map map) { - this.entry = map; - this.urlHash = urlHash; - this.tagNames = new TreeSet(String.CASE_INSENSITIVE_ORDER); - if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS))); - loadTimestamp(); - } - public Bookmark(final DigestURL url) { this.entry = new HashMap(); this.urlHash = ASCII.String(url.hash()); @@ 
-529,11 +483,15 @@ public class BookmarksDB { this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url)); } - public Bookmark(final Map map) throws MalformedURLException { - this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map); + private Bookmark(final Map map) throws MalformedURLException { + this.entry = map; + this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()); + this.tagNames = new TreeSet(String.CASE_INSENSITIVE_ORDER); + if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS))); + loadTimestamp(); } - Map toMap() { + private Map toMap() { this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames)); this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp)); return this.entry; @@ -688,11 +646,11 @@ public class BookmarksDB { /** * Subclass of bookmarksDB, which provides the bookmarkIterator object-type */ - public class bookmarkIterator implements Iterator { + private class bookmarkIterator implements Iterator { Iterator bookmarkIter; - public bookmarkIterator(final boolean up) throws IOException { + private bookmarkIterator(final boolean up) throws IOException { //flushBookmarkCache(); //XXX: this will cost performance this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false); //this.nextEntry = null; @@ -722,14 +680,14 @@ public class BookmarksDB { /** * Comparator to sort objects of type Bookmark according to their timestamps */ - public class bookmarkComparator implements Comparator { + private class bookmarkComparator implements Comparator { private final boolean newestFirst; /** * @param newestFirst newest first, or oldest first? 
*/ - public bookmarkComparator(final boolean newestFirst){ + private bookmarkComparator(final boolean newestFirst){ this.newestFirst = newestFirst; } @@ -752,13 +710,13 @@ public class BookmarksDB { } } - public static final TagComparator tagComparator = new TagComparator(); - public static final TagSizeComparator tagSizeComparator = new TagSizeComparator(); + private static final TagComparator tagComparator = new TagComparator(); + private static final TagSizeComparator tagSizeComparator = new TagSizeComparator(); /** * Comparator to sort objects of type Tag according to their names */ - public static class TagComparator implements Comparator, Serializable { + private static class TagComparator implements Comparator, Serializable { /** * generated serial @@ -772,7 +730,7 @@ public class BookmarksDB { } - public static class TagSizeComparator implements Comparator, Serializable { + private static class TagSizeComparator implements Comparator, Serializable { /** * generated serial diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index 72181ca7a..d74114180 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -32,27 +32,15 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Method; import java.util.Date; -import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.exceptions.CryptographyException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException; import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; -import org.apache.pdfbox.pdmodel.font.PDCIDFont; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font; -import 
org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font; -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDMMType1Font; -import org.apache.pdfbox.pdmodel.font.PDSimpleFont; -import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; -import org.apache.pdfbox.pdmodel.font.PDType0Font; -import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont; -import org.apache.pdfbox.pdmodel.font.PDType1CFont; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.util.PDFTextStripper; import net.yacy.cora.document.id.AnchorURL; @@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser { false, docDate)}; } - - @SuppressWarnings("static-access") + public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() { // thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever // AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT! // Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain. 
- PDFont.clearResources(); - COSName.clearResources(); - PDType1Font.clearResources(); - PDTrueTypeFont.clearResources(); - PDType0Font.clearResources(); - PDType1AfmPfbFont.clearResources(); - PDType3Font.clearResources(); - PDType1CFont.clearResources(); - PDCIDFont.clearResources(); - PDCIDFontType0Font.clearResources(); - PDCIDFontType2Font.clearResources(); - PDMMType1Font.clearResources(); - PDSimpleFont.clearResources(); + ResourceCleaner cl = new ResourceCleaner(); + cl.clearClassResources("org.apache.pdfbox.cos.COSName"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font"); + cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont"); + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private static class ResourceCleaner { + Method findLoadedClass; + private ClassLoader sys; + public ResourceCleaner() { + try { + this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", new Class[] { String.class }); + this.findLoadedClass.setAccessible(true); + this.sys = ClassLoader.getSystemClassLoader(); + } catch (Throwable e) { + e.printStackTrace(); + this.findLoadedClass = null; + this.sys = null; + } + } + public void clearClassResources(String name) { + if (this.findLoadedClass == 
null) return; + try { + Object pdfparserpainclass = this.findLoadedClass.invoke(this.sys, name); + if (pdfparserpainclass != null) { + Method clearResources = ((Class) pdfparserpainclass).getDeclaredMethod("clearResources", new Class[] {}); + if (clearResources != null) clearResources.invoke(null); + } + } catch (Throwable e) { + e.printStackTrace(); + } + } } /** diff --git a/source/net/yacy/search/ResourceObserver.java b/source/net/yacy/search/ResourceObserver.java index 9cc6a58e7..32e8d2396 100644 --- a/source/net/yacy/search/ResourceObserver.java +++ b/source/net/yacy/search/ResourceObserver.java @@ -129,7 +129,7 @@ public class ResourceObserver { if(MemoryControl.properState()) return Space.HIGH; // clear some caches - @all: are there more of these, we could clear here? - this.sb.index.clearCache(); + this.sb.index.clearCaches(); SearchEventCache.cleanupEvents(true); this.sb.trail.clear(); Switchboard.urlBlacklist.clearblacklistCache(); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index c22083579..15ed4e3c7 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch { // clear caches if necessary if ( !MemoryControl.request(128000000L, false) ) { - this.index.clearCache(); + this.index.clearCaches(); SearchEventCache.cleanupEvents(false); this.trail.clear(); GuiHandler.clear(); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index c74eed18a..cc127ecbe 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -225,10 +225,10 @@ public final class Fulltext { } } - public void clearCache() { + public void clearCaches() { if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache(); if (this.statsDump != null) this.statsDump.clear(); - 
this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); this.statsDump = null; } @@ -250,7 +250,7 @@ public final class Fulltext { for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear(); } this.commit(false); - this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); } } @@ -260,7 +260,7 @@ public final class Fulltext { if (instance != null) { for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear(); } - this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); } } @@ -400,7 +400,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } public void putEdges(final Collection edges) throws IOException { @@ -412,7 +412,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } /** @@ -432,7 +432,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } /** diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index df479736b..617d5269c 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -503,10 +503,10 @@ public class Segment { } } - public void clearCache() { + public void clearCaches() { if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache(); if (this.termIndex != null) this.termIndex.clearCache(); - this.fulltext.clearCache(); + this.fulltext.clearCaches(); } public File getLocation() { diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index 745bbb2ac..50861de59 100644 
--- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -242,7 +242,8 @@ public class QueryGoal { // add filter to prevent that results come from failed urls q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND ("); q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR "); - q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))"); + q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); // the trailing blank keeps the OR operator apart from the following field name + q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))"); // parse special requests if (isCatchall()) return q;