Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
orbiter 11 years ago
commit 4234b0ed6c

@@ -461,19 +461,21 @@
          and old cache.
       -->
     <filterCache class="solr.FastLRUCache"
-                 size="512"
-                 initialSize="512"
-                 autowarmCount="0"/>
+                 size="64"
+                 initialSize="64"
+                 autowarmCount="0"
+                 cleanupThread="true"/>

     <!-- Query Result Cache
          Caches results of searches - ordered lists of document ids
          (DocList) based on a query, a sort, and the range of documents requested.
       -->
-    <queryResultCache class="solr.LRUCache"
-                      size="512"
-                      initialSize="512"
-                      autowarmCount="0"/>
+    <queryResultCache class="solr.FastLRUCache"
+                      size="64"
+                      initialSize="64"
+                      autowarmCount="0"
+                      cleanupThread="true"/>

     <!-- Document Cache
@@ -481,10 +483,11 @@
          document). Since Lucene internal document ids are transient,
          this cache will not be autowarmed.
       -->
-    <documentCache class="solr.LRUCache"
-                   size="512"
-                   initialSize="512"
-                   autowarmCount="0"/>
+    <documentCache class="solr.FastLRUCache"
+                   size="64"
+                   initialSize="64"
+                   autowarmCount="0"
+                   cleanupThread="true"/>

     <!-- Field Value Cache
@@ -492,12 +495,11 @@
          by document id. The fieldValueCache is created by default
          even if not configured here.
       -->
-    <!--
-    <fieldValueCache class="solr.FastLRUCache"
-                     size="512"
-                     autowarmCount="128"
-                     showItems="32" />
-    -->
+    <fieldValueCache class="solr.FastLRUCache"
+                     size="64"
+                     autowarmCount="0"
+                     showItems="32"
+                     cleanupThread="true"/>

     <!-- Custom Cache
@@ -510,11 +512,12 @@
       -->
     <!--
        <cache name="myUserCache"
-             class="solr.LRUCache"
-             size="4096"
-             initialSize="1024"
-             autowarmCount="1024"
+             class="solr.FastLRUCache"
+             size="64"
+             initialSize="64"
+             autowarmCount="0"
              regenerator="com.mycompany.MyRegenerator"
+             cleanupThread="true"
              />
     -->

@@ -442,6 +442,7 @@ seedScpPath=
 peerCycle=2

 # debug flags
+debug.search.profiling=false
 debug.search.local.dht.off=false
 debug.search.local.solr.off=false
 debug.search.remote.dht.off=false
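The new debug.search.profiling switch is consumed at query time by EmbeddedSolrConnector further down in this commit. A minimal sketch of how the flag is read, assuming the Switchboard singleton is available:

    // sketch: read the new debug switch; the default of false keeps the extra profiling pass disabled
    boolean debug = Switchboard.getSwitchboard().getConfigBool("debug.search.profiling", false);
    if (debug) {
        // run the additional single-id comparison and log both timings (see EmbeddedSolrConnector below)
    }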

@@ -19,7 +19,7 @@
 <dt><label for="HTCachePath">The path where the cache is stored</label></dt>
 <dd><input name="HTCachePath" id="HTCachePath" type="text" size="20" maxlength="300" value="#[HTCachePath]#" /></dd>
 <dt><label for="actualCacheSize">The current size of the cache</label></dt>
-<dd><span id="actualCacheSize">#[actualCacheSize]# MB</span></dd>
+<dd><span id="actualCacheSize">#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average </span></dd>
 <dt><label for="maxCacheSize">The maximum size of the cache</label></dt>
 <dd><input name="maxCacheSize" id="maxCacheSize" type="text" size="8" maxlength="24" value="#[maxCacheSize]#" /> MB</dd>
 <dt>&nbsp;</dt>

@@ -77,7 +77,9 @@ public class ConfigHTCache_p {
         }
         prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
-        prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
+        prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024);
+        prop.put("actualCacheDocCount", Cache.getActualCacheDocCount());
+        prop.put("docSizeAverage", Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024);
         prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
         // return rewrite properties
         return prop;
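The docSizeAverage value divides the total cache size by the document count, so an empty cache (document count of zero) would make the division fail. A hypothetical defensive variant, not the committed code:

    // sketch: compute the average file size in KB, guarding against an empty cache
    long docCount = Cache.getActualCacheDocCount();
    long docSizeAverageKB = docCount == 0 ? 0 : Cache.getActualCacheSize() / docCount / 1024;
    prop.put("docSizeAverage", docSizeAverageKB);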

@ -34,7 +34,7 @@ public class ContentAnalysis_p {
// clean up all search events // clean up all search events
SearchEventCache.cleanupEvents(true); SearchEventCache.cleanupEvents(true);
sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) { if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3)); Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));

@@ -38,7 +38,7 @@ public class RankingSolr_p {
         // clean up all search events
         SearchEventCache.cleanupEvents(true);
-        sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
+        sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
         int profileNr = 0;
         if (post != null) profileNr = post.getInt("profileNr", profileNr);

@@ -360,7 +360,7 @@ public class yacysearch {
         // check available memory and clean up if necessary
         if ( !MemoryControl.request(8000000L, false) ) {
-            indexSegment.clearCache();
+            indexSegment.clearCaches();
             SearchEventCache.cleanupEvents(false);
         }

@@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
         this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
     }

-    public void clearCache() {
+    public void clearCaches() {
         this.hitCache.clear();
         this.missCache.clear();
         this.documentCache.clear();
@@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
     @Override
     public synchronized void close() {
+        this.clearCaches();
         if (this.solr != null) this.solr.close();
         this.solr = null;
-        this.clearCache();
     }

     /**
@@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
      */
     @Override
     public void clear() throws IOException {
-        this.clearCache();
+        this.clearCaches();
         if (this.solr != null) this.solr.clear();
     }
@@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
     @Override
     public void deleteByQuery(final String querystring) throws IOException {
-        this.clearCache();
+        this.clearCaches();
         this.solr.deleteByQuery(querystring);
     }
@@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
     }

     private void addToCache(SolrDocumentList list, boolean doccache) {
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
         for (final SolrDocument solrdoc: list) {
             addToCache(solrdoc, doccache);
         }

@@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
         ensureAliveUpdateHandler();
     }

+    @Override
+    public void clearCaches() {
+        this.connector.clearCaches();
+        this.idCache.clear();
+    }
+
     /**
      * used for debugging
      */

@@ -30,10 +30,12 @@ import java.util.concurrent.LinkedBlockingQueue;
 import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
 import net.yacy.cora.federate.solr.instance.SolrInstance;
 import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.search.Switchboard;
 import net.yacy.search.schema.CollectionSchema;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.search.Query;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -47,10 +49,14 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.SearchHandler;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.request.UnInvertedField;
 import org.apache.solr.response.ResultContext;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.QueryResultKey;
+import org.apache.solr.search.SolrCache;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
@@ -88,6 +94,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
         super.init(this.instance.getServer(coreName));
     }

+    public void clearCaches() {
+        SolrConfig solrConfig = this.core.getSolrConfig();
+        @SuppressWarnings("unchecked")
+        SolrCache<String, UnInvertedField> fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance();
+        if (fieldValueCache != null) fieldValueCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<Query, DocSet> filterCache = solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
+        if (filterCache != null) filterCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<QueryResultKey, DocList> queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
+        if (queryResultCache != null) queryResultCache.clear();
+        @SuppressWarnings("unchecked")
+        SolrCache<Integer, Document> documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
+        if (documentCache != null) documentCache.clear();
+    }
+
     public SolrInstance getInstance() {
         return this.instance;
     }
@@ -224,6 +246,17 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
     @Override
     public Set<String> existsByIds(Set<String> ids) {
+        boolean debug = Switchboard.getSwitchboard().getConfigBool("debug.search.profiling", false);
+        long debugSingleTime = 0; int debugSingleCount = 0;
+        if (debug) {
+            // run this also with single exist queries which might be faster (but we don't know, thats the reason we test that here)
+            long start = System.currentTimeMillis();
+            Set<String> idsr = new HashSet<String>();
+            for (String id: ids) if (existsById(id)) idsr.add(id);
+            debugSingleTime = System.currentTimeMillis() - start;
+            debugSingleCount = idsr.size();
+        }
+        long start = System.currentTimeMillis();
         if (ids == null || ids.size() == 0) return new HashSet<String>();
         if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
         StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
@@ -246,6 +279,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
         } finally {
             docListSearcher.close();
         }
+        long debugCollectionTime = System.currentTimeMillis() - start;
+        if (debug) {
+            ConcurrentLog.info("EmbeddedSolrConnector", "Comparisment of existsByIds: input=" + ids.size() + " records, output=" + idsr.size() + " records, singleTime=" + debugSingleTime + ", collectionTime=" + debugCollectionTime + ", singleCount=" + debugSingleCount + ", collectionCount=" + idsr.size());
+        }
         // construct a new id list from that
         return idsr;
     }
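The StringBuilder comment above hints at the combined lookup that the timing log compares against: one Solr query that ORs raw id terms together. A minimal sketch of that construction, assuming ids holds the document hashes:

    // sketch: build "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)" for all ids
    StringBuilder query = new StringBuilder();
    for (String id : ids) {
        if (query.length() > 0) query.append(" OR ");
        query.append("({!raw f=id}").append(id).append(')');
    }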

@@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
         this.solr0 = solr0;
         this.solr1 = solr1;
     }

+    @Override
+    public void clearCaches() {
+        if (this.solr0 != null) this.solr0.clearCaches();
+        if (this.solr1 != null) this.solr1.clearCaches();
+    }
+
     public boolean isConnected0() {
         return this.solr0 != null;

@@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
         super.close();
     }

+    @Override
+    public void clearCaches() {
+        // we do not have a direct access to the caches here, thus we simply do nothing.
+    }
+
     @Override
     public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
         // during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
         }
         System.exit(0);
     }
 }

@@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;

 public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {

+    /**
+     * clear all caches: inside solr and outside solr within the implementations of this interface
+     */
+    public void clearCaches();
+
     /**
      * get the size of the index
      * @return number of results if solr is queries with a catch-all pattern

@@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
     public SolrServer getServer() {
         return this.server;
     }

     @Override
     public void commit(final boolean softCommit) {
         synchronized (this.server) {

@@ -24,7 +24,6 @@ import java.util.Collection;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;

-import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
 import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
 import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
 import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@@ -161,9 +160,9 @@ public class InstanceMirror {
         return msc;
     }

-    public void clearCache() {
+    public void clearCaches() {
         for (SolrConnector csc: this.connectorCache.values()) {
-            if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
+            csc.clearCaches();
         }
         for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
     }

@@ -182,6 +182,14 @@ public final class Cache {
     public static long getActualCacheSize() {
         return fileDBunbuffered.length();
     }

+    /**
+     * get the current actual cache size
+     * @return
+     */
+    public static long getActualCacheDocCount() {
+        return fileDBunbuffered.size();
+    }
+
     /**
      * close the databases

@@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.order.NaturalOrder;
 import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.kelondro.blob.MapHeap;
+import net.yacy.kelondro.data.meta.URIMetadataRow;
+import net.yacy.kelondro.index.RowHandleSet;

 public class BookmarksDB {
@@ -147,11 +150,6 @@ public class BookmarksDB {
             ConcurrentLog.logException(e);
         }
     }

-    public String addBookmark(final Bookmark bookmark){
-        saveBookmark(bookmark);
-        return bookmark.getUrlHash();
-    }
-
     public Bookmark getBookmark(final String urlHash) throws IOException {
         try {
@@ -214,18 +212,13 @@ public class BookmarksDB {
         final TreeSet<String> set=new TreeSet<String>(new bookmarkComparator(true));
         final String tagHash=BookmarkHelper.tagHash(tagName);
         final Tag tag=getTag(tagHash);
-        Set<String> hashes=new HashSet<String>();
-        if (tag != null) {
-            hashes=getTag(tagHash).getUrlHashes();
-        }
+        RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes();
         if (priv) {
-            set.addAll(hashes);
+            for (byte[] hash: hashes) set.add(ASCII.String(hash));
         } else {
-            final Iterator<String> it=hashes.iterator();
-            Bookmark bm;
-            while(it.hasNext()){
+            for (byte[] hash: hashes) {
                 try {
-                    bm = getBookmark(it.next());
+                    Bookmark bm = getBookmark(ASCII.String(hash));
                     if (bm != null && bm.getPublic()) {
                         set.add(bm.getUrlHash());
                     }
@@ -249,7 +242,7 @@ public class BookmarksDB {
      * retrieve an object of type Tag from the the tagCache, if object is not cached return loadTag(hash)
      * @param hash an object of type String, containing a tagHash
      */
-    public Tag getTag(final String hash){
+    private Tag getTag(final String hash){
         return this.tags.get(hash); //null if it does not exists
     }
@@ -257,7 +250,7 @@ public class BookmarksDB {
      * store a Tag in tagsTable or remove an empty tag
      * @param tag an object of type Tag to be stored/removed
      */
-    public void putTag(final Tag tag){
+    private void putTag(final Tag tag){
         if (tag == null) return;
         if (tag.isEmpty()) {
             this.tags.remove(tag.getTagHash());
@@ -266,7 +259,7 @@ public class BookmarksDB {
         }
     }

-    public void removeTag(final String hash) {
+    private void removeTag(final String hash) {
         this.tags.remove(hash);
     }
@@ -301,7 +294,7 @@ public class BookmarksDB {
         return set.iterator();
     }

-    public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
+    private Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
         final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
         Iterator<String> it=null;
         final Iterator<String> bit=getBookmarksIterator(tagName, priv);
@@ -347,14 +340,14 @@ public class BookmarksDB {
         final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName));
         if (oldTag != null) {
-            final Set<String> urlHashes = oldTag.getUrlHashes();   // preserve urlHashes of oldTag
+            final RowHandleSet urlHashes = oldTag.getUrlHashes();  // preserve urlHashes of oldTag
             removeTag(BookmarkHelper.tagHash(oldName));            // remove oldHash from TagsDB
             Bookmark bookmark;
             Set<String> tagSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
-            for (final String urlHash : urlHashes) {               // looping through all bookmarks which were tagged with oldName
+            for (final byte[] urlHash : urlHashes) {               // looping through all bookmarks which were tagged with oldName
                 try {
-                    bookmark = getBookmark(urlHash);
+                    bookmark = getBookmark(ASCII.String(urlHash));
                     tagSet = bookmark.getTags();
                     tagSet.remove(oldName);
                     bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt
@@ -371,9 +364,9 @@ public class BookmarksDB {
     public void addTag(final String selectTag, final String newTag) {
         Bookmark bookmark;
-        for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
+        for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
             try {
-                bookmark = getBookmark(urlHash);
+                bookmark = getBookmark(ASCII.String(urlHash));
                 bookmark.addTag(newTag);
                 saveBookmark(bookmark);
             } catch (final IOException e) {
@@ -389,51 +382,24 @@ public class BookmarksDB {
      * Subclass of bookmarksDB, which provides the Tag object-type
      */
     public class Tag {
-        public static final String URL_HASHES = "urlHashes";
-        public static final String TAG_NAME = "tagName";
         private final String tagHash;
-        private final Map<String, String> mem;
-        private Set<String> urlHashes;
+        private final String tagName;
+        private RowHandleSet urlHashes;

-        public Tag(final String hash, final Map<String, String> map){
-            this.tagHash = hash;
-            this.mem = map;
-            if (this.mem.containsKey(URL_HASHES)) {
-                this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES));
-            } else {
-                this.urlHashes = new HashSet<String>();
-            }
-        }
-
-        public Tag(final String name, final HashSet<String> entries){
+        private Tag(final String name) {
             this.tagHash = BookmarkHelper.tagHash(name);
-            this.mem = new HashMap<String, String>();
-            //mem.put(URL_HASHES, listManager.arraylist2string(entries));
-            this.urlHashes = entries;
-            this.mem.put(TAG_NAME, name);
-        }
-
-        public Tag(final String name){
-            this(name, new HashSet<String>());
-        }
-
-        public Map<String, String> getMap(){
-            this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes));
-            return this.mem;
+            this.tagName = name;
+            this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10);
         }

         /**
          * get the lowercase Tagname
          */
         public String getTagName(){
-            /*if(this.mem.containsKey(TAG_NAME)){
-                return (String) this.mem.get(TAG_NAME);
-            }
-            return "";*/
             return getFriendlyName().toLowerCase();
         }

-        public String getTagHash(){
+        private String getTagHash(){
             return this.tagHash;
         }
@@ -441,37 +407,33 @@ public class BookmarksDB {
          * @return the tag name, with all uppercase chars
          */
         public String getFriendlyName(){
-            /*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
-                return (String) this.mem.get(TAG_FRIENDLY_NAME);
-            }
-            return getTagName();*/
-            if(this.mem.containsKey(TAG_NAME)){
-                return this.mem.get(TAG_NAME);
-            }
-            return "notagname";
+            return this.tagName;
         }

-        public Set<String> getUrlHashes(){
+        private RowHandleSet getUrlHashes(){
             return this.urlHashes;
         }

-        public boolean hasPublicItems(){
+        private boolean hasPublicItems(){
             return getBookmarksIterator(getTagName(), false).hasNext();
         }

-        public void addUrl(final String urlHash){
-            this.urlHashes.add(urlHash);
+        private void addUrl(final String urlHash){
+            try {
+                this.urlHashes.put(ASCII.getBytes(urlHash));
+            } catch (SpaceExceededException e) {
+            }
         }

-        public void delete(final String urlHash){
-            this.urlHashes.remove(urlHash);
+        private void delete(final String urlHash){
+            this.urlHashes.remove(ASCII.getBytes(urlHash));
         }

         public int size(){
             return this.urlHashes.size();
         }

-        public boolean isEmpty() {
+        private boolean isEmpty() {
             return this.urlHashes.isEmpty();
         }
     }
@@ -481,27 +443,19 @@ public class BookmarksDB {
      */
     public class Bookmark {

-        public static final String BOOKMARK_URL = "bookmarkUrl";
+        private static final String BOOKMARK_URL = "bookmarkUrl";
         public static final String BOOKMARK_TITLE = "bookmarkTitle";
         public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc";
-        public static final String BOOKMARK_TAGS = "bookmarkTags";
-        public static final String BOOKMARK_PUBLIC = "bookmarkPublic";
-        public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
-        public static final String BOOKMARK_OWNER = "bookmarkOwner";
-        public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
+        private static final String BOOKMARK_TAGS = "bookmarkTags";
+        private static final String BOOKMARK_PUBLIC = "bookmarkPublic";
+        private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
+        private static final String BOOKMARK_OWNER = "bookmarkOwner";
+        private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";

         private final String urlHash;
         private Set<String> tagNames;
         private long timestamp;
         private final Map<String, String> entry;

-        public Bookmark(final String urlHash, final Map<String, String> map) {
-            this.entry = map;
-            this.urlHash = urlHash;
-            this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
-            if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
-            loadTimestamp();
-        }
-
         public Bookmark(final DigestURL url) {
             this.entry = new HashMap<String, String>();
             this.urlHash = ASCII.String(url.hash());
@@ -529,11 +483,15 @@ public class BookmarksDB {
             this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url));
         }

-        public Bookmark(final Map<String, String> map) throws MalformedURLException {
-            this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map);
+        private Bookmark(final Map<String, String> map) throws MalformedURLException {
+            this.entry = map;
+            this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash());
+            this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
+            if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
+            loadTimestamp();
         }

-        Map<String, String> toMap() {
+        private Map<String, String> toMap() {
             this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames));
             this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
             return this.entry;
@@ -688,11 +646,11 @@ public class BookmarksDB {
     /**
      * Subclass of bookmarksDB, which provides the bookmarkIterator object-type
      */
-    public class bookmarkIterator implements Iterator<Bookmark> {
+    private class bookmarkIterator implements Iterator<Bookmark> {

         Iterator<byte[]> bookmarkIter;

-        public bookmarkIterator(final boolean up) throws IOException {
+        private bookmarkIterator(final boolean up) throws IOException {
             //flushBookmarkCache(); //XXX: this will cost performance
             this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false);
             //this.nextEntry = null;
@@ -722,14 +680,14 @@ public class BookmarksDB {
     /**
      * Comparator to sort objects of type Bookmark according to their timestamps
      */
-    public class bookmarkComparator implements Comparator<String> {
+    private class bookmarkComparator implements Comparator<String> {

         private final boolean newestFirst;

         /**
          * @param newestFirst newest first, or oldest first?
          */
-        public bookmarkComparator(final boolean newestFirst){
+        private bookmarkComparator(final boolean newestFirst){
             this.newestFirst = newestFirst;
         }
@@ -752,13 +710,13 @@ public class BookmarksDB {
         }
     }

-    public static final TagComparator tagComparator = new TagComparator();
-    public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
+    private static final TagComparator tagComparator = new TagComparator();
+    private static final TagSizeComparator tagSizeComparator = new TagSizeComparator();

     /**
      * Comparator to sort objects of type Tag according to their names
      */
-    public static class TagComparator implements Comparator<Tag>, Serializable {
+    private static class TagComparator implements Comparator<Tag>, Serializable {

         /**
          * generated serial
@@ -772,7 +730,7 @@ public class BookmarksDB {
     }

-    public static class TagSizeComparator implements Comparator<Tag>, Serializable {
+    private static class TagSizeComparator implements Comparator<Tag>, Serializable {

         /**
          * generated serial

@@ -32,27 +32,15 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.reflect.Method;
 import java.util.Date;

-import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.exceptions.CryptographyException;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
 import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
-import org.apache.pdfbox.pdmodel.font.PDCIDFont;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font;
-import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
-import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
-import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
-import org.apache.pdfbox.pdmodel.font.PDType0Font;
-import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont;
-import org.apache.pdfbox.pdmodel.font.PDType1CFont;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.apache.pdfbox.pdmodel.font.PDType3Font;
 import org.apache.pdfbox.util.PDFTextStripper;

 import net.yacy.cora.document.id.AnchorURL;
@@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser {
                 false,
                 docDate)};
     }

-    @SuppressWarnings("static-access")
     public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() {
         // thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever
         // AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT!
         // Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain.
-        PDFont.clearResources();
-        COSName.clearResources();
-        PDType1Font.clearResources();
-        PDTrueTypeFont.clearResources();
-        PDType0Font.clearResources();
-        PDType1AfmPfbFont.clearResources();
-        PDType3Font.clearResources();
-        PDType1CFont.clearResources();
-        PDCIDFont.clearResources();
-        PDCIDFontType0Font.clearResources();
-        PDCIDFontType2Font.clearResources();
-        PDMMType1Font.clearResources();
-        PDSimpleFont.clearResources();
+        ResourceCleaner cl = new ResourceCleaner();
+        cl.clearClassResources("org.apache.pdfbox.cos.COSName");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font");
+        cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont");
+    }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private static class ResourceCleaner {
+        Method findLoadedClass;
+        private ClassLoader sys;
+        public ResourceCleaner() {
+            try {
+                this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", new Class[] { String.class });
+                this.findLoadedClass.setAccessible(true);
+                this.sys = ClassLoader.getSystemClassLoader();
+            } catch (Throwable e) {
+                e.printStackTrace();
+                this.findLoadedClass = null;
+                this.sys = null;
+            }
+        }
+        public void clearClassResources(String name) {
+            if (this.findLoadedClass == null) return;
+            try {
+                Object pdfparserpainclass = this.findLoadedClass.invoke(this.sys, name);
+                if (pdfparserpainclass != null) {
+                    Method clearResources = ((Class) pdfparserpainclass).getDeclaredMethod("clearResources", new Class[] {});
+                    if (clearResources != null) clearResources.invoke(null);
+                }
+            } catch (Throwable e) {
+                e.printStackTrace();
+            }
+        }
     }

     /**
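The rewrite above replaces hard compile-time references to the PDFBox font classes with reflection: ClassLoader.findLoadedClass is used so that a static clearResources() is only invoked on classes the JVM has actually loaded, and the parser no longer imports a dozen font classes just to empty their caches. A compressed sketch of the same idiom, assuming java.lang.reflect.Method is imported:

    // sketch: call a static clearResources() only if the class was already loaded by the system class loader
    try {
        Method findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", String.class);
        findLoadedClass.setAccessible(true);
        Object loaded = findLoadedClass.invoke(ClassLoader.getSystemClassLoader(), "org.apache.pdfbox.pdmodel.font.PDFont");
        if (loaded != null) ((Class<?>) loaded).getDeclaredMethod("clearResources").invoke(null);
    } catch (Throwable e) {
        // a class that was never loaded, or has no clearResources(), is simply skipped
    }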

@@ -129,7 +129,7 @@ public class ResourceObserver {
         if(MemoryControl.properState()) return Space.HIGH;
         // clear some caches - @all: are there more of these, we could clear here?
-        this.sb.index.clearCache();
+        this.sb.index.clearCaches();
         SearchEventCache.cleanupEvents(true);
         this.sb.trail.clear();
         Switchboard.urlBlacklist.clearblacklistCache();

@@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch {
         // clear caches if necessary
         if ( !MemoryControl.request(128000000L, false) ) {
-            this.index.clearCache();
+            this.index.clearCaches();
             SearchEventCache.cleanupEvents(false);
             this.trail.clear();
             GuiHandler.clear();

@@ -225,10 +225,10 @@ public final class Fulltext {
         }
     }

-    public void clearCache() {
+    public void clearCaches() {
         if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache();
         if (this.statsDump != null) this.statsDump.clear();
-        this.solrInstances.clearCache();
+        this.solrInstances.clearCaches();
         this.statsDump = null;
     }
@@ -250,7 +250,7 @@ public final class Fulltext {
             for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear();
         }
         this.commit(false);
-        this.solrInstances.clearCache();
+        this.solrInstances.clearCaches();
     }
 }
@@ -260,7 +260,7 @@ public final class Fulltext {
         if (instance != null) {
             for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear();
         }
-        this.solrInstances.clearCache();
+        this.solrInstances.clearCaches();
     }
 }
@@ -400,7 +400,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }

     public void putEdges(final Collection<SolrInputDocument> edges) throws IOException {
@@ -412,7 +412,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }

     /**
@@ -432,7 +432,7 @@ public final class Fulltext {
             throw new IOException(e.getMessage(), e);
         }
         this.statsDump = null;
-        if (MemoryControl.shortStatus()) clearCache();
+        if (MemoryControl.shortStatus()) clearCaches();
     }

     /**

@@ -503,10 +503,10 @@ public class Segment {
         }
     }

-    public void clearCache() {
+    public void clearCaches() {
         if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache();
         if (this.termIndex != null) this.termIndex.clearCache();
-        this.fulltext.clearCache();
+        this.fulltext.clearCaches();
     }

     public File getLocation() {

@@ -242,7 +242,8 @@ public class QueryGoal {
         // add filter to prevent that results come from failed urls
         q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
         q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
-        q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
+        q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR");
+        q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");

         // parse special requests
         if (isCatchall()) return q;
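With the extra clause, image searches also match documents whose MIME type is an image even when the file extension is not jpg/png/gif. Roughly, the appended filter takes the following shape (a hedged illustration; the exact field names come from CollectionSchema and the exact whitespace depends on the appended fragments):

    // approximate result of the StringBuilder above for an image query
    String filter = "httpstatus_i:200 AND ("
                  + "images_urlstub_sxt:[* TO *] OR "
                  + "url_file_ext_s:(jpg OR png OR gif) OR "
                  + "content_type:(image/*))";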
