Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
orbiter 11 years ago
commit 4234b0ed6c

@ -461,19 +461,21 @@
and old cache.
-->
<filterCache class="solr.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
size="64"
initialSize="64"
autowarmCount="0"
cleanupThread="true"/>
<!-- Query Result Cache
Caches results of searches - ordered lists of document ids
(DocList) based on a query, a sort, and the range of documents requested.
-->
<queryResultCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<queryResultCache class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="0"
cleanupThread="true"/>
<!-- Document Cache
@ -481,10 +483,11 @@
document). Since Lucene internal document ids are transient,
this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<documentCache class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="0"
cleanupThread="true"/>
<!-- Field Value Cache
@ -492,12 +495,11 @@
by document id. The fieldValueCache is created by default
even if not configured here.
-->
<!--
<fieldValueCache class="solr.FastLRUCache"
size="512"
autowarmCount="128"
showItems="32" />
-->
size="64"
autowarmCount="0"
showItems="32"
cleanupThread="true"/>
<!-- Custom Cache
@ -510,11 +512,12 @@
-->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="0"
regenerator="com.mycompany.MyRegenerator"
cleanupThread="true"
/>
-->

@ -442,6 +442,7 @@ seedScpPath=
peerCycle=2
# debug flags
debug.search.profiling=false
debug.search.local.dht.off=false
debug.search.local.solr.off=false
debug.search.remote.dht.off=false

@ -19,7 +19,7 @@
<dt><label for="HTCachePath">The path where the cache is stored</label></dt>
<dd><input name="HTCachePath" id="HTCachePath" type="text" size="20" maxlength="300" value="#[HTCachePath]#" /></dd>
<dt><label for="actualCacheSize">The current size of the cache</label></dt>
<dd><span id="actualCacheSize">#[actualCacheSize]# MB</span></dd>
<dd><span id="actualCacheSize">#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average </span></dd>
<dt><label for="maxCacheSize">The maximum size of the cache</label></dt>
<dd><input name="maxCacheSize" id="maxCacheSize" type="text" size="8" maxlength="24" value="#[maxCacheSize]#" /> MB</dd>
<dt>&nbsp;</dt>

@ -77,7 +77,9 @@ public class ConfigHTCache_p {
}
prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024);
prop.put("actualCacheDocCount", Cache.getActualCacheDocCount());
prop.put("docSizeAverage", Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024);
prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
// return rewrite properties
return prop;

@ -34,7 +34,7 @@ public class ContentAnalysis_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));

@ -38,7 +38,7 @@ public class RankingSolr_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
int profileNr = 0;
if (post != null) profileNr = post.getInt("profileNr", profileNr);

@ -360,7 +360,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
indexSegment.clearCache();
indexSegment.clearCaches();
SearchEventCache.cleanupEvents(false);
}

@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
}
public void clearCache() {
public void clearCaches() {
this.hitCache.clear();
this.missCache.clear();
this.documentCache.clear();
@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public synchronized void close() {
this.clearCaches();
if (this.solr != null) this.solr.close();
this.solr = null;
this.clearCache();
}
/**
@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public void clear() throws IOException {
this.clearCache();
this.clearCaches();
if (this.solr != null) this.solr.clear();
}
@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public void deleteByQuery(final String querystring) throws IOException {
this.clearCache();
this.clearCaches();
this.solr.deleteByQuery(querystring);
}
@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
}
private void addToCache(SolrDocumentList list, boolean doccache) {
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
for (final SolrDocument solrdoc: list) {
addToCache(solrdoc, doccache);
}

@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
ensureAliveUpdateHandler();
}
@Override
public void clearCaches() {
this.connector.clearCaches();
this.idCache.clear();
}
/**
* used for debugging
*/

@ -30,10 +30,12 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
@ -47,10 +49,14 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.UnInvertedField;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QueryResultKey;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
@ -88,6 +94,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
super.init(this.instance.getServer(coreName));
}
public void clearCaches() {
SolrConfig solrConfig = this.core.getSolrConfig();
@SuppressWarnings("unchecked")
SolrCache<String, UnInvertedField> fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance();
if (fieldValueCache != null) fieldValueCache.clear();
@SuppressWarnings("unchecked")
SolrCache<Query, DocSet> filterCache= solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
if (filterCache != null) filterCache.clear();
@SuppressWarnings("unchecked")
SolrCache<QueryResultKey, DocList> queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
if (queryResultCache != null) queryResultCache.clear();
@SuppressWarnings("unchecked")
SolrCache<Integer, Document> documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
if (documentCache != null) documentCache.clear();
}
public SolrInstance getInstance() {
return this.instance;
}
@ -224,6 +246,17 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
@Override
public Set<String> existsByIds(Set<String> ids) {
boolean debug = Switchboard.getSwitchboard().getConfigBool("debug.search.profiling", false);
long debugSingleTime = 0; int debugSingleCount = 0;
if (debug) {
// run this also with single exist queries which might be faster (but we don't know, thats the reason we test that here)
long start = System.currentTimeMillis();
Set <String> idsr = new HashSet<String>();
for (String id: ids) if (existsById(id)) idsr.add(id);
debugSingleTime = System.currentTimeMillis() - start;
debugSingleCount = idsr.size();
}
long start = System.currentTimeMillis();
if (ids == null || ids.size() == 0) return new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
@ -246,6 +279,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
} finally {
docListSearcher.close();
}
long debugCollectionTime = System.currentTimeMillis() - start;
if (debug) {
ConcurrentLog.info("EmbeddedSolrConnector", "Comparisment of existsByIds: input=" + ids.size() + " records, output=" + idsr.size() + " records, singleTime=" + debugSingleTime + ", collectionTime=" + debugCollectionTime + ", singleCount=" + debugSingleCount + ", collectionCount=" + idsr.size());
}
// construct a new id list from that
return idsr;
}

@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
this.solr0 = solr0;
this.solr1 = solr1;
}
@Override
public void clearCaches() {
if (this.solr0 != null) this.solr0.clearCaches();
if (this.solr1 != null) this.solr1.clearCaches();
}
public boolean isConnected0() {
return this.solr0 != null;

@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
super.close();
}
@Override
public void clearCaches() {
// we do not have a direct access to the caches here, thus we simply do nothing.
}
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
}
System.exit(0);
}
}

@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {
/**
* clear all caches: inside solr and ouside solr within the implementations of this interface
*/
public void clearCaches();
/**
* get the size of the index
* @return number of results if solr is queries with a catch-all pattern

@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrServer getServer() {
return this.server;
}
@Override
public void commit(final boolean softCommit) {
synchronized (this.server) {

@ -24,7 +24,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@ -161,9 +160,9 @@ public class InstanceMirror {
return msc;
}
public void clearCache() {
public void clearCaches() {
for (SolrConnector csc: this.connectorCache.values()) {
if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
csc.clearCaches();
}
for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
}

@ -182,6 +182,14 @@ public final class Cache {
public static long getActualCacheSize() {
return fileDBunbuffered.length();
}
/**
* get the current actual cache size
* @return
*/
public static long getActualCacheDocCount() {
return fileDBunbuffered.size();
}
/**
* close the databases

@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.index.RowHandleSet;
public class BookmarksDB {
@ -147,11 +150,6 @@ public class BookmarksDB {
ConcurrentLog.logException(e);
}
}
public String addBookmark(final Bookmark bookmark){
saveBookmark(bookmark);
return bookmark.getUrlHash();
}
public Bookmark getBookmark(final String urlHash) throws IOException {
try {
@ -214,18 +212,13 @@ public class BookmarksDB {
final TreeSet<String> set=new TreeSet<String>(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);
final Tag tag=getTag(tagHash);
Set<String> hashes=new HashSet<String>();
if (tag != null) {
hashes=getTag(tagHash).getUrlHashes();
}
RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes();
if (priv) {
set.addAll(hashes);
for (byte[] hash: hashes) set.add(ASCII.String(hash));
} else {
final Iterator<String> it=hashes.iterator();
Bookmark bm;
while(it.hasNext()){
for (byte[] hash: hashes) {
try {
bm = getBookmark(it.next());
Bookmark bm = getBookmark(ASCII.String(hash));
if (bm != null && bm.getPublic()) {
set.add(bm.getUrlHash());
}
@ -249,7 +242,7 @@ public class BookmarksDB {
* retrieve an object of type Tag from the the tagCache, if object is not cached return loadTag(hash)
* @param hash an object of type String, containing a tagHash
*/
public Tag getTag(final String hash){
private Tag getTag(final String hash){
return this.tags.get(hash); //null if it does not exists
}
@ -257,7 +250,7 @@ public class BookmarksDB {
* store a Tag in tagsTable or remove an empty tag
* @param tag an object of type Tag to be stored/removed
*/
public void putTag(final Tag tag){
private void putTag(final Tag tag){
if (tag == null) return;
if (tag.isEmpty()) {
this.tags.remove(tag.getTagHash());
@ -266,7 +259,7 @@ public class BookmarksDB {
}
}
public void removeTag(final String hash) {
private void removeTag(final String hash) {
this.tags.remove(hash);
}
@ -301,7 +294,7 @@ public class BookmarksDB {
return set.iterator();
}
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
private Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
Iterator<String> it=null;
final Iterator<String> bit=getBookmarksIterator(tagName, priv);
@ -347,14 +340,14 @@ public class BookmarksDB {
final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName));
if (oldTag != null) {
final Set<String> urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
final RowHandleSet urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
removeTag(BookmarkHelper.tagHash(oldName)); // remove oldHash from TagsDB
Bookmark bookmark;
Set<String> tagSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
for (final byte[] urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
try {
bookmark = getBookmark(urlHash);
bookmark = getBookmark(ASCII.String(urlHash));
tagSet = bookmark.getTags();
tagSet.remove(oldName);
bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt
@ -371,9 +364,9 @@ public class BookmarksDB {
public void addTag(final String selectTag, final String newTag) {
Bookmark bookmark;
for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
try {
bookmark = getBookmark(urlHash);
bookmark = getBookmark(ASCII.String(urlHash));
bookmark.addTag(newTag);
saveBookmark(bookmark);
} catch (final IOException e) {
@ -389,51 +382,24 @@ public class BookmarksDB {
* Subclass of bookmarksDB, which provides the Tag object-type
*/
public class Tag {
public static final String URL_HASHES = "urlHashes";
public static final String TAG_NAME = "tagName";
private final String tagHash;
private final Map<String, String> mem;
private Set<String> urlHashes;
public Tag(final String hash, final Map<String, String> map){
this.tagHash = hash;
this.mem = map;
if (this.mem.containsKey(URL_HASHES)) {
this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES));
} else {
this.urlHashes = new HashSet<String>();
}
}
private final String tagName;
private RowHandleSet urlHashes;
public Tag(final String name, final HashSet<String> entries){
private Tag(final String name) {
this.tagHash = BookmarkHelper.tagHash(name);
this.mem = new HashMap<String, String>();
//mem.put(URL_HASHES, listManager.arraylist2string(entries));
this.urlHashes = entries;
this.mem.put(TAG_NAME, name);
}
public Tag(final String name){
this(name, new HashSet<String>());
}
public Map<String, String> getMap(){
this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes));
return this.mem;
this.tagName = name;
this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10);
}
/**
* get the lowercase Tagname
*/
public String getTagName(){
/*if(this.mem.containsKey(TAG_NAME)){
return (String) this.mem.get(TAG_NAME);
}
return "";*/
return getFriendlyName().toLowerCase();
}
public String getTagHash(){
private String getTagHash(){
return this.tagHash;
}
@ -441,37 +407,33 @@ public class BookmarksDB {
* @return the tag name, with all uppercase chars
*/
public String getFriendlyName(){
/*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
return (String) this.mem.get(TAG_FRIENDLY_NAME);
}
return getTagName();*/
if(this.mem.containsKey(TAG_NAME)){
return this.mem.get(TAG_NAME);
}
return "notagname";
return this.tagName;
}
public Set<String> getUrlHashes(){
private RowHandleSet getUrlHashes(){
return this.urlHashes;
}
public boolean hasPublicItems(){
private boolean hasPublicItems(){
return getBookmarksIterator(getTagName(), false).hasNext();
}
public void addUrl(final String urlHash){
this.urlHashes.add(urlHash);
private void addUrl(final String urlHash){
try {
this.urlHashes.put(ASCII.getBytes(urlHash));
} catch (SpaceExceededException e) {
}
}
public void delete(final String urlHash){
this.urlHashes.remove(urlHash);
private void delete(final String urlHash){
this.urlHashes.remove(ASCII.getBytes(urlHash));
}
public int size(){
return this.urlHashes.size();
}
public boolean isEmpty() {
private boolean isEmpty() {
return this.urlHashes.isEmpty();
}
}
@ -481,27 +443,19 @@ public class BookmarksDB {
*/
public class Bookmark {
public static final String BOOKMARK_URL = "bookmarkUrl";
private static final String BOOKMARK_URL = "bookmarkUrl";
public static final String BOOKMARK_TITLE = "bookmarkTitle";
public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc";
public static final String BOOKMARK_TAGS = "bookmarkTags";
public static final String BOOKMARK_PUBLIC = "bookmarkPublic";
public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
public static final String BOOKMARK_OWNER = "bookmarkOwner";
public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
private static final String BOOKMARK_TAGS = "bookmarkTags";
private static final String BOOKMARK_PUBLIC = "bookmarkPublic";
private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
private static final String BOOKMARK_OWNER = "bookmarkOwner";
private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
private final String urlHash;
private Set<String> tagNames;
private long timestamp;
private final Map<String, String> entry;
public Bookmark(final String urlHash, final Map<String, String> map) {
this.entry = map;
this.urlHash = urlHash;
this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
loadTimestamp();
}
public Bookmark(final DigestURL url) {
this.entry = new HashMap<String, String>();
this.urlHash = ASCII.String(url.hash());
@ -529,11 +483,15 @@ public class BookmarksDB {
this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url));
}
public Bookmark(final Map<String, String> map) throws MalformedURLException {
this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map);
private Bookmark(final Map<String, String> map) throws MalformedURLException {
this.entry = map;
this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash());
this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
loadTimestamp();
}
Map<String, String> toMap() {
private Map<String, String> toMap() {
this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames));
this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
return this.entry;
@ -688,11 +646,11 @@ public class BookmarksDB {
/**
* Subclass of bookmarksDB, which provides the bookmarkIterator object-type
*/
public class bookmarkIterator implements Iterator<Bookmark> {
private class bookmarkIterator implements Iterator<Bookmark> {
Iterator<byte[]> bookmarkIter;
public bookmarkIterator(final boolean up) throws IOException {
private bookmarkIterator(final boolean up) throws IOException {
//flushBookmarkCache(); //XXX: this will cost performance
this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false);
//this.nextEntry = null;
@ -722,14 +680,14 @@ public class BookmarksDB {
/**
* Comparator to sort objects of type Bookmark according to their timestamps
*/
public class bookmarkComparator implements Comparator<String> {
private class bookmarkComparator implements Comparator<String> {
private final boolean newestFirst;
/**
* @param newestFirst newest first, or oldest first?
*/
public bookmarkComparator(final boolean newestFirst){
private bookmarkComparator(final boolean newestFirst){
this.newestFirst = newestFirst;
}
@ -752,13 +710,13 @@ public class BookmarksDB {
}
}
public static final TagComparator tagComparator = new TagComparator();
public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
private static final TagComparator tagComparator = new TagComparator();
private static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
/**
* Comparator to sort objects of type Tag according to their names
*/
public static class TagComparator implements Comparator<Tag>, Serializable {
private static class TagComparator implements Comparator<Tag>, Serializable {
/**
* generated serial
@ -772,7 +730,7 @@ public class BookmarksDB {
}
public static class TagSizeComparator implements Comparator<Tag>, Serializable {
private static class TagSizeComparator implements Comparator<Tag>, Serializable {
/**
* generated serial

@ -32,27 +32,15 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.Date;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont;
import org.apache.pdfbox.pdmodel.font.PDType1CFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.util.PDFTextStripper;
import net.yacy.cora.document.id.AnchorURL;
@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser {
false,
docDate)};
}
@SuppressWarnings("static-access")
public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() {
// thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever
// AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT!
// Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain.
PDFont.clearResources();
COSName.clearResources();
PDType1Font.clearResources();
PDTrueTypeFont.clearResources();
PDType0Font.clearResources();
PDType1AfmPfbFont.clearResources();
PDType3Font.clearResources();
PDType1CFont.clearResources();
PDCIDFont.clearResources();
PDCIDFontType0Font.clearResources();
PDCIDFontType2Font.clearResources();
PDMMType1Font.clearResources();
PDSimpleFont.clearResources();
ResourceCleaner cl = new ResourceCleaner();
cl.clearClassResources("org.apache.pdfbox.cos.COSName");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont");
}
@SuppressWarnings({ "unchecked", "rawtypes" })
private static class ResourceCleaner {
Method findLoadedClass;
private ClassLoader sys;
public ResourceCleaner() {
try {
this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", new Class[] { String.class });
this.findLoadedClass.setAccessible(true);
this.sys = ClassLoader.getSystemClassLoader();
} catch (Throwable e) {
e.printStackTrace();
this.findLoadedClass = null;
this.sys = null;
}
}
public void clearClassResources(String name) {
if (this.findLoadedClass == null) return;
try {
Object pdfparserpainclass = this.findLoadedClass.invoke(this.sys, name);
if (pdfparserpainclass != null) {
Method clearResources = ((Class) pdfparserpainclass).getDeclaredMethod("clearResources", new Class[] {});
if (clearResources != null) clearResources.invoke(null);
}
} catch (Throwable e) {
e.printStackTrace();
}
}
}
/**

@ -129,7 +129,7 @@ public class ResourceObserver {
if(MemoryControl.properState()) return Space.HIGH;
// clear some caches - @all: are there more of these, we could clear here?
this.sb.index.clearCache();
this.sb.index.clearCaches();
SearchEventCache.cleanupEvents(true);
this.sb.trail.clear();
Switchboard.urlBlacklist.clearblacklistCache();

@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch {
// clear caches if necessary
if ( !MemoryControl.request(128000000L, false) ) {
this.index.clearCache();
this.index.clearCaches();
SearchEventCache.cleanupEvents(false);
this.trail.clear();
GuiHandler.clear();

@ -225,10 +225,10 @@ public final class Fulltext {
}
}
public void clearCache() {
public void clearCaches() {
if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache();
if (this.statsDump != null) this.statsDump.clear();
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
this.statsDump = null;
}
@ -250,7 +250,7 @@ public final class Fulltext {
for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear();
}
this.commit(false);
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
}
}
@ -260,7 +260,7 @@ public final class Fulltext {
if (instance != null) {
for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear();
}
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
}
}
@ -400,7 +400,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
public void putEdges(final Collection<SolrInputDocument> edges) throws IOException {
@ -412,7 +412,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
/**
@ -432,7 +432,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
/**

@ -503,10 +503,10 @@ public class Segment {
}
}
public void clearCache() {
public void clearCaches() {
if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache();
if (this.termIndex != null) this.termIndex.clearCache();
this.fulltext.clearCache();
this.fulltext.clearCaches();
}
public File getLocation() {

@ -242,7 +242,8 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR");
q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");
// parse special requests
if (isCatchall()) return q;

Loading…
Cancel
Save