Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Conflicts:
	source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java
pull/1/head
Michael Peter Christen 13 years ago
commit 96e9d77270

@ -245,8 +245,8 @@ sessionidNamesFile = defaults/sessionid.names
proxyCache = DATA/HTCACHE proxyCache = DATA/HTCACHE
# the maximum disc cache size for files in Cache in megabytes # the maximum disc cache size for files in Cache in megabytes
# default: 32 Gigabyte # default: 4 Gigabyte
proxyCacheSize = 32768 proxyCacheSize = 4096
# a path to the surrogate input directory # a path to the surrogate input directory
surrogates.in = DATA/SURROGATES/in surrogates.in = DATA/SURROGATES/in

@ -1,4 +1,4 @@
// ConfigHTCache_p.java // ConfigHTCache_p.java
// --------------------------- // ---------------------------
// (C) by Michael Peter Christen; mc@yacy.net // (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de // first published on http://www.anomic.de
@ -32,7 +32,6 @@ import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -59,12 +58,12 @@ public class ConfigHTCache_p {
cache.mkdirs(); cache.mkdirs();
} }
// proxyCacheSize // proxyCacheSize
final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4); final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4);
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize); env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024); Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
} }
if (post != null && post.containsKey("deletecomplete")) { if (post != null && post.containsKey("deletecomplete")) {
if ("on".equals(post.get("deleteCache", ""))) { if ("on".equals(post.get("deleteCache", ""))) {
Cache.clear(); Cache.clear();

@ -69,7 +69,7 @@
</td> </td>
</tr> </tr>
<tr> <tr>
<td><label for="file"><span class="nobr">From File</span></label>:</td> <td><label for="file"><span class="nobr">From File (enter a path<br/>within your local file system)</span></label>:</td>
<td><input type="radio" name="crawlingMode" id="file" value="file" onclick="document.getElementById('Crawler').rangeDomain.checked = true;"/></td> <td><input type="radio" name="crawlingMode" id="file" value="file" onclick="document.getElementById('Crawler').rangeDomain.checked = true;"/></td>
<td><input type="text" name="crawlingFile" size="41" onfocus="check('file')"/><!--<input type="file" name="crawlingFile" size="18" onfocus="check('file')"/>--></td> <td><input type="text" name="crawlingFile" size="41" onfocus="check('file')"/><!--<input type="file" name="crawlingFile" size="18" onfocus="check('file')"/>--></td>
</tr> </tr>

@ -154,8 +154,14 @@ public class Crawler_p {
} }
// remove crawlingFileContent before we record the call // remove crawlingFileContent before we record the call
final String crawlingFileName = post.get("crawlingFile"); String crawlingFileName = post.get("crawlingFile");
final File crawlingFile = (crawlingFileName != null && crawlingFileName.length() > 0) ? new File(crawlingFileName) : null; final File crawlingFile;
if (crawlingFileName == null || crawlingFileName.length() == 0) {
crawlingFile = null;
} else {
if (crawlingFileName.startsWith("file://")) crawlingFileName = crawlingFileName.substring(7);
crawlingFile = new File(crawlingFileName);
}
if (crawlingFile != null && crawlingFile.exists()) { if (crawlingFile != null && crawlingFile.exists()) {
post.remove("crawlingFile$file"); post.remove("crawlingFile$file");
} }
@ -644,7 +650,7 @@ public class Crawler_p {
prop.put("crawlProfilesShow_list", count); prop.put("crawlProfilesShow_list", count);
prop.put("crawlProfilesShow", count == 0 ? 0 : 1); prop.put("crawlProfilesShow", count == 0 ? 0 : 1);
// return rewrite properties // return rewrite properties
return prop; return prop;
} }

@ -124,7 +124,7 @@ public class PerformanceMemory_p {
c++; c++;
} }
prop.put("EcoList", c); prop.put("EcoList", c);
prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d)); prop.putNum("EcoIndexTotalMem", totalmem / (1024d * 1024d));
// write object cache table // write object cache table
final Iterator<Map.Entry<String, RAMIndex>> oi = RAMIndex.objects(); final Iterator<Map.Entry<String, RAMIndex>> oi = RAMIndex.objects();
@ -153,7 +153,7 @@ public class PerformanceMemory_p {
c++; c++;
} }
prop.put("indexcache", c); prop.put("indexcache", c);
prop.putNum("indexcacheTotalMem", totalhitmem / (1024 * 1024d)); prop.putNum("indexcacheTotalMem", totalhitmem / (1024d * 1024d));
// write object cache table // write object cache table
i = Cache.filenames(); i = Cache.filenames();
@ -195,10 +195,10 @@ public class PerformanceMemory_p {
c++; c++;
} }
prop.put("ObjectList", c); prop.put("ObjectList", c);
prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024 * 1024d)); prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024d * 1024d));
prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024 * 1024d)); prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024d * 1024d));
prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024 * 1024d)); prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024d * 1024d));
prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024 * 1024d)); prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024d * 1024d));
// other caching structures // other caching structures
prop.putNum("namecacheHit.size", Domains.nameCacheHitSize()); prop.putNum("namecacheHit.size", Domains.nameCacheHitSize());

@ -55,13 +55,13 @@ public class PerformanceQueues_p {
performanceProfiles.put("defaults/yacy.init", "default (crawl)"); performanceProfiles.put("defaults/yacy.init", "default (crawl)");
performanceProfiles.put("defaults/performance_dht.profile", "prefer DHT"); performanceProfiles.put("defaults/performance_dht.profile", "prefer DHT");
} }
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements // return variable that accumulates replacements
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
File defaultSettingsFile = new File(sb.getAppPath(), "defaults/yacy.init"); File defaultSettingsFile = new File(sb.getAppPath(), "defaults/yacy.init");
// get segment // get segment
Segment indexSegment = null; Segment indexSegment = null;
if (post != null && post.containsKey("segment")) { if (post != null && post.containsKey("segment")) {
@ -73,7 +73,7 @@ public class PerformanceQueues_p {
// take default segment // take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
} }
if(post != null) { if(post != null) {
if(post.containsKey("defaultFile")){ if(post.containsKey("defaultFile")){
// TODO check file-path! // TODO check file-path!
@ -108,10 +108,10 @@ public class PerformanceQueues_p {
Iterator<String> threads = sb.threadNames(); Iterator<String> threads = sb.threadNames();
String threadName; String threadName;
BusyThread thread; BusyThread thread;
final boolean xml = (header.get(HeaderFramework.CONNECTION_PROP_PATH)).endsWith(".xml"); final boolean xml = (header.get(HeaderFramework.CONNECTION_PROP_PATH)).endsWith(".xml");
prop.setLocalized(!xml); prop.setLocalized(!xml);
// calculate totals // calculate totals
long blocktime_total = 0, sleeptime_total = 0, exectime_total = 0; long blocktime_total = 0, sleeptime_total = 0, exectime_total = 0;
while (threads.hasNext()) { while (threads.hasNext()) {
@ -120,11 +120,11 @@ public class PerformanceQueues_p {
blocktime_total += thread.getBlockTime(); blocktime_total += thread.getBlockTime();
sleeptime_total += thread.getSleepTime(); sleeptime_total += thread.getSleepTime();
exectime_total += thread.getExecTime(); exectime_total += thread.getExecTime();
} }
if (blocktime_total == 0) blocktime_total = 1; if (blocktime_total == 0) blocktime_total = 1;
if (sleeptime_total == 0) sleeptime_total = 1; if (sleeptime_total == 0) sleeptime_total = 1;
if (exectime_total == 0) exectime_total = 1; if (exectime_total == 0) exectime_total = 1;
// set templates for latest news from the threads // set templates for latest news from the threads
long blocktime, sleeptime, exectime; long blocktime, sleeptime, exectime;
long idlesleep, busysleep, memuse, memprereq; long idlesleep, busysleep, memuse, memprereq;
@ -141,11 +141,11 @@ public class PerformanceQueues_p {
sb.setConfig("performanceProfile", post.get("defaultFile", "defaults/yacy.init")); sb.setConfig("performanceProfile", post.get("defaultFile", "defaults/yacy.init"));
sb.setConfig("performanceSpeed", post.getInt("profileSpeed", 100)); sb.setConfig("performanceSpeed", post.getInt("profileSpeed", 100));
} }
while (threads.hasNext()) { while (threads.hasNext()) {
threadName = threads.next(); threadName = threads.next();
thread = sb.getThread(threadName); thread = sb.getThread(threadName);
// set values to templates // set values to templates
prop.put("table_" + c + "_threadname", threadName); prop.put("table_" + c + "_threadname", threadName);
@ -159,7 +159,7 @@ public class PerformanceQueues_p {
prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription()); prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription());
queuesize = thread.getJobCount(); queuesize = thread.getJobCount();
prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : Formatter.number(queuesize, !xml)); prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : Formatter.number(queuesize, !xml));
blocktime = thread.getBlockTime(); blocktime = thread.getBlockTime();
sleeptime = thread.getSleepTime(); sleeptime = thread.getSleepTime();
exectime = thread.getExecTime(); exectime = thread.getExecTime();
@ -180,7 +180,7 @@ public class PerformanceQueues_p {
prop.putNum("table_" + c + "_sleeppercycle", ((idleCycles + busyCycles) == 0) ? -1 : sleeptime / (idleCycles + busyCycles)); prop.putNum("table_" + c + "_sleeppercycle", ((idleCycles + busyCycles) == 0) ? -1 : sleeptime / (idleCycles + busyCycles));
prop.putNum("table_" + c + "_execpercycle", (busyCycles == 0) ? -1 : exectime / busyCycles); prop.putNum("table_" + c + "_execpercycle", (busyCycles == 0) ? -1 : exectime / busyCycles);
prop.putNum("table_" + c + "_memusepercycle", (busyCycles == 0) ? -1 : memuse / busyCycles / 1024); prop.putNum("table_" + c + "_memusepercycle", (busyCycles == 0) ? -1 : memuse / busyCycles / 1024);
// load with old values // load with old values
idlesleep = sb.getConfigLong(threadName + "_idlesleep" , 1000); idlesleep = sb.getConfigLong(threadName + "_idlesleep" , 1000);
busysleep = sb.getConfigLong(threadName + "_busysleep", 100); busysleep = sb.getConfigLong(threadName + "_busysleep", 100);
@ -189,13 +189,13 @@ public class PerformanceQueues_p {
// load with new values // load with new values
idlesleep = post.getLong(threadName + "_idlesleep", idlesleep); idlesleep = post.getLong(threadName + "_idlesleep", idlesleep);
busysleep = post.getLong(threadName + "_busysleep", busysleep); busysleep = post.getLong(threadName + "_busysleep", busysleep);
memprereq = post.getLong(threadName + "_memprereq", memprereq) * 1024; memprereq = post.getLong(threadName + "_memprereq", memprereq) * 1024l;
if (memprereq == 0) memprereq = sb.getConfigLong(threadName + "_memprereq", 0); if (memprereq == 0) memprereq = sb.getConfigLong(threadName + "_memprereq", 0);
// check values to prevent short-cut loops // check values to prevent short-cut loops
if (idlesleep < 1000) idlesleep = 1000; if (idlesleep < 1000) idlesleep = 1000;
if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; } if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; }
sb.setThreadPerformance(threadName, idlesleep, busysleep, memprereq); sb.setThreadPerformance(threadName, idlesleep, busysleep, memprereq);
idlesleep = sb.getConfigLong(threadName + "_idlesleep", idlesleep); idlesleep = sb.getConfigLong(threadName + "_idlesleep", idlesleep);
busysleep = sb.getConfigLong(threadName + "_busysleep", busysleep); busysleep = sb.getConfigLong(threadName + "_busysleep", busysleep);
@ -228,7 +228,7 @@ public class PerformanceQueues_p {
c++; c++;
} }
prop.put("table", c); prop.put("table", c);
// performance profiles // performance profiles
c = 0; c = 0;
final String usedfile = sb.getConfig("performanceProfile", "defaults/yacy.init"); final String usedfile = sb.getConfig("performanceProfile", "defaults/yacy.init");
@ -239,7 +239,7 @@ public class PerformanceQueues_p {
c++; c++;
} }
prop.put("profile", c); prop.put("profile", c);
c = 0; c = 0;
final int[] speedValues = {200,150,100,50,25,10}; final int[] speedValues = {200,150,100,50,25,10};
final int usedspeed = sb.getConfigInt("performanceSpeed", 100); final int usedspeed = sb.getConfigInt("performanceSpeed", 100);
@ -250,27 +250,27 @@ public class PerformanceQueues_p {
c++; c++;
} }
prop.put("speed", c); prop.put("speed", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) { if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000); final int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
sb.setConfig(SwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount)); sb.setConfig(SwitchboardConstants.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
indexSegment.termIndex().setBufferMaxWordCount(wordCacheMaxCount); indexSegment.termIndex().setBufferMaxWordCount(wordCacheMaxCount);
} }
if ((post != null) && (post.containsKey("poolConfig"))) { if ((post != null) && (post.containsKey("poolConfig"))) {
/* /*
* configuring the crawler pool * configuring the crawler pool
*/ */
// get the current crawler pool configuration // get the current crawler pool configuration
int maxBusy = post.getInt("Crawler Pool_maxActive", 8); int maxBusy = post.getInt("Crawler Pool_maxActive", 8);
// storing the new values into configfile // storing the new values into configfile
sb.setConfig(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX,maxBusy); sb.setConfig(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX,maxBusy);
//switchboard.setConfig("crawler.MinIdleThreads",minIdle); //switchboard.setConfig("crawler.MinIdleThreads",minIdle);
/* /*
* configuring the http pool * configuring the http pool
*/ */
final WorkflowThread httpd = sb.getThread("10_httpd"); final WorkflowThread httpd = sb.getThread("10_httpd");
try { try {
@ -279,23 +279,23 @@ public class PerformanceQueues_p {
maxBusy = 8; maxBusy = 8;
} }
((serverCore)httpd).setMaxSessionCount(maxBusy); ((serverCore)httpd).setMaxSessionCount(maxBusy);
// storing the new values into configfile // storing the new values into configfile
sb.setConfig("httpdMaxBusySessions",maxBusy); sb.setConfig("httpdMaxBusySessions",maxBusy);
} }
if ((post != null) && (post.containsKey("PrioritySubmit"))) { if ((post != null) && (post.containsKey("PrioritySubmit"))) {
sb.setConfig("javastart_priority",post.get("YaCyPriority","0")); sb.setConfig("javastart_priority",post.get("YaCyPriority","0"));
} }
if ((post != null) && (post.containsKey("onlineCautionSubmit"))) { if ((post != null) && (post.containsKey("onlineCautionSubmit"))) {
sb.setConfig(SwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseProxy", 30000))); sb.setConfig(SwitchboardConstants.PROXY_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseProxy", 30000)));
sb.setConfig(SwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseLocalsearch", 30000))); sb.setConfig(SwitchboardConstants.LOCALSEACH_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseLocalsearch", 30000)));
sb.setConfig(SwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseRemotesearch", 30000))); sb.setConfig(SwitchboardConstants.REMOTESEARCH_ONLINE_CAUTION_DELAY, Integer.toString(post.getInt("crawlPauseRemotesearch", 30000)));
} }
if ((post != null) && (post.containsKey("minimumDeltaSubmit"))) { if ((post != null) && (post.containsKey("minimumDeltaSubmit"))) {
final long minimumLocalDelta = post.getLong("minimumLocalDelta", sb.crawlQueues.noticeURL.getMinimumLocalDelta()); final long minimumLocalDelta = post.getLong("minimumLocalDelta", sb.crawlQueues.noticeURL.getMinimumLocalDelta());
final long minimumGlobalDelta = post.getLong("minimumGlobalDelta", sb.crawlQueues.noticeURL.getMinimumGlobalDelta()); final long minimumGlobalDelta = post.getLong("minimumGlobalDelta", sb.crawlQueues.noticeURL.getMinimumGlobalDelta());
@ -303,13 +303,13 @@ public class PerformanceQueues_p {
sb.setConfig("minimumGlobalDelta", minimumGlobalDelta); sb.setConfig("minimumGlobalDelta", minimumGlobalDelta);
sb.crawlQueues.noticeURL.setMinimumDelta(minimumLocalDelta, minimumGlobalDelta); sb.crawlQueues.noticeURL.setMinimumDelta(minimumLocalDelta, minimumGlobalDelta);
} }
// delta settings // delta settings
prop.put("minimumLocalDelta", sb.crawlQueues.noticeURL.getMinimumLocalDelta()); prop.put("minimumLocalDelta", sb.crawlQueues.noticeURL.getMinimumLocalDelta());
prop.put("minimumGlobalDelta", sb.crawlQueues.noticeURL.getMinimumGlobalDelta()); prop.put("minimumGlobalDelta", sb.crawlQueues.noticeURL.getMinimumGlobalDelta());
// table cache settings // table cache settings
prop.putNum("urlCacheSize", indexSegment.urlMetadata().writeCacheSize()); prop.putNum("urlCacheSize", indexSegment.urlMetadata().writeCacheSize());
prop.putNum("wordCacheSize", indexSegment.termIndex().getBufferSize()); prop.putNum("wordCacheSize", indexSegment.termIndex().getBufferSize());
prop.putNum("wordCacheSizeKBytes", indexSegment.termIndex().getBufferSizeBytes()/1024); prop.putNum("wordCacheSizeKBytes", indexSegment.termIndex().getBufferSizeBytes()/1024);
prop.putNum("maxURLinCache", indexSegment.termIndex().getBufferMaxReferences()); prop.putNum("maxURLinCache", indexSegment.termIndex().getBufferMaxReferences());
@ -323,30 +323,30 @@ public class PerformanceQueues_p {
prop.putNum("crawlPauseProxyCurrent", (System.currentTimeMillis() - sb.proxyLastAccess) / 1000); prop.putNum("crawlPauseProxyCurrent", (System.currentTimeMillis() - sb.proxyLastAccess) / 1000);
prop.putNum("crawlPauseLocalsearchCurrent", (System.currentTimeMillis() - sb.localSearchLastAccess) / 1000); prop.putNum("crawlPauseLocalsearchCurrent", (System.currentTimeMillis() - sb.localSearchLastAccess) / 1000);
prop.putNum("crawlPauseRemotesearchCurrent", (System.currentTimeMillis() - sb.remoteSearchLastAccess) / 1000); prop.putNum("crawlPauseRemotesearchCurrent", (System.currentTimeMillis() - sb.remoteSearchLastAccess) / 1000);
// table thread pool settings // table thread pool settings
prop.put("pool_0_name","Crawler Pool"); prop.put("pool_0_name","Crawler Pool");
prop.put("pool_0_maxActive", sb.getConfigLong("crawler.MaxActiveThreads", 0)); prop.put("pool_0_maxActive", sb.getConfigLong("crawler.MaxActiveThreads", 0));
prop.put("pool_0_numActive",sb.crawlQueues.workerSize()); prop.put("pool_0_numActive",sb.crawlQueues.workerSize());
final WorkflowThread httpd = sb.getThread("10_httpd"); final WorkflowThread httpd = sb.getThread("10_httpd");
prop.put("pool_1_name", "httpd Session Pool"); prop.put("pool_1_name", "httpd Session Pool");
prop.put("pool_1_maxActive", ((serverCore)httpd).getMaxSessionCount()); prop.put("pool_1_maxActive", ((serverCore)httpd).getMaxSessionCount());
prop.put("pool_1_numActive", ((serverCore)httpd).getJobCount()); prop.put("pool_1_numActive", ((serverCore)httpd).getJobCount());
prop.put("pool", "2"); prop.put("pool", "2");
final long curr_prio = sb.getConfigLong("javastart_priority",0); final long curr_prio = sb.getConfigLong("javastart_priority",0);
prop.put("priority_normal",(curr_prio == 0) ? "1" : "0"); prop.put("priority_normal",(curr_prio == 0) ? "1" : "0");
prop.put("priority_below",(curr_prio == 10) ? "1" : "0"); prop.put("priority_below",(curr_prio == 10) ? "1" : "0");
prop.put("priority_low",(curr_prio == 20) ? "1" : "0"); prop.put("priority_low",(curr_prio == 20) ? "1" : "0");
// parse initialization memory settings // parse initialization memory settings
final String Xmx = sb.getConfig("javastart_Xmx", "Xmx500m").substring(3); final String Xmx = sb.getConfig("javastart_Xmx", "Xmx500m").substring(3);
prop.put("Xmx", Xmx.substring(0, Xmx.length() - 1)); prop.put("Xmx", Xmx.substring(0, Xmx.length() - 1));
final String Xms = sb.getConfig("javastart_Xms", "Xms500m").substring(3); final String Xms = sb.getConfig("javastart_Xms", "Xms500m").substring(3);
prop.put("Xms", Xms.substring(0, Xms.length() - 1)); prop.put("Xms", Xms.substring(0, Xms.length() - 1));
final long diskFree = sb.getConfigLong(SwitchboardConstants.DISK_FREE, 3000L); final long diskFree = sb.getConfigLong(SwitchboardConstants.DISK_FREE, 3000L);
final long diskFreeHardlimit = sb.getConfigLong(SwitchboardConstants.DISK_FREE_HARDLIMIT, 1000L); final long diskFreeHardlimit = sb.getConfigLong(SwitchboardConstants.DISK_FREE_HARDLIMIT, 1000L);
final long memoryAcceptDHT = sb.getConfigLong(SwitchboardConstants.MEMORY_ACCEPTDHT, 50000L); final long memoryAcceptDHT = sb.getConfigLong(SwitchboardConstants.MEMORY_ACCEPTDHT, 50000L);
@ -355,11 +355,11 @@ public class PerformanceQueues_p {
prop.put("diskFreeHardlimit", diskFreeHardlimit); prop.put("diskFreeHardlimit", diskFreeHardlimit);
prop.put("memoryAcceptDHT", memoryAcceptDHT); prop.put("memoryAcceptDHT", memoryAcceptDHT);
if(observerTrigger) prop.put("observerTrigger", "1"); if(observerTrigger) prop.put("observerTrigger", "1");
// return rewrite values for templates // return rewrite values for templates
return prop; return prop;
} }
private static String d(final String a, final String b) { private static String d(final String a, final String b) {
return (a == null) ? b : a; return (a == null) ? b : a;
} }

@ -1,4 +1,4 @@
// ProxyIndexingMonitor_p.java // ProxyIndexingMonitor_p.java
// --------------------------- // ---------------------------
// part of the AnomicHTTPD caching proxy // part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net // (C) by Michael Peter Christen; mc@yacy.net
@ -33,7 +33,6 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -66,7 +65,7 @@ public class ProxyIndexingMonitor_p {
final boolean proxyYaCyOnly = post.containsKey("proxyYacyOnly"); final boolean proxyYaCyOnly = post.containsKey("proxyYacyOnly");
env.setConfig(SwitchboardConstants.PROXY_YACY_ONLY, (proxyYaCyOnly) ? true : false); env.setConfig(SwitchboardConstants.PROXY_YACY_ONLY, (proxyYaCyOnly) ? true : false);
int newProxyPrefetchDepth = post.getInt("proxyPrefetchDepth", 0); int newProxyPrefetchDepth = post.getInt("proxyPrefetchDepth", 0);
if (newProxyPrefetchDepth < 0) newProxyPrefetchDepth = 0; if (newProxyPrefetchDepth < 0) newProxyPrefetchDepth = 0;
if (newProxyPrefetchDepth > 20) newProxyPrefetchDepth = 20; // self protection ? if (newProxyPrefetchDepth > 20) newProxyPrefetchDepth = 20; // self protection ?
env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth)); env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth));
final boolean proxyStoreHTCache = post.containsKey("proxyStoreHTCache"); final boolean proxyStoreHTCache = post.containsKey("proxyStoreHTCache");
@ -77,7 +76,7 @@ public class ProxyIndexingMonitor_p {
env.setConfig("proxyIndexingLocalText", proxyIndexingLocalText ? true : false); env.setConfig("proxyIndexingLocalText", proxyIndexingLocalText ? true : false);
final boolean proxyIndexingLocalMedia = post.containsKey("proxyIndexingLocalMedia"); final boolean proxyIndexingLocalMedia = post.containsKey("proxyIndexingLocalMedia");
env.setConfig("proxyIndexingLocalMedia", proxyIndexingLocalMedia ? true : false); env.setConfig("proxyIndexingLocalMedia", proxyIndexingLocalMedia ? true : false);
// added proxyCache, proxyCacheSize - Borg-0300 // added proxyCache, proxyCacheSize - Borg-0300
// proxyCache - check and create the directory // proxyCache - check and create the directory
oldProxyCachePath = env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT); oldProxyCachePath = env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT);
@ -90,12 +89,12 @@ public class ProxyIndexingMonitor_p {
final File cache = env.getDataPath(SwitchboardConstants.HTCACHE_PATH, oldProxyCachePath); final File cache = env.getDataPath(SwitchboardConstants.HTCACHE_PATH, oldProxyCachePath);
if (!cache.isDirectory() && !cache.isFile()) cache.mkdirs(); if (!cache.isDirectory() && !cache.isFile()) cache.mkdirs();
// proxyCacheSize // proxyCacheSize
oldProxyCacheSize = env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64L); oldProxyCacheSize = env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64L);
newProxyCacheSize = post.getLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64L); newProxyCacheSize = post.getLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64L);
if (newProxyCacheSize < 4) { newProxyCacheSize = 4; } if (newProxyCacheSize < 4) { newProxyCacheSize = 4; }
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize); env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024); Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
// implant these settings also into the crawling profile for the proxy // implant these settings also into the crawling profile for the proxy
if (sb.crawler.defaultProxyProfile == null) { if (sb.crawler.defaultProxyProfile == null) {
@ -108,7 +107,7 @@ public class ProxyIndexingMonitor_p {
sb.crawler.defaultProxyProfile.put("indexText", proxyIndexingLocalText); sb.crawler.defaultProxyProfile.put("indexText", proxyIndexingLocalText);
sb.crawler.defaultProxyProfile.put("indexMedia", proxyIndexingLocalMedia); sb.crawler.defaultProxyProfile.put("indexMedia", proxyIndexingLocalMedia);
sb.crawler.putActive(sb.crawler.defaultProxyProfile.handle().getBytes(), sb.crawler.defaultProxyProfile); sb.crawler.putActive(sb.crawler.defaultProxyProfile.handle().getBytes(), sb.crawler.defaultProxyProfile);
prop.put("info", "2");//new proxyPrefetchdepth prop.put("info", "2");//new proxyPrefetchdepth
prop.put("info_message", newProxyPrefetchDepth); prop.put("info_message", newProxyPrefetchDepth);
prop.put("info_caching", proxyStoreHTCache ? "1" : "0"); prop.put("info_caching", proxyStoreHTCache ? "1" : "0");

@ -28,7 +28,7 @@ public class YBRFetch_p
final servletProperties prop = new servletProperties(); final servletProperties prop = new servletProperties();
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
if ( post == null || !post.containsKey("ghrt4") || MemoryControl.available() < 1024 * 1024 * 1024 ) { if ( post == null || !post.containsKey("ghrt4") || MemoryControl.available() < 1024L * 1024L * 1024L ) {
return prop; return prop;
} }
final File hostIndexFile = new File(sb.queuesRoot, "hostIndex.blob"); final File hostIndexFile = new File(sb.queuesRoot, "hostIndex.blob");

@ -55,7 +55,7 @@ import net.yacy.document.Condenser;
import net.yacy.document.Document; import net.yacy.document.Document;
import net.yacy.document.LibraryProvider; import net.yacy.document.LibraryProvider;
import net.yacy.document.Parser; import net.yacy.document.Parser;
import net.yacy.document.geolocalization.Location; import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
@ -909,12 +909,12 @@ public class yacysearch {
} }
// find geographic info // find geographic info
final SortedSet<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false); final SortedSet<GeoLocation> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if ( coordinates == null || coordinates.isEmpty() || startRecord > 0 ) { if ( coordinates == null || coordinates.isEmpty() || startRecord > 0 ) {
prop.put("geoinfo", "0"); prop.put("geoinfo", "0");
} else { } else {
int i = 0; int i = 0;
for ( final Location c : coordinates ) { for ( final GeoLocation c : coordinates ) {
prop.put("geoinfo_loc_" + i + "_lon", Math.round(c.lon() * 10000.0f) / 10000.0f); prop.put("geoinfo_loc_" + i + "_lon", Math.round(c.lon() * 10000.0f) / 10000.0f);
prop.put("geoinfo_loc_" + i + "_lat", Math.round(c.lat() * 10000.0f) / 10000.0f); prop.put("geoinfo_loc_" + i + "_lat", Math.round(c.lat() * 10000.0f) / 10000.0f);
prop.put("geoinfo_loc_" + i + "_name", c.getName()); prop.put("geoinfo_loc_" + i + "_name", c.getName());

@ -28,7 +28,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.opensearch.SRURSSConnector; import net.yacy.cora.services.federated.opensearch.SRURSSConnector;
import net.yacy.document.LibraryProvider; import net.yacy.document.LibraryProvider;
import net.yacy.document.geolocalization.Location; import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
@ -67,11 +67,11 @@ public class yacysearch_location {
int placemarkCounter = 0; int placemarkCounter = 0;
if (query.length() > 0 && search_query) { if (query.length() > 0 && search_query) {
final Set<Location> locations = LibraryProvider.geoLoc.find(query, true); final Set<GeoLocation> locations = LibraryProvider.geoLoc.find(query, true);
for (final String qp: query.split(" ")) { for (final String qp: query.split(" ")) {
locations.addAll(LibraryProvider.geoLoc.find(qp, true)); locations.addAll(LibraryProvider.geoLoc.find(qp, true));
} }
for (final Location location: locations) { for (final GeoLocation location: locations) {
// write for all locations a point to this message // write for all locations a point to this message
prop.put("kml_placemark_" + placemarkCounter + "_location", location.getName()); prop.put("kml_placemark_" + placemarkCounter + "_location", location.getName());
prop.put("kml_placemark_" + placemarkCounter + "_name", location.getName()); prop.put("kml_placemark_" + placemarkCounter + "_name", location.getName());

@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
@ -47,6 +48,7 @@ import net.yacy.kelondro.blob.Compressor;
import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
@ -57,7 +59,7 @@ public final class Cache {
private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap"; private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap";
private static final String FILE_DB_NAME = "file.array"; private static final String FILE_DB_NAME = "file.array";
private static Map<byte[], Map<String, String>> responseHeaderDB = null; private static MapHeap responseHeaderDB = null;
private static Compressor fileDB = null; private static Compressor fileDB = null;
private static ArrayStack fileDBunbuffered = null; private static ArrayStack fileDBunbuffered = null;
@ -84,6 +86,7 @@ public final class Cache {
} catch (final IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
// open the cache file
try { try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false); fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false);
fileDBunbuffered.setMaxSize(maxCacheSize); fileDBunbuffered.setMaxSize(maxCacheSize);
@ -91,6 +94,53 @@ public final class Cache {
} catch (final IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
Log.logInfo("Cache", "initialized cache database responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
// clean up the responseHeaderDB which cannot be cleaned the same way as the cache files.
// We do this as a concurrent job only once after start-up silently
if (responseHeaderDB.size() != fileDB.size()) {
Log.logWarning("Cache", "file and metadata size is not equal, starting a cleanup thread...");
Thread startupCleanup = new Thread() {
/**
 * One-shot start-up cleanup that reconciles the two cache stores.
 * Phase 1 removes entries from responseHeaderDB whose key is missing in fileDB,
 * phase 2 removes entries from fileDB whose key is missing in responseHeaderDB.
 * Deletion failures are counted and reported instead of being dropped silently;
 * an interruption received while collecting keys is remembered and the thread's
 * interrupt status is restored once the cleanup is finished.
 */
@Override
public void run() {
    // enumerate the responseHeaderDB and find out all entries that are not inside the fileDB
    final BlockingQueue<byte[]> q = responseHeaderDB.keyQueue(1000);
    final HandleSet delkeys = new HandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 1);
    boolean interrupted = false;
    Log.logInfo("Cache", "started cleanup thread to remove unused cache metadata");
    try {
        byte[] k;
        // the loop ends when the MapHeap.POISON_QUEUE_ENTRY marker object is taken (identity comparison)
        while (((k = q.take()) != MapHeap.POISON_QUEUE_ENTRY)) {
            if (fileDB.containsKey(k)) continue;
            try {
                delkeys.put(k);
            } catch (final RowSpaceExceededException e) {
                // the key set cannot grow any further: stop collecting and delete what was found so far
                break;
            }
        }
    } catch (final InterruptedException e) {
        // remember the interruption; the flag is restored at the end so that
        // the deletions below still run with a clear interrupt status
        interrupted = true;
    } finally {
        // delete the collected keys from the metadata
        Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused metadata entries; now deleting them from the file...");
        int failed = 0;
        IOException lastError = null;
        for (final byte[] k: delkeys) {
            try {
                responseHeaderDB.delete(k);
            } catch (final IOException e) {
                failed++;
                lastError = e;
            }
        }
        if (failed > 0) {
            Log.logWarning("Cache", "cleanup thread failed to delete " + failed + " metadata entries; last error: " + lastError.getMessage());
        }
    }

    // second pass in the opposite direction: content entries without metadata
    Log.logInfo("Cache", "running check to remove unused file cache data");
    delkeys.clear();
    for (final byte[] k: fileDB) {
        if (responseHeaderDB.containsKey(k)) continue;
        try {
            delkeys.put(k);
        } catch (final RowSpaceExceededException e) {
            break;
        }
    }
    Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused cache entries; now deleting them from the file...");
    int failed = 0;
    IOException lastError = null;
    for (final byte[] k: delkeys) {
        try {
            fileDB.delete(k);
        } catch (final IOException e) {
            failed++;
            lastError = e;
        }
    }
    if (failed > 0) {
        Log.logWarning("Cache", "cleanup thread failed to delete " + failed + " cache entries; last error: " + lastError.getMessage());
    }
    Log.logInfo("Cache", "terminated cleanup thread; responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());

    // hand the interruption on to whoever inspects this thread afterwards
    if (interrupted) Thread.currentThread().interrupt();
}
};
startupCleanup.start();
}
} }
/** /**
@ -131,9 +181,7 @@ public final class Cache {
* close the databases * close the databases
*/ */
public static void close() { public static void close() {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.close();
((MapHeap) responseHeaderDB).close();
}
fileDB.close(true); fileDB.close(true);
} }
@ -156,12 +204,9 @@ public final class Cache {
hm.putAll(responseHeader); hm.putAll(responseHeader);
hm.put("@@URL", url.toNormalform(true, false)); hm.put("@@URL", url.toNormalform(true, false));
try { try {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.insert(url.hash(), hm);
((MapHeap) responseHeaderDB).insert(url.hash(), hm);
} else {
responseHeaderDB.put(url.hash(), hm);
}
} catch (final Exception e) { } catch (final Exception e) {
fileDB.delete(url.hash());
throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage()); throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
} }
if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false)); if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
@ -184,11 +229,7 @@ public final class Cache {
// if not both is there then we do a clean-up // if not both is there then we do a clean-up
if (headerExists) try { if (headerExists) try {
log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up"); log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up");
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.delete(urlhash);
((MapHeap) responseHeaderDB).delete(urlhash);
} else {
responseHeaderDB.remove(urlhash);
}
} catch (final IOException e) {} } catch (final IOException e) {}
if (fileExists) try { if (fileExists) try {
//log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up"); //log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up");
@ -209,8 +250,14 @@ public final class Cache {
public static ResponseHeader getResponseHeader(final byte[] hash) { public static ResponseHeader getResponseHeader(final byte[] hash) {
// loading data from database // loading data from database
Map<String, String> hdb; Map<String, String> hdb = null;
hdb = responseHeaderDB.get(hash); try {
hdb = responseHeaderDB.get(hash);
} catch (IOException e) {
return null;
} catch (RowSpaceExceededException e) {
return null;
}
if (hdb == null) return null; if (hdb == null) return null;
return new ResponseHeader(null, hdb); return new ResponseHeader(null, hdb);
@ -251,11 +298,7 @@ public final class Cache {
* @throws IOException * @throws IOException
*/ */
public static void delete(final byte[] hash) throws IOException { public static void delete(final byte[] hash) throws IOException {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.delete(hash);
((MapHeap) responseHeaderDB).delete(hash);
} else {
responseHeaderDB.remove(hash);
}
fileDB.delete(hash); fileDB.delete(hash);
} }
} }

@ -453,6 +453,7 @@ public class Domains {
globalHosts = null; globalHosts = null;
} else try { } else try {
globalHosts = new KeyList(globalHostsnameCache); globalHosts = new KeyList(globalHostsnameCache);
Log.logInfo("Domains", "loaded globalHosts cache of hostnames, size = " + globalHosts.size());
} catch (final IOException e) { } catch (final IOException e) {
globalHosts = null; globalHosts = null;
} }

@ -12,12 +12,12 @@
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either * License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version. * version 2.1 of the License, or (at your option) any later version.
* *
* This library is distributed in the hope that it will be useful, * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. * Lesser General Public License for more details.
* *
* You should have received a copy of the GNU Lesser General Public License * You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt * along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>. * If not, see <http://www.gnu.org/licenses/>.
@ -72,7 +72,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
this.queue.clear(); this.queue.clear();
this.enqueued.drainPermits(); this.enqueued.drainPermits();
} }
/** /**
* test if the queue is empty * test if the queue is empty
* @return true if the queue is empty, false if not * @return true if the queue is empty, false if not
@ -80,7 +80,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
public boolean isEmpty() { public boolean isEmpty() {
return this.queue.isEmpty() & this.drained.isEmpty(); return this.queue.isEmpty() & this.drained.isEmpty();
} }
/** /**
* get the number of elements in the queue, waiting to be removed with take() or poll() * get the number of elements in the queue, waiting to be removed with take() or poll()
* @return * @return
@ -91,7 +91,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
/** /**
* get the number of elements that had been drained so far and are wainting * get the number of elements that had been drained so far and are waiting
* in a list to get enumerated with element() * in a list to get enumerated with element()
* @return * @return
*/ */
@ -105,9 +105,9 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* @return * @return
*/ */
public synchronized int sizeAvailable() { public synchronized int sizeAvailable() {
return this.queue.size() + this.drained.size(); return Math.min(this.maxsize, this.queue.size() + this.drained.size());
} }
/** /**
* put a element on the stack using a order of the weight * put a element on the stack using a order of the weight
* elements that had been on the stack cannot be put in again, * elements that had been on the stack cannot be put in again,
@ -128,7 +128,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
} }
assert this.queue.size() >= this.enqueued.availablePermits() : "(put) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); assert this.queue.size() >= this.enqueued.availablePermits() : "(put) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
} }
/** /**
* return the element with the smallest weight and remove it from the stack * return the element with the smallest weight and remove it from the stack
* @return null if no element is on the queue or the head of the queue * @return null if no element is on the queue or the head of the queue
@ -140,7 +140,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return takeUnsafe(); return takeUnsafe();
} }
} }
/** /**
* Retrieves and removes the head of this queue, waiting if necessary * Retrieves and removes the head of this queue, waiting if necessary
* up to the specified wait time if no elements are present on this queue. * up to the specified wait time if no elements are present on this queue.
@ -155,7 +155,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return takeUnsafe(); return takeUnsafe();
} }
} }
/** /**
* Retrieves and removes the head of this queue, waiting if no elements are present on this queue. * Retrieves and removes the head of this queue, waiting if no elements are present on this queue.
* @return the head element from the queue * @return the head element from the queue
@ -167,17 +167,17 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return takeUnsafe(); return takeUnsafe();
} }
} }
private Element<E> takeUnsafe() { private Element<E> takeUnsafe() {
final Element<E> element = this.queue.first(); final Element<E> element = this.queue.first();
assert element != null; assert element != null;
this.queue.remove(element); this.queue.remove(element);
this.drained.add(element); if (this.drained.size() < this.maxsize) this.drained.add(element);
assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
return element; return element;
} }
/** /**
* return the element with the smallest weight, but do not remove it * return the element with the smallest weight, but do not remove it
* @return null if no element is on the queue or the head of the queue * @return null if no element is on the queue or the head of the queue
@ -186,7 +186,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
if (this.queue.isEmpty()) return null; if (this.queue.isEmpty()) return null;
return this.queue.first(); return this.queue.first();
} }
/** /**
* all objects that have been returned by poll or take are stored in a back-up list * all objects that have been returned by poll or take are stored in a back-up list
* where they can be retrieved afterward. The elements from that list are stored in * where they can be retrieved afterward. The elements from that list are stored in
@ -216,7 +216,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return this.drained.get(position); return this.drained.get(position);
} }
} }
/** /**
* retrieve an element from the drained queue but wait until a timeout * retrieve an element from the drained queue but wait until a timeout
* until returning null when no element will be available within the time * until returning null when no element will be available within the time
@ -239,7 +239,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
if (position >= this.drained.size()) return null; // we still don't have that element if (position >= this.drained.size()) return null; // we still don't have that element
return this.drained.get(position); return this.drained.get(position);
} }
/** /**
* return the specific amount of entries as they would be retrievable with element() * return the specific amount of entries as they would be retrievable with element()
* if count is < 0 then all elements are taken * if count is < 0 then all elements are taken
@ -251,11 +251,11 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
if (count < 0) { if (count < 0) {
return list(); return list();
} }
if (count > sizeAvailable()) throw new RuntimeException("list(" + count + ") exceeded avaiable number of elements (" + sizeAvailable() + ")"); if (count > sizeAvailable()) throw new RuntimeException("list(" + count + ") exceeded avaiable number of elements (" + sizeAvailable() + ")");
while (count > this.drained.size()) this.poll(); while (count > this.drained.size()) this.poll();
return this.drained; return this.drained;
} }
/** /**
* return all entries as they would be retrievable with element() * return all entries as they would be retrievable with element()
* @return a list of all elements in the stack * @return a list of all elements in the stack
@ -265,7 +265,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
while (!this.queue.isEmpty()) this.poll(); while (!this.queue.isEmpty()) this.poll();
return this.drained; return this.drained;
} }
/** /**
* iterate over all elements available. All elements that are still in the queue are drained to recorded positions * iterate over all elements available. All elements that are still in the queue are drained to recorded positions
* @return an iterator over all drained positions. * @return an iterator over all drained positions.
@ -285,22 +285,25 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
@Override @Override
public String toString(); public String toString();
} }
protected abstract static class AbstractElement<E> implements Element<E>, Serializable { protected abstract static class AbstractElement<E> implements Element<E>, Serializable {
private static final long serialVersionUID = -7026597258248026566L; private static final long serialVersionUID = -7026597258248026566L;
public long weight; public long weight;
public E element; public E element;
@Override
public long getWeight() { public long getWeight() {
return this.weight; return this.weight;
} }
@Override
public E getElement() { public E getElement() {
return this.element; return this.element;
} }
@Override
public boolean equals(Element<E> o) { public boolean equals(Element<E> o) {
return this.element.equals(o.getElement()); return this.element.equals(o.getElement());
} }
@ -309,13 +312,13 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
public int hashCode() { public int hashCode() {
return this.element.hashCode(); return this.element.hashCode();
} }
@Override @Override
public String toString() { public String toString() {
return element.toString() + "/" + weight; return this.element.toString() + "/" + this.weight;
} }
} }
/** /**
* natural ordering elements, can be used as container of objects <E> in the priority queue * natural ordering elements, can be used as container of objects <E> in the priority queue
* the elements with smallest ordering weights are first in the queue when elements are taken * the elements with smallest ordering weights are first in the queue when elements are taken
@ -329,10 +332,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
this.weight = weight; this.weight = weight;
} }
@Override
public int compare(NaturalElement<E> o1, NaturalElement<E> o2) { public int compare(NaturalElement<E> o1, NaturalElement<E> o2) {
return o1.compareTo(o2); return o1.compareTo(o2);
} }
@Override
public int compareTo(NaturalElement<E> o) { public int compareTo(NaturalElement<E> o) {
if (this.element == o.getElement()) return 0; if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0; if (this.element.equals(o.getElement())) return 0;
@ -344,9 +349,9 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
if (o1h < o2h) return -1; if (o1h < o2h) return -1;
return 0; return 0;
} }
} }
/** /**
* reverse ordering elements, can be used as container of objects <E> in the priority queue * reverse ordering elements, can be used as container of objects <E> in the priority queue
* the elements with highest ordering weights are first in the queue when elements are taken * the elements with highest ordering weights are first in the queue when elements are taken
@ -360,10 +365,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
this.weight = weight; this.weight = weight;
} }
@Override
public int compare(ReverseElement<E> o1, ReverseElement<E> o2) { public int compare(ReverseElement<E> o1, ReverseElement<E> o2) {
return o1.compareTo(o2); return o1.compareTo(o2);
} }
@Override
public int compareTo(ReverseElement<E> o) { public int compareTo(ReverseElement<E> o) {
if (this.element == o.getElement()) return 0; if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0; if (this.element.equals(o.getElement())) return 0;
@ -376,7 +383,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return 0; return 0;
} }
} }
public static void main(String[] args) { public static void main(String[] args) {
final WeakPriorityBlockingQueue<String> a = new WeakPriorityBlockingQueue<String>(3); final WeakPriorityBlockingQueue<String> a = new WeakPriorityBlockingQueue<String>(3);
//final Element<String> REVERSE_POISON = new ReverseElement<String>("", Long.MIN_VALUE); //final Element<String> REVERSE_POISON = new ReverseElement<String>("", Long.MIN_VALUE);

@ -37,13 +37,13 @@ public final class HashARC<K, V> extends SimpleARC<K, V> implements Map<K, V>, I
public HashARC(final int cacheSize) { public HashARC(final int cacheSize) {
this.cacheSize = cacheSize / 2; this.cacheSize = cacheSize / 2;
super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(1, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) { @Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize; return size() > HashARC.this.cacheSize;
} }
}); });
this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(1, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) { @Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize; return size() > HashARC.this.cacheSize;

@ -76,6 +76,10 @@ public class KeyList implements Iterable<String> {
} }
public int size() {
return this.keys.size();
}
public boolean contains(final String key) { public boolean contains(final String key) {
return this.keys.containsKey(key.trim().toLowerCase()); return this.keys.containsKey(key.trim().toLowerCase());
} }

@ -1,73 +0,0 @@
/**
* Coordinates.java
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 04.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
/**
 * Immutable longitude/latitude pair (in degrees) with a tolerance-based equality.
 *
 * NOTE(review): equals() treats two points as equal when both coordinates differ by
 * less than {@code tenmeter}, which is not a transitive relation, while hashCode()
 * is computed from the truncated, scaled coordinates. Two points that are equal
 * under the tolerance can therefore still produce different hash codes.
 */
public class Coordinates {

    /** tolerance in degrees used by equals(): 90 degrees divided by one million */
    private static final double tenmeter = 90.0d / 1.0e6d;

    /** 2^30 as a double; this is about one billion (US) */
    private static final double bits30 = (double) (1L << 30);

    /** number of integer steps per degree when 360 degrees are spread over 2^30 steps */
    private static final double upscale = bits30 / 360.0;

    private final double lon, lat;

    /**
     * @param lon longitude in degrees
     * @param lat latitude in degrees
     */
    public Coordinates(double lon, double lat) {
        this.lon = lon;
        this.lat = lat;
    }

    /** @return the longitude as given to the constructor */
    public double lon() {
        return this.lon;
    }

    /** @return the latitude as given to the constructor */
    public double lat() {
        return this.lat;
    }

    /** scale (180 - coord) to an integer, truncating the fraction */
    private static final int coord2int(double coord) {
        final double scaled = (180.0 - coord) * upscale;
        return (int) scaled;
    }

    /**
     * compute the hash code of a coordinate:
     * the scaled longitude plus the scaled latitude shifted right by 15 bits
     */
    @Override
    public int hashCode() {
        final int lonPart = coord2int(this.lon);
        final int latPart = coord2int(this.lat) >> 15;
        return lonPart + latPart;
    }

    /**
     * equality test that is needed to use the class inside HashMap/HashSet;
     * exact matches are equal, and so are points whose longitude and latitude
     * both differ by less than the tolerance
     */
    @Override
    public boolean equals(final Object o) {
        if (o instanceof Coordinates) {
            final Coordinates other = (Coordinates) o;
            final boolean identical = this.lon == other.lon && this.lat == other.lat;
            if (identical) return true;
            // fuzzy comparison: values are considered equal if they are close to each other
            final double lonDelta = Math.abs(this.lon - other.lon);
            final double latDelta = Math.abs(this.lat - other.lat);
            return lonDelta < tenmeter && latDelta < tenmeter;
        }
        return false;
    }

    /** @return the pair formatted as "[lon,lat]" */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("[").append(this.lon).append(",").append(this.lat).append("]");
        return sb.toString();
    }
}

@ -1,90 +1,93 @@
/** /**
* Location.java * GeoLocation
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany * Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 08.10.2009 on http://yacy.net * first published 08.10.2009 on http://yacy.net
* *
* This file is part of YaCy Content Integration * This file is part of YaCy Content Integration
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either * License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version. * version 2.1 of the License, or (at your option) any later version.
* *
* This library is distributed in the hope that it will be useful, * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. * Lesser General Public License for more details.
* *
* You should have received a copy of the GNU Lesser General Public License * You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt * along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>. * If not, see <http://www.gnu.org/licenses/>.
*/ */
package net.yacy.document.geolocalization; package net.yacy.document.geolocalization;
import java.util.Comparator; import java.util.Comparator;
public class Location extends Coordinates implements Comparable<Location>, Comparator<Location> { public class GeoLocation extends GeoPoint implements Comparable<GeoLocation>, Comparator<GeoLocation> {
private String name; private String name;
private int population; private int population;
public Location(float lon, float lat) { public GeoLocation(double lat, double lon) {
super(lon, lat); super(lat, lon);
this.name = null; this.name = null;
this.population = 0; this.population = 0;
} }
public Location(float lon, float lat, String name) { public GeoLocation(double lat, double lon, String name) {
super(lon, lat); super(lat, lon);
this.name = name; this.name = name;
} }
public void setName(String name) { public void setName(String name) {
this.name = name; this.name = name;
} }
public String getName() { public String getName() {
return this.name; return this.name;
} }
public void setPopulation(int population) { public void setPopulation(int population) {
this.population = population; this.population = population;
} }
public int getPopulation() { public int getPopulation() {
return this.population; return this.population;
} }
public boolean equals(Object loc) { @Override
if (!(loc instanceof Location)) return false; public boolean equals(Object loc) {
if (this.name == null || ((Location) loc).name == null) return super.equals(loc); if (!(loc instanceof GeoLocation)) return false;
return super.equals(loc) && this.name.toLowerCase().equals(((Location) loc).name.toLowerCase()); if (this.name == null || ((GeoLocation) loc).name == null) return super.equals(loc);
} return super.equals(loc) && this.name.toLowerCase().equals(((GeoLocation) loc).name.toLowerCase());
}
/**
* comparator that is needed to use the object inside TreeMap/TreeSet /**
* a Location is smaller than another if it has a _greater_ population * comparator that is needed to use the object inside TreeMap/TreeSet
* this order is used to get sorted lists of locations where the first elements * a Location is smaller than another if it has a _greater_ population
* have the greatest population * this order is used to get sorted lists of locations where the first elements
*/ * have the greatest population
public int compareTo(Location o) { */
if (this.equals(o)) return 0; @Override
long s = (ph(this.getPopulation()) << 30) + this.hashCode(); public int compareTo(GeoLocation o) {
long t = (ph(o.getPopulation()) << 30) + o.hashCode(); if (this.equals(o)) return 0;
if (s > t) return -1; long s = (ph(this.getPopulation()) << 30) + this.hashCode();
if (s < t) return 1; long t = (ph(o.getPopulation()) << 30) + o.hashCode();
return 0; if (s > t) return -1;
} if (s < t) return 1;
return 0;
private long ph(int population) { }
if (population > 10000) population -= 10000;
return (long) population; private long ph(int population) {
} if (population > 10000) population -= 10000;
return population;
public int compare(Location o1, Location o2) { }
return o1.compareTo(o2);
} @Override
public int compare(GeoLocation o1, GeoLocation o2) {
} return o1.compareTo(o2);
}
}

@ -0,0 +1,91 @@
/**
* GeoPoint
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 08.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
/**
 * Immutable latitude/longitude pair that is stored packed into a single long:
 * the scaled latitude occupies the upper 32 bits, the scaled longitude the lower 32 bits.
 * Coordinates are mapped from degrees to integers by {@code (coord + 180) * Integer.MAX_VALUE / 360},
 * so values read back with lat()/lon() are truncated to that resolution.
 */
public class GeoPoint {

    /** degrees per meter: this is actually the definition of 'meter': 10 million meter shall be the distance from the equator to the pole */
    public static final double meter = 90.0d / 1.0e7d;

    private static final double maxint = Integer.MAX_VALUE;
    private static final double upscale = maxint / 360.0;

    /** mask that keeps only the lower 32 bits of a long */
    private static final long LOW_WORD_MASK = 0xFFFFFFFFL;

    private final long latlon; // using one variable for the coordinate pair saves some space

    /**
     * @param lat latitude in degrees
     * @param lon longitude in degrees
     */
    public GeoPoint(double lat, double lon) {
        this.latlon = pack(lat, lon);
    }

    /**
     * @param lat latitude as integer; the value is divided by 1e6 to obtain degrees
     * @param lon longitude as integer; the value is divided by 1e6 to obtain degrees
     */
    public GeoPoint(int lat, int lon) {
        this.latlon = pack(lat / 1e6d, lon / 1e6d);
    }

    /**
     * combine both scaled coordinates into one long.
     * The longitude word is masked before it is OR-ed in: an int operand is
     * sign-extended when promoted to long, so without the mask a negative value
     * (longitude below -180 degrees) would overwrite the latitude bits.
     */
    private static long pack(double lat, double lon) {
        final long high = ((long) coord2int(lat)) << 32;
        final long low = coord2int(lon) & LOW_WORD_MASK;
        return high | low;
    }

    /** @return the longitude in degrees, reconstructed from the lower 31 bits */
    public double lon() {
        return int2coord((int) (this.latlon & (Integer.MAX_VALUE)));
    }

    /** @return the latitude in degrees, reconstructed from the upper 32 bits */
    public double lat() {
        return int2coord((int) (this.latlon >>> 32));
    }

    /** map degrees to the integer scale; the fraction is truncated */
    private static final int coord2int(double coord) {
        return (int) ((coord + 180.0) * upscale);
    }

    /** inverse of coord2int, up to the truncation done there */
    private static final double int2coord(int z) {
        return (z / upscale) - 180.0;
    }

    /**
     * compute the hash code of a coordinate.
     * The lowest bit of each scaled coordinate is dropped before both halves are added,
     * so points that differ only in those lowest bits get the same hash code.
     */
    @Override
    public int hashCode() {
        return (int) ((this.latlon & Integer.MAX_VALUE) >> 1) + (int) (this.latlon >> 33);
    }

    /**
     * equality test that is needed to use the class inside HashMap/HashSet;
     * two points are equal if their packed coordinates are identical
     */
    @Override
    public boolean equals(final Object o) {
        if (!(o instanceof GeoPoint)) return false;
        final GeoPoint oo = (GeoPoint) o;
        return (this.latlon == oo.latlon);
    }

    /** @return the pair formatted as "[lat,lon]" */
    @Override
    public String toString() {
        return "[" + this.lat() + "," + this.lon() + "]";
    }

    /** small self-test: prints a point, its hash code and the round-trip error in meters */
    public static void main(String[] args) {
        double lat = 13.419444d;
        double lon = 52.548611d;
        GeoPoint c = new GeoPoint(lat, lon);
        System.out.println(c.toString() + " #" + c.hashCode());
        System.out.println("error: lat: " + (Math.abs(c.lat() - lat) / meter) + " meter; lon: " + (Math.abs(c.lon() - lon) / meter) + " meter");
    }
}

@ -69,7 +69,7 @@ public class GeonamesLocalization implements Localization
modification date : date of last modification in yyyy-MM-dd format modification date : date of last modification in yyyy-MM-dd format
*/ */
private final Map<Integer, Location> id2loc; private final Map<Integer, GeoLocation> id2loc;
private final TreeMap<StringBuilder, List<Integer>> name2ids; private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file; private final File file;
@ -77,7 +77,7 @@ public class GeonamesLocalization implements Localization
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/ // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file; this.file = file;
this.id2loc = new HashMap<Integer, Location>(); this.id2loc = new HashMap<Integer, GeoLocation>();
this.name2ids = this.name2ids =
new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER); new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
@ -112,8 +112,8 @@ public class GeonamesLocalization implements Localization
for ( final String s : fields[3].split(",") ) { for ( final String s : fields[3].split(",") ) {
locnames.add(new StringBuilder(s)); locnames.add(new StringBuilder(s));
} }
final Location c = final GeoLocation c =
new Location(Float.parseFloat(fields[5]), Float.parseFloat(fields[4]), fields[1]); new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]);
c.setPopulation((int) Long.parseLong(fields[14])); c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c); this.id2loc.put(id, c);
for ( final StringBuilder name : locnames ) { for ( final StringBuilder name : locnames ) {
@ -136,7 +136,7 @@ public class GeonamesLocalization implements Localization
} }
@Override @Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) { public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final Set<Integer> r = new HashSet<Integer>(); final Set<Integer> r = new HashSet<Integer>();
List<Integer> c; List<Integer> c;
final StringBuilder an = new StringBuilder(anyname); final StringBuilder an = new StringBuilder(anyname);
@ -155,9 +155,9 @@ public class GeonamesLocalization implements Localization
} }
} }
} }
final TreeSet<Location> a = new TreeSet<Location>(); final TreeSet<GeoLocation> a = new TreeSet<GeoLocation>();
for ( final Integer e : r ) { for ( final Integer e : r ) {
final Location w = this.id2loc.get(e); final GeoLocation w = this.id2loc.get(e);
if ( w != null ) { if ( w != null ) {
a.add(w); a.add(w);
} }

@ -45,7 +45,7 @@ public interface Localization {
* @param locationexact - if true, then only exact matched with the location are returned. if false also partially matching names * @param locationexact - if true, then only exact matched with the location are returned. if false also partially matching names
* @return a set of locations, ordered by population (if this information is given) * @return a set of locations, ordered by population (if this information is given)
*/ */
public TreeSet<Location> find(String anyname, boolean locationexact); public TreeSet<GeoLocation> find(String anyname, boolean locationexact);
/** /**
* produce a set of location names * produce a set of location names

@ -53,7 +53,7 @@ public class OpenGeoDBLocalization implements Localization
{ {
private final Map<Integer, String> locTypeHash2locType; private final Map<Integer, String> locTypeHash2locType;
private final Map<Integer, Location> id2loc; private final Map<Integer, GeoLocation> id2loc;
private final Map<Integer, Integer> id2locTypeHash; private final Map<Integer, Integer> id2locTypeHash;
private final TreeMap<StringBuilder, List<Integer>> name2ids; private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final TreeMap<StringBuilder, List<Integer>> kfz2ids; private final TreeMap<StringBuilder, List<Integer>> kfz2ids;
@ -65,7 +65,7 @@ public class OpenGeoDBLocalization implements Localization
this.file = file; this.file = file;
this.locTypeHash2locType = new HashMap<Integer, String>(); this.locTypeHash2locType = new HashMap<Integer, String>();
this.id2loc = new HashMap<Integer, Location>(); this.id2loc = new HashMap<Integer, GeoLocation>();
this.id2locTypeHash = new HashMap<Integer, Integer>(); this.id2locTypeHash = new HashMap<Integer, Integer>();
this.name2ids = this.name2ids =
new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER); new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
@ -112,7 +112,7 @@ public class OpenGeoDBLocalization implements Localization
lat = Float.parseFloat(v[2]); lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]); lon = Float.parseFloat(v[3]);
} }
this.id2loc.put(Integer.parseInt(v[0]), new Location(lon, lat)); this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
} }
if ( line.startsWith("geodb_textdata ") ) { if ( line.startsWith("geodb_textdata ") ) {
line = line.substring(15 + 7); line = line.substring(15 + 7);
@ -126,7 +126,7 @@ public class OpenGeoDBLocalization implements Localization
} }
l.add(id); l.add(id);
this.name2ids.put(new StringBuilder(h), l); this.name2ids.put(new StringBuilder(h), l);
final Location loc = this.id2loc.get(id); final GeoLocation loc = this.id2loc.get(id);
if ( loc != null ) { if ( loc != null ) {
loc.setName(h); loc.setName(h);
} }
@ -200,7 +200,7 @@ public class OpenGeoDBLocalization implements Localization
* @return * @return
*/ */
@Override @Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) { public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final HashSet<Integer> r = new HashSet<Integer>(); final HashSet<Integer> r = new HashSet<Integer>();
List<Integer> c; List<Integer> c;
final StringBuilder an = new StringBuilder(anyname); final StringBuilder an = new StringBuilder(anyname);
@ -231,9 +231,9 @@ public class OpenGeoDBLocalization implements Localization
r.add(i); r.add(i);
} }
} }
final TreeSet<Location> a = new TreeSet<Location>(); final TreeSet<GeoLocation> a = new TreeSet<GeoLocation>();
for ( final Integer e : r ) { for ( final Integer e : r ) {
final Location w = this.id2loc.get(e); final GeoLocation w = this.id2loc.get(e);
if ( w != null ) { if ( w != null ) {
a.add(w); a.add(w);
} }

@ -76,8 +76,8 @@ public class OverarchingLocalization implements Localization {
* @return a set of locations, ordered by population (if this information is given) * @return a set of locations, ordered by population (if this information is given)
*/ */
@Override @Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) { public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final TreeSet<Location> locations = new TreeSet<Location>(); final TreeSet<GeoLocation> locations = new TreeSet<GeoLocation>();
for (final Localization service: this.services.values()) { for (final Localization service: this.services.values()) {
locations.addAll(service.find(anyname, locationexact)); locations.addAll(service.find(anyname, locationexact));
} }

@ -128,7 +128,7 @@ public class ArrayStack implements BLOB {
Runtime.getRuntime().availableProcessors(), 100, Runtime.getRuntime().availableProcessors(), 100,
TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(), new LinkedBlockingQueue<Runnable>(),
new NamePrefixThreadFactory(prefix)); new NamePrefixThreadFactory(this.prefix));
// check existence of the heap directory // check existence of the heap directory
if (heapLocation.exists()) { if (heapLocation.exists()) {
@ -183,9 +183,9 @@ public class ArrayStack implements BLOB {
File f; File f;
long maxtime = 0; long maxtime = 0;
for (final String file : files) { for (final String file : files) {
if (file.length() >= 22 && file.startsWith(prefix) && file.endsWith(".blob")) { if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try { try {
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(prefix.length() + 1, prefix.length() + 18)); d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
time = d.getTime(); time = d.getTime();
if (time > maxtime) maxtime = time; if (time > maxtime) maxtime = time;
} catch (final ParseException e) {continue;} } catch (final ParseException e) {continue;}
@ -194,9 +194,9 @@ public class ArrayStack implements BLOB {
// open all blob files // open all blob files
for (final String file : files) { for (final String file : files) {
if (file.length() >= 22 && file.startsWith(prefix) && file.endsWith(".blob")) { if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try { try {
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(prefix.length() + 1, prefix.length() + 18)); d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
f = new File(heapLocation, file); f = new File(heapLocation, file);
time = d.getTime(); time = d.getTime();
if (time == maxtime && !trimall) { if (time == maxtime && !trimall) {

@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
@ -45,7 +46,7 @@ import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
public class Compressor implements BLOB { public class Compressor implements BLOB, Iterable<byte[]> {
static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content
static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding) static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding)
@ -61,18 +62,22 @@ public class Compressor implements BLOB {
initBuffer(); initBuffer();
} }
@Override
public long mem() { public long mem() {
return this.backend.mem(); return this.backend.mem();
} }
@Override
public void trim() { public void trim() {
this.backend.trim(); this.backend.trim();
} }
@Override
public String name() { public String name() {
return this.backend.name(); return this.backend.name();
} }
@Override
public synchronized void clear() throws IOException { public synchronized void clear() throws IOException {
initBuffer(); initBuffer();
this.backend.clear(); this.backend.clear();
@ -83,10 +88,12 @@ public class Compressor implements BLOB {
this.bufferlength = 0; this.bufferlength = 0;
} }
@Override
public ByteOrder ordering() { public ByteOrder ordering() {
return this.backend.ordering(); return this.backend.ordering();
} }
@Override
public synchronized void close(final boolean writeIDX) { public synchronized void close(final boolean writeIDX) {
// no more thread is running, flush all queues // no more thread is running, flush all queues
flushAll(); flushAll();
@ -164,6 +171,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException { public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
// depending on the source of the result, we additionally do entry compression // depending on the source of the result, we additionally do entry compression
// because if a document was read once, we think that it will not be retrieved another time again soon // because if a document was read once, we think that it will not be retrieved another time again soon
@ -186,6 +194,7 @@ public class Compressor implements BLOB {
return decompress(b); return decompress(b);
} }
@Override
public byte[] get(final Object key) { public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null; if (!(key instanceof byte[])) return null;
try { try {
@ -198,16 +207,19 @@ public class Compressor implements BLOB {
return null; return null;
} }
@Override
public boolean containsKey(final byte[] key) { public boolean containsKey(final byte[] key) {
synchronized (this) { synchronized (this) {
return this.buffer.containsKey(key) || this.backend.containsKey(key); return this.buffer.containsKey(key) || this.backend.containsKey(key);
} }
} }
@Override
public int keylength() { public int keylength() {
return this.backend.keylength(); return this.backend.keylength();
} }
@Override
public synchronized long length() { public synchronized long length() {
try { try {
return this.backend.length() + this.bufferlength; return this.backend.length() + this.bufferlength;
@ -217,6 +229,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public long length(final byte[] key) throws IOException { public long length(final byte[] key) throws IOException {
synchronized (this) { synchronized (this) {
byte[] b = this.buffer.get(key); byte[] b = this.buffer.get(key);
@ -238,6 +251,7 @@ public class Compressor implements BLOB {
return 0; return 0;
} }
@Override
public void insert(final byte[] key, final byte[] b) throws IOException { public void insert(final byte[] key, final byte[] b) throws IOException {
// first ensure that the files do not exist anywhere // first ensure that the files do not exist anywhere
@ -265,32 +279,47 @@ public class Compressor implements BLOB {
if (MemoryControl.shortStatus()) flushAll(); if (MemoryControl.shortStatus()) flushAll();
} }
@Override
public synchronized void delete(final byte[] key) throws IOException { public synchronized void delete(final byte[] key) throws IOException {
this.backend.delete(key); this.backend.delete(key);
final long rx = removeFromQueues(key); final long rx = removeFromQueues(key);
if (rx > 0) this.bufferlength -= rx; if (rx > 0) this.bufferlength -= rx;
} }
@Override
public synchronized int size() { public synchronized int size() {
return this.backend.size() + this.buffer.size(); return this.backend.size() + this.buffer.size();
} }
@Override
public synchronized boolean isEmpty() { public synchronized boolean isEmpty() {
if (!this.backend.isEmpty()) return false; if (!this.backend.isEmpty()) return false;
if (!this.buffer.isEmpty()) return false; if (!this.buffer.isEmpty()) return false;
return true; return true;
} }
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException { public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
flushAll(); flushAll();
return this.backend.keys(up, rotating); return this.backend.keys(up, rotating);
} }
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException { public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
flushAll(); flushAll();
return this.backend.keys(up, firstKey); return this.backend.keys(up, firstKey);
} }
/**
 * Iterate over the keys of the backend, requested with up = true and rotating = false.
 * flushAll() is called first, in the same way as the keys(...) methods do.
 *
 * @return an iterator over the keys; never null. If the backend fails with an
 *         IOException an empty iterator is returned, so that a for-each loop over
 *         this object ends immediately instead of failing with a NullPointerException.
 */
@Override
public Iterator<byte[]> iterator() {
    flushAll();
    try {
        return this.backend.keys(true, false);
    } catch (final IOException e) {
        // Iterable.iterator() must not return null; the failure is reported as "no keys"
        // fully-qualified name: avoids the need for an additional import in this file
        return java.util.Collections.<byte[]>emptyList().iterator();
    }
}
private boolean flushOne() { private boolean flushOne() {
if (this.buffer.isEmpty()) return false; if (this.buffer.isEmpty()) return false;
// depending on process case, write it to the file or compress it to the other queue // depending on process case, write it to the file or compress it to the other queue
@ -312,6 +341,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException { public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
final byte[] b = get(key); final byte[] b = get(key);
if (b == null) return 0; if (b == null) return 0;
@ -323,6 +353,7 @@ public class Compressor implements BLOB {
return reduction; return reduction;
} }
@Override
public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException { public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException {
final byte[] b = get(key); final byte[] b = get(key);
if (b == null) return 0; if (b == null) return 0;
@ -334,4 +365,5 @@ public class Compressor implements BLOB {
return reduction; return reduction;
} }
} }

@ -39,6 +39,8 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
@ -57,8 +59,8 @@ import net.yacy.kelondro.util.MemoryControl;
public class MapHeap implements Map<byte[], Map<String, String>> { public class MapHeap implements Map<byte[], Map<String, String>> {
private BLOB blob; private final BLOB blob;
private ARC<byte[], Map<String, String>> cache; private final ARC<byte[], Map<String, String>> cache;
private final char fillchar; private final char fillchar;
@ -445,11 +447,10 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
* close the Map table * close the Map table
*/ */
public synchronized void close() { public synchronized void close() {
this.cache = null; this.cache.clear();
// close file // close file
if (this.blob != null) this.blob.close(true); if (this.blob != null) this.blob.close(true);
this.blob = null;
} }
@Override @Override
@ -516,6 +517,29 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
return set; return set;
} }
public final static byte[] POISON_QUEUE_ENTRY = "POISON".getBytes();
/**
 * Provide the keys of the blob through a bounded queue that is filled by a
 * separate thread which is started by this method.
 * The thread puts every key of blob.keys(true, false) into the queue and
 * finally puts MapHeap.POISON_QUEUE_ENTRY as end marker. The end marker is
 * also put if the enumeration stops early because of an IOException or
 * because the thread was interrupted while it was waiting for free space.
 *
 * @param size the capacity of the returned queue
 * @return the queue that receives the keys followed by the end marker
 */
public BlockingQueue<byte[]> keyQueue(final int size) {
    final ArrayBlockingQueue<byte[]> queue = new ArrayBlockingQueue<byte[]>(size);
    final Thread feeder = new Thread() {
        @Override
        public void run() {
            try {
                final Iterator<byte[]> keys = MapHeap.this.blob.keys(true, false);
                while (keys.hasNext()) {
                    try {
                        queue.put(keys.next());
                    } catch (InterruptedException e) {
                        // stop feeding keys; the end marker is still put below
                        break;
                    }
                }
            } catch (final IOException e) {}
            // signal the end of the key sequence to the consumer
            try {
                queue.put(MapHeap.POISON_QUEUE_ENTRY);
            } catch (InterruptedException e) {
            }
        }
    };
    feeder.start();
    return queue;
}
@Override @Override
public Collection<Map<String, String>> values() { public Collection<Map<String, String>> values() {
// this method shall not be used because it is not appropriate for this kind of data // this method shall not be used because it is not appropriate for this kind of data

@ -98,6 +98,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
is.close(); is.close();
is = null; is = null;
assert this.index.size() == file.length() / (keylength + idxbytes); assert this.index.size() == file.length() / (keylength + idxbytes);
trim();
} }
public void trim() { public void trim() {
@ -415,6 +416,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
return this.result.get(); return this.result.get();
} }
@Override
public final HandleMap call() throws IOException { public final HandleMap call() throws IOException {
try { try {
finishloop: while (true) { finishloop: while (true) {
@ -439,7 +441,8 @@ public final class HandleMap implements Iterable<Row.Entry> {
} }
} }
public Iterator<Row.Entry> iterator() { @Override
public Iterator<Row.Entry> iterator() {
return rows(true, null); return rows(true, null);
} }
} }

@ -473,6 +473,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
} }
} }
} }
references.trim();
System.out.println("CELL REFERENCE COLLECTION finished"); System.out.println("CELL REFERENCE COLLECTION finished");
return references; return references;
} }

@ -187,6 +187,7 @@ public class Table implements Index, Iterable<Row.Entry> {
} }
} }
} }
this.index.trim();
// open the file // open the file
this.file = new BufferedRecords(new Records(tablefile, rowdef.objectsize), this.buffersize); this.file = new BufferedRecords(new Records(tablefile, rowdef.objectsize), this.buffersize);
@ -594,6 +595,7 @@ public class Table implements Index, Iterable<Row.Entry> {
* @throws IOException * @throws IOException
* @throws RowSpaceExceededException * @throws RowSpaceExceededException
*/ */
@Override
public boolean put(final Entry row) throws IOException, RowSpaceExceededException { public boolean put(final Entry row) throws IOException, RowSpaceExceededException {
assert row != null; assert row != null;
if (this.file == null || row == null) return true; if (this.file == null || row == null) return true;
@ -702,6 +704,7 @@ public class Table implements Index, Iterable<Row.Entry> {
} }
} }
@Override
public boolean delete(final byte[] key) throws IOException { public boolean delete(final byte[] key) throws IOException {
return remove(key) != null; return remove(key) != null;
} }

@ -556,7 +556,7 @@ public final class Switchboard extends serverSwitch
getDataPath(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT); getDataPath(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT);
this.log.logInfo("HTCACHE Path = " + this.htCachePath.getAbsolutePath()); this.log.logInfo("HTCACHE Path = " + this.htCachePath.getAbsolutePath());
final long maxCacheSize = final long maxCacheSize =
1024 * 1024 * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte 1024L * 1024L * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte
Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize); Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize);
// create the surrogates directories // create the surrogates directories

@ -109,8 +109,8 @@ public class SnippetProcess {
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking this.result = new WeakPriorityBlockingQueue<ResultEntry>(Math.max(1000, 10 * query.itemsPerPage())); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1); this.images = new WeakPriorityBlockingQueue<MediaSnippet>(Math.max(1000, 10 * query.itemsPerPage()));
// snippets do not need to match with the complete query hashes, // snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search // only with the query minus the stopwords which had not been used for the search

@ -206,10 +206,10 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (de.anomic.crawler.Cache.has(url.hash())) { if (de.anomic.crawler.Cache.has(url.hash())) {
// get the sentences from the cache // get the sentences from the cache
final Request request = loader.request(url, true, reindexing); final Request request = loader == null ? null : loader.request(url, true, reindexing);
Response response; Response response;
try { try {
response = loader == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true); response = loader == null || request == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true);
} catch (IOException e1) { } catch (IOException e1) {
response = null; response = null;
} }

Loading…
Cancel
Save