Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Conflicts:
	source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java
pull/1/head
Michael Peter Christen 13 years ago
commit 96e9d77270

@ -245,8 +245,8 @@ sessionidNamesFile = defaults/sessionid.names
proxyCache = DATA/HTCACHE
# the maximum disc cache size for files in Cache in megabytes
# default: 32 Gigabyte
proxyCacheSize = 32768
# default: 4 Gigabyte
proxyCacheSize = 4096
# a path to the surrogate input directory
surrogates.in = DATA/SURROGATES/in

@ -32,7 +32,6 @@ import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects;
@ -62,7 +61,7 @@ public class ConfigHTCache_p {
// proxyCacheSize
final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4);
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024);
Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
}
if (post != null && post.containsKey("deletecomplete")) {

@ -69,7 +69,7 @@
</td>
</tr>
<tr>
<td><label for="file"><span class="nobr">From File</span></label>:</td>
<td><label for="file"><span class="nobr">From File (enter a path<br/>within your local file system)</span></label>:</td>
<td><input type="radio" name="crawlingMode" id="file" value="file" onclick="document.getElementById('Crawler').rangeDomain.checked = true;"/></td>
<td><input type="text" name="crawlingFile" size="41" onfocus="check('file')"/><!--<input type="file" name="crawlingFile" size="18" onfocus="check('file')"/>--></td>
</tr>

@ -154,8 +154,14 @@ public class Crawler_p {
}
// remove crawlingFileContent before we record the call
final String crawlingFileName = post.get("crawlingFile");
final File crawlingFile = (crawlingFileName != null && crawlingFileName.length() > 0) ? new File(crawlingFileName) : null;
String crawlingFileName = post.get("crawlingFile");
final File crawlingFile;
if (crawlingFileName == null || crawlingFileName.length() == 0) {
crawlingFile = null;
} else {
if (crawlingFileName.startsWith("file://")) crawlingFileName = crawlingFileName.substring(7);
crawlingFile = new File(crawlingFileName);
}
if (crawlingFile != null && crawlingFile.exists()) {
post.remove("crawlingFile$file");
}

@ -124,7 +124,7 @@ public class PerformanceMemory_p {
c++;
}
prop.put("EcoList", c);
prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d));
prop.putNum("EcoIndexTotalMem", totalmem / (1024d * 1024d));
// write object cache table
final Iterator<Map.Entry<String, RAMIndex>> oi = RAMIndex.objects();
@ -153,7 +153,7 @@ public class PerformanceMemory_p {
c++;
}
prop.put("indexcache", c);
prop.putNum("indexcacheTotalMem", totalhitmem / (1024 * 1024d));
prop.putNum("indexcacheTotalMem", totalhitmem / (1024d * 1024d));
// write object cache table
i = Cache.filenames();
@ -195,10 +195,10 @@ public class PerformanceMemory_p {
c++;
}
prop.put("ObjectList", c);
prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024 * 1024d));
prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024 * 1024d));
prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024 * 1024d));
prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024 * 1024d));
prop.putNum("objectCacheStopGrow", Cache.getMemStopGrow() / (1024d * 1024d));
prop.putNum("objectCacheStartShrink", Cache.getMemStartShrink() / (1024d * 1024d));
prop.putNum("objectHitCacheTotalMem", totalhitmem / (1024d * 1024d));
prop.putNum("objectMissCacheTotalMem", totalmissmem / (1024d * 1024d));
// other caching structures
prop.putNum("namecacheHit.size", Domains.nameCacheHitSize());

@ -189,7 +189,7 @@ public class PerformanceQueues_p {
// load with new values
idlesleep = post.getLong(threadName + "_idlesleep", idlesleep);
busysleep = post.getLong(threadName + "_busysleep", busysleep);
memprereq = post.getLong(threadName + "_memprereq", memprereq) * 1024;
memprereq = post.getLong(threadName + "_memprereq", memprereq) * 1024l;
if (memprereq == 0) memprereq = sb.getConfigLong(threadName + "_memprereq", 0);
// check values to prevent short-cut loops

@ -33,7 +33,6 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -95,7 +94,7 @@ public class ProxyIndexingMonitor_p {
newProxyCacheSize = post.getLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64L);
if (newProxyCacheSize < 4) { newProxyCacheSize = 4; }
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024);
Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
// implant these settings also into the crawling profile for the proxy
if (sb.crawler.defaultProxyProfile == null) {

@ -28,7 +28,7 @@ public class YBRFetch_p
final servletProperties prop = new servletProperties();
final Switchboard sb = (Switchboard) env;
if ( post == null || !post.containsKey("ghrt4") || MemoryControl.available() < 1024 * 1024 * 1024 ) {
if ( post == null || !post.containsKey("ghrt4") || MemoryControl.available() < 1024L * 1024L * 1024L ) {
return prop;
}
final File hostIndexFile = new File(sb.queuesRoot, "hostIndex.blob");

@ -55,7 +55,7 @@ import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.Parser;
import net.yacy.document.geolocalization.Location;
import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -909,12 +909,12 @@ public class yacysearch {
}
// find geographic info
final SortedSet<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
final SortedSet<GeoLocation> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if ( coordinates == null || coordinates.isEmpty() || startRecord > 0 ) {
prop.put("geoinfo", "0");
} else {
int i = 0;
for ( final Location c : coordinates ) {
for ( final GeoLocation c : coordinates ) {
prop.put("geoinfo_loc_" + i + "_lon", Math.round(c.lon() * 10000.0f) / 10000.0f);
prop.put("geoinfo_loc_" + i + "_lat", Math.round(c.lat() * 10000.0f) / 10000.0f);
prop.put("geoinfo_loc_" + i + "_name", c.getName());

@ -28,7 +28,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.opensearch.SRURSSConnector;
import net.yacy.document.LibraryProvider;
import net.yacy.document.geolocalization.Location;
import net.yacy.document.geolocalization.GeoLocation;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.server.serverCore;
@ -67,11 +67,11 @@ public class yacysearch_location {
int placemarkCounter = 0;
if (query.length() > 0 && search_query) {
final Set<Location> locations = LibraryProvider.geoLoc.find(query, true);
final Set<GeoLocation> locations = LibraryProvider.geoLoc.find(query, true);
for (final String qp: query.split(" ")) {
locations.addAll(LibraryProvider.geoLoc.find(qp, true));
}
for (final Location location: locations) {
for (final GeoLocation location: locations) {
// write for all locations a point to this message
prop.put("kml_placemark_" + placemarkCounter + "_location", location.getName());
prop.put("kml_placemark_" + placemarkCounter + "_name", location.getName());

@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ResponseHeader;
@ -47,6 +48,7 @@ import net.yacy.kelondro.blob.Compressor;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
@ -57,7 +59,7 @@ public final class Cache {
private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap";
private static final String FILE_DB_NAME = "file.array";
private static Map<byte[], Map<String, String>> responseHeaderDB = null;
private static MapHeap responseHeaderDB = null;
private static Compressor fileDB = null;
private static ArrayStack fileDBunbuffered = null;
@ -84,6 +86,7 @@ public final class Cache {
} catch (final IOException e) {
Log.logException(e);
}
// open the cache file
try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false);
fileDBunbuffered.setMaxSize(maxCacheSize);
@ -91,6 +94,53 @@ public final class Cache {
} catch (final IOException e) {
Log.logException(e);
}
Log.logInfo("Cache", "initialized cache database responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
// clean up the responseHeaderDB which cannot be cleaned the same way as the cache files.
// We do this as a concurrent job only once after start-up silently
if (responseHeaderDB.size() != fileDB.size()) {
Log.logWarning("Cache", "file and metadata size is not equal, starting a cleanup thread...");
Thread startupCleanup = new Thread() {
/**
 * One-time start-up cleanup that brings responseHeaderDB and fileDB back in sync.
 * Phase 1 collects every key of responseHeaderDB that fileDB does not contain and
 * deletes those keys from responseHeaderDB. Phase 2 collects every key of fileDB that
 * responseHeaderDB does not contain and deletes those keys from fileDB.
 * Deletion failures do not abort the cleanup; they are counted and reported once per phase.
 */
@Override
public void run() {
    // enumerate the responseHeaderDB and find out all entries that are not inside the fileDB
    final BlockingQueue<byte[]> q = responseHeaderDB.keyQueue(1000);
    final HandleSet delkeys = new HandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 1);
    Log.logInfo("Cache", "started cleanup thread to remove unused cache metadata");
    try {
        byte[] k;
        // the queue is terminated by the POISON_QUEUE_ENTRY marker (compared by reference)
        while (((k = q.take()) != MapHeap.POISON_QUEUE_ENTRY)) {
            // NOTE(review): breaking here stops consuming the queue before the poison entry
            // arrives; whether the producer behind keyQueue() copes with that is not visible here
            if (!fileDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
        }
    } catch (InterruptedException e) {
        // interrupted while waiting for keys: continue with whatever was collected so far
    } finally {
        // delete the collected keys from the metadata
        Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused metadata entries; now deleting them from the file...");
        int failedHeaderDeletes = 0;
        String lastHeaderError = null;
        for (byte[] k: delkeys) {
            try {
                responseHeaderDB.delete(k);
            } catch (IOException e) {
                // best effort: keep going, but remember that something went wrong
                failedHeaderDeletes++;
                lastHeaderError = e.getMessage();
            }
        }
        if (failedHeaderDeletes > 0) {
            Log.logWarning("Cache", "cleanup thread could not delete " + failedHeaderDeletes + " metadata entries; last error: " + lastHeaderError);
        }
    }

    Log.logInfo("Cache", "running check to remove unused file cache data");
    delkeys.clear();
    for (byte[] k: fileDB) {
        if (!responseHeaderDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
    }
    Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused cache entries; now deleting them from the file...");
    int failedFileDeletes = 0;
    String lastFileError = null;
    for (byte[] k: delkeys) {
        try {
            fileDB.delete(k);
        } catch (IOException e) {
            // best effort: keep going, but remember that something went wrong
            failedFileDeletes++;
            lastFileError = e.getMessage();
        }
    }
    if (failedFileDeletes > 0) {
        Log.logWarning("Cache", "cleanup thread could not delete " + failedFileDeletes + " cache entries; last error: " + lastFileError);
    }
    Log.logInfo("Cache", "terminated cleanup thread; responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
}
};
startupCleanup.start();
}
}
/**
@ -131,9 +181,7 @@ public final class Cache {
* close the databases
*/
public static void close() {
if (responseHeaderDB instanceof MapHeap) {
((MapHeap) responseHeaderDB).close();
}
responseHeaderDB.close();
fileDB.close(true);
}
@ -156,12 +204,9 @@ public final class Cache {
hm.putAll(responseHeader);
hm.put("@@URL", url.toNormalform(true, false));
try {
if (responseHeaderDB instanceof MapHeap) {
((MapHeap) responseHeaderDB).insert(url.hash(), hm);
} else {
responseHeaderDB.put(url.hash(), hm);
}
responseHeaderDB.insert(url.hash(), hm);
} catch (final Exception e) {
fileDB.delete(url.hash());
throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
}
if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
@ -184,11 +229,7 @@ public final class Cache {
// if only one of the two (header or content) is present, we clean up the orphaned part
if (headerExists) try {
log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up");
if (responseHeaderDB instanceof MapHeap) {
((MapHeap) responseHeaderDB).delete(urlhash);
} else {
responseHeaderDB.remove(urlhash);
}
responseHeaderDB.delete(urlhash);
} catch (final IOException e) {}
if (fileExists) try {
//log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up");
@ -209,8 +250,14 @@ public final class Cache {
public static ResponseHeader getResponseHeader(final byte[] hash) {
// loading data from database
Map<String, String> hdb;
hdb = responseHeaderDB.get(hash);
Map<String, String> hdb = null;
try {
hdb = responseHeaderDB.get(hash);
} catch (IOException e) {
return null;
} catch (RowSpaceExceededException e) {
return null;
}
if (hdb == null) return null;
return new ResponseHeader(null, hdb);
@ -251,11 +298,7 @@ public final class Cache {
* @throws IOException
*/
public static void delete(final byte[] hash) throws IOException {
if (responseHeaderDB instanceof MapHeap) {
((MapHeap) responseHeaderDB).delete(hash);
} else {
responseHeaderDB.remove(hash);
}
responseHeaderDB.delete(hash);
fileDB.delete(hash);
}
}

@ -453,6 +453,7 @@ public class Domains {
globalHosts = null;
} else try {
globalHosts = new KeyList(globalHostsnameCache);
Log.logInfo("Domains", "loaded globalHosts cache of hostnames, size = " + globalHosts.size());
} catch (final IOException e) {
globalHosts = null;
}

@ -91,7 +91,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
/**
* get the number of elements that had been drained so far and are wainting
* get the number of elements that had been drained so far and are waiting
* in a list to get enumerated with element()
* @return
*/
@ -105,7 +105,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* @return
*/
public synchronized int sizeAvailable() {
return this.queue.size() + this.drained.size();
return Math.min(this.maxsize, this.queue.size() + this.drained.size());
}
/**
@ -172,7 +172,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
final Element<E> element = this.queue.first();
assert element != null;
this.queue.remove(element);
this.drained.add(element);
if (this.drained.size() < this.maxsize) this.drained.add(element);
assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
return element;
}
@ -293,14 +293,17 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
public long weight;
public E element;
@Override
public long getWeight() {
return this.weight;
}
@Override
public E getElement() {
return this.element;
}
@Override
public boolean equals(Element<E> o) {
return this.element.equals(o.getElement());
}
@ -312,7 +315,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
@Override
public String toString() {
return element.toString() + "/" + weight;
return this.element.toString() + "/" + this.weight;
}
}
@ -329,10 +332,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
this.weight = weight;
}
@Override
public int compare(NaturalElement<E> o1, NaturalElement<E> o2) {
return o1.compareTo(o2);
}
@Override
public int compareTo(NaturalElement<E> o) {
if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0;
@ -360,10 +365,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
this.weight = weight;
}
@Override
public int compare(ReverseElement<E> o1, ReverseElement<E> o2) {
return o1.compareTo(o2);
}
@Override
public int compareTo(ReverseElement<E> o) {
if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0;

@ -37,13 +37,13 @@ public final class HashARC<K, V> extends SimpleARC<K, V> implements Map<K, V>, I
public HashARC(final int cacheSize) {
this.cacheSize = cacheSize / 2;
super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(1, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize;
}
});
this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(1, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize;

@ -76,6 +76,10 @@ public class KeyList implements Iterable<String> {
}
/**
 * @return the number of entries reported by the underlying keys collection
 */
public int size() {
return this.keys.size();
}
/**
 * Tests whether the given key is present in the list.
 * The key is trimmed and converted to lower case before the lookup.
 * @param key the key to look for; must not be null
 * @return true if the normalized key is contained in the keys collection
 */
public boolean contains(final String key) {
    final String normalizedKey = key.trim().toLowerCase();
    return this.keys.containsKey(normalizedKey);
}

@ -1,73 +0,0 @@
/**
* Coordinates.java
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 04.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
public class Coordinates {
private static final double tenmeter = 90.0d / 1.0e6d;
private final double lon, lat;
public Coordinates(double lon, double lat) {
this.lon = lon;
this.lat = lat;
}
public double lon() {
return this.lon;
}
public double lat() {
return this.lat;
}
private static final double bits30 = new Double(1L << 30).doubleValue(); // this is about one billion (US)
private static final double upscale = bits30 / 360.0;
private static final int coord2int(double coord) {
return (int) ((180.0 - coord) * upscale);
}
/**
* compute the hash code of a coordinate
* this produces identical hash codes for locations that are close to each other
*/
public int hashCode() {
return coord2int(this.lon) + (coord2int(this.lat) >> 15);
}
/**
* equality test that is needed to use the class inside HashMap/HashSet
*/
public boolean equals(final Object o) {
if (!(o instanceof Coordinates)) return false;
Coordinates oo = (Coordinates) o;
if (this.lon == oo.lon && this.lat == oo.lat) return true;
// we access fuzzy values that are considered as equal if they are close to each other
return Math.abs(this.lon - oo.lon) < tenmeter && Math.abs(this.lat - oo.lat) < tenmeter;
}
public String toString() {
return "[" + this.lon + "," + this.lat + "]";
}
}

@ -1,5 +1,5 @@
/**
* Location.java
* GeoLocation
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 08.10.2009 on http://yacy.net
*
@ -25,19 +25,19 @@ package net.yacy.document.geolocalization;
import java.util.Comparator;
public class Location extends Coordinates implements Comparable<Location>, Comparator<Location> {
public class GeoLocation extends GeoPoint implements Comparable<GeoLocation>, Comparator<GeoLocation> {
private String name;
private int population;
public Location(float lon, float lat) {
super(lon, lat);
public GeoLocation(double lat, double lon) {
super(lat, lon);
this.name = null;
this.population = 0;
}
public Location(float lon, float lat, String name) {
super(lon, lat);
public GeoLocation(double lat, double lon, String name) {
super(lat, lon);
this.name = name;
}
@ -57,10 +57,11 @@ public class Location extends Coordinates implements Comparable<Location>, Compa
return this.population;
}
@Override
public boolean equals(Object loc) {
if (!(loc instanceof Location)) return false;
if (this.name == null || ((Location) loc).name == null) return super.equals(loc);
return super.equals(loc) && this.name.toLowerCase().equals(((Location) loc).name.toLowerCase());
if (!(loc instanceof GeoLocation)) return false;
if (this.name == null || ((GeoLocation) loc).name == null) return super.equals(loc);
return super.equals(loc) && this.name.toLowerCase().equals(((GeoLocation) loc).name.toLowerCase());
}
/**
@ -69,7 +70,8 @@ public class Location extends Coordinates implements Comparable<Location>, Compa
* this order is used to get sorted lists of locations where the first elements
* have the greatest population
*/
public int compareTo(Location o) {
@Override
public int compareTo(GeoLocation o) {
if (this.equals(o)) return 0;
long s = (ph(this.getPopulation()) << 30) + this.hashCode();
long t = (ph(o.getPopulation()) << 30) + o.hashCode();
@ -80,10 +82,11 @@ public class Location extends Coordinates implements Comparable<Location>, Compa
private long ph(int population) {
if (population > 10000) population -= 10000;
return (long) population;
return population;
}
public int compare(Location o1, Location o2) {
@Override
public int compare(GeoLocation o1, GeoLocation o2) {
return o1.compareTo(o2);
}

@ -0,0 +1,91 @@
/**
* GeoPoint
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 08.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
/**
 * An immutable latitude/longitude pair that is stored packed into a single long.
 * Each coordinate is scaled to an int (see coord2int); the latitude occupies the
 * upper 32 bits and the longitude the lower 32 bits of the packed value.
 * Because of the scaling, lat() and lon() return the stored approximation of the
 * values given to the constructor, not necessarily the exact input.
 * Coordinates are not validated; the scaling is designed for values between -180 and +180 degrees.
 */
public class GeoPoint {

    public static final double meter = 90.0d / 1.0e7d; // this is actually the definition of 'meter': 10 million meter shall be the distance from the equator to the pole

    private final long latlon; // using one variable for the coordinate pair saves some space

    /**
     * @param lat the latitude in degrees
     * @param lon the longitude in degrees
     */
    public GeoPoint(double lat, double lon) {
        this.latlon = pack(coord2int(lat), coord2int(lon));
    }

    /**
     * Creates a point from integer coordinates that are divided by 10^6 before they are stored.
     * Be aware that a call with two int arguments, like new GeoPoint(10, 20), selects this
     * constructor and not the one that takes degrees as double values.
     * @param lat the latitude in units of 10^-6 degrees
     * @param lon the longitude in units of 10^-6 degrees
     */
    public GeoPoint(int lat, int lon) {
        this.latlon = pack(coord2int(lat / 1e6d), coord2int(lon / 1e6d));
    }

    /**
     * combine two scaled coordinates into one long
     * The longitude is masked to its 32 bits before it is merged: without the mask a negative
     * int would be sign-extended to 64 bits and overwrite the latitude in the upper half.
     * @param latBits the scaled latitude
     * @param lonBits the scaled longitude
     * @return the packed value, latitude in the upper and longitude in the lower 32 bits
     */
    private static long pack(final int latBits, final int lonBits) {
        return (((long) latBits) << 32) | (lonBits & 0xFFFFFFFFL);
    }

    /**
     * @return the stored longitude in degrees
     */
    public double lon() {
        // the cast keeps exactly the lower 32 bits, which hold the scaled longitude
        return int2coord((int) this.latlon);
    }

    /**
     * @return the stored latitude in degrees
     */
    public double lat() {
        return int2coord((int) (this.latlon >>> 32));
    }

    private static final double maxint = (double) Integer.MAX_VALUE;
    private static final double upscale = maxint / 360.0;

    /**
     * scale a coordinate to an int: the range from -180 to +180 degrees maps to 0 .. Integer.MAX_VALUE
     * @param coord the coordinate in degrees
     * @return the scaled coordinate, truncated to an int
     */
    private static final int coord2int(double coord) {
        return (int) ((coord + 180.0) * upscale);
    }

    /**
     * reverse operation of coord2int
     * @param z the scaled coordinate
     * @return the coordinate in degrees
     */
    private static final double int2coord(int z) {
        return (z / upscale) - 180.0;
    }

    /**
     * compute the hash code of a coordinate
     * the hash is derived from the packed value only, so points that are equal
     * according to equals() always have identical hash codes
     */
    @Override
    public int hashCode() {
        return (int) ((this.latlon & Integer.MAX_VALUE) >> 1) + (int) (this.latlon >> 33);
    }

    /**
     * equality test that is needed to use the class inside HashMap/HashSet
     * two points are equal if their packed (scaled) coordinates are identical
     */
    @Override
    public boolean equals(final Object o) {
        if (!(o instanceof GeoPoint)) return false;
        GeoPoint oo = (GeoPoint) o;
        return (this.latlon == oo.latlon);
    }

    /**
     * @return the stored coordinates in the form "[lat,lon]"
     */
    @Override
    public String toString() {
        return "[" + this.lat() + "," + this.lon() + "]";
    }

    /**
     * small demonstration: prints a point, its hash code and the error that
     * is caused by the scaled storage, measured in meter
     */
    public static void main(String[] args) {
        double lat = 13.419444d;
        double lon = 52.548611d;
        GeoPoint c = new GeoPoint(lat, lon);
        System.out.println(c.toString() + " #" + c.hashCode());
        System.out.println("error: lat: " + (Math.abs(c.lat() - lat) / meter) + " meter; lon: " + (Math.abs(c.lon() - lon) / meter) + " meter");
    }
}

@ -69,7 +69,7 @@ public class GeonamesLocalization implements Localization
modification date : date of last modification in yyyy-MM-dd format
*/
private final Map<Integer, Location> id2loc;
private final Map<Integer, GeoLocation> id2loc;
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final File file;
@ -77,7 +77,7 @@ public class GeonamesLocalization implements Localization
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
this.file = file;
this.id2loc = new HashMap<Integer, Location>();
this.id2loc = new HashMap<Integer, GeoLocation>();
this.name2ids =
new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
@ -112,8 +112,8 @@ public class GeonamesLocalization implements Localization
for ( final String s : fields[3].split(",") ) {
locnames.add(new StringBuilder(s));
}
final Location c =
new Location(Float.parseFloat(fields[5]), Float.parseFloat(fields[4]), fields[1]);
final GeoLocation c =
new GeoLocation(Float.parseFloat(fields[4]), Float.parseFloat(fields[5]), fields[1]);
c.setPopulation((int) Long.parseLong(fields[14]));
this.id2loc.put(id, c);
for ( final StringBuilder name : locnames ) {
@ -136,7 +136,7 @@ public class GeonamesLocalization implements Localization
}
@Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) {
public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final Set<Integer> r = new HashSet<Integer>();
List<Integer> c;
final StringBuilder an = new StringBuilder(anyname);
@ -155,9 +155,9 @@ public class GeonamesLocalization implements Localization
}
}
}
final TreeSet<Location> a = new TreeSet<Location>();
final TreeSet<GeoLocation> a = new TreeSet<GeoLocation>();
for ( final Integer e : r ) {
final Location w = this.id2loc.get(e);
final GeoLocation w = this.id2loc.get(e);
if ( w != null ) {
a.add(w);
}

@ -45,7 +45,7 @@ public interface Localization {
* @param locationexact - if true, only names that exactly match the location are returned; if false, partially matching names are returned as well
* @return a set of locations, ordered by population (if this information is given)
*/
public TreeSet<Location> find(String anyname, boolean locationexact);
public TreeSet<GeoLocation> find(String anyname, boolean locationexact);
/**
* produce a set of location names

@ -53,7 +53,7 @@ public class OpenGeoDBLocalization implements Localization
{
private final Map<Integer, String> locTypeHash2locType;
private final Map<Integer, Location> id2loc;
private final Map<Integer, GeoLocation> id2loc;
private final Map<Integer, Integer> id2locTypeHash;
private final TreeMap<StringBuilder, List<Integer>> name2ids;
private final TreeMap<StringBuilder, List<Integer>> kfz2ids;
@ -65,7 +65,7 @@ public class OpenGeoDBLocalization implements Localization
this.file = file;
this.locTypeHash2locType = new HashMap<Integer, String>();
this.id2loc = new HashMap<Integer, Location>();
this.id2loc = new HashMap<Integer, GeoLocation>();
this.id2locTypeHash = new HashMap<Integer, Integer>();
this.name2ids =
new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
@ -112,7 +112,7 @@ public class OpenGeoDBLocalization implements Localization
lat = Float.parseFloat(v[2]);
lon = Float.parseFloat(v[3]);
}
this.id2loc.put(Integer.parseInt(v[0]), new Location(lon, lat));
this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
}
if ( line.startsWith("geodb_textdata ") ) {
line = line.substring(15 + 7);
@ -126,7 +126,7 @@ public class OpenGeoDBLocalization implements Localization
}
l.add(id);
this.name2ids.put(new StringBuilder(h), l);
final Location loc = this.id2loc.get(id);
final GeoLocation loc = this.id2loc.get(id);
if ( loc != null ) {
loc.setName(h);
}
@ -200,7 +200,7 @@ public class OpenGeoDBLocalization implements Localization
* @return
*/
@Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) {
public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final HashSet<Integer> r = new HashSet<Integer>();
List<Integer> c;
final StringBuilder an = new StringBuilder(anyname);
@ -231,9 +231,9 @@ public class OpenGeoDBLocalization implements Localization
r.add(i);
}
}
final TreeSet<Location> a = new TreeSet<Location>();
final TreeSet<GeoLocation> a = new TreeSet<GeoLocation>();
for ( final Integer e : r ) {
final Location w = this.id2loc.get(e);
final GeoLocation w = this.id2loc.get(e);
if ( w != null ) {
a.add(w);
}

@ -76,8 +76,8 @@ public class OverarchingLocalization implements Localization {
* @return a set of locations, ordered by population (if this information is given)
*/
@Override
public TreeSet<Location> find(final String anyname, final boolean locationexact) {
final TreeSet<Location> locations = new TreeSet<Location>();
public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
final TreeSet<GeoLocation> locations = new TreeSet<GeoLocation>();
for (final Localization service: this.services.values()) {
locations.addAll(service.find(anyname, locationexact));
}

@ -128,7 +128,7 @@ public class ArrayStack implements BLOB {
Runtime.getRuntime().availableProcessors(), 100,
TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(),
new NamePrefixThreadFactory(prefix));
new NamePrefixThreadFactory(this.prefix));
// check existence of the heap directory
if (heapLocation.exists()) {
@ -183,9 +183,9 @@ public class ArrayStack implements BLOB {
File f;
long maxtime = 0;
for (final String file : files) {
if (file.length() >= 22 && file.startsWith(prefix) && file.endsWith(".blob")) {
if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try {
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(prefix.length() + 1, prefix.length() + 18));
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
time = d.getTime();
if (time > maxtime) maxtime = time;
} catch (final ParseException e) {continue;}
@ -194,9 +194,9 @@ public class ArrayStack implements BLOB {
// open all blob files
for (final String file : files) {
if (file.length() >= 22 && file.startsWith(prefix) && file.endsWith(".blob")) {
if (file.length() >= 22 && file.charAt(this.prefix.length()) == '.' && file.endsWith(".blob")) {
try {
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(prefix.length() + 1, prefix.length() + 18));
d = my_SHORT_MILSEC_FORMATTER.parse(file.substring(this.prefix.length() + 1, this.prefix.length() + 18));
f = new File(heapLocation, file);
time = d.getTime();
if (time == maxtime && !trimall) {

@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.zip.GZIPInputStream;
@ -45,7 +46,7 @@ import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.util.MemoryControl;
public class Compressor implements BLOB {
public class Compressor implements BLOB, Iterable<byte[]> {
static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content
static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding)
@ -61,18 +62,22 @@ public class Compressor implements BLOB {
initBuffer();
}
@Override
public long mem() {
return this.backend.mem();
}
@Override
public void trim() {
this.backend.trim();
}
@Override
public String name() {
return this.backend.name();
}
@Override
public synchronized void clear() throws IOException {
initBuffer();
this.backend.clear();
@ -83,10 +88,12 @@ public class Compressor implements BLOB {
this.bufferlength = 0;
}
@Override
public ByteOrder ordering() {
return this.backend.ordering();
}
@Override
public synchronized void close(final boolean writeIDX) {
// no more thread is running, flush all queues
flushAll();
@ -164,6 +171,7 @@ public class Compressor implements BLOB {
}
}
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
// depending on the source of the result, we additionally do entry compression
// because if a document was read once, we think that it will not be retrieved another time again soon
@ -186,6 +194,7 @@ public class Compressor implements BLOB {
return decompress(b);
}
@Override
public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null;
try {
@ -198,16 +207,19 @@ public class Compressor implements BLOB {
return null;
}
/**
 * Tests whether an entry for the given key exists.
 * The buffer is consulted first; the backend is only asked if the buffer does not hold the key.
 * Both lookups are done while holding the monitor of this object.
 *
 * @param key the key to look for
 * @return {@code true} if either the buffer or the backend contains the key
 */
@Override
public boolean containsKey(final byte[] key) {
    synchronized (this) {
        if (this.buffer.containsKey(key)) {
            return true;
        }
        return this.backend.containsKey(key);
    }
}
/**
 * Returns the key length of this BLOB, taken unchanged from the wrapped backend.
 *
 * @return the result of {@code backend.keylength()}
 */
@Override
public int keylength() {
    final int backendKeyLength = this.backend.keylength();
    return backendKeyLength;
}
@Override
public synchronized long length() {
try {
return this.backend.length() + this.bufferlength;
@ -217,6 +229,7 @@ public class Compressor implements BLOB {
}
}
@Override
public long length(final byte[] key) throws IOException {
synchronized (this) {
byte[] b = this.buffer.get(key);
@ -238,6 +251,7 @@ public class Compressor implements BLOB {
return 0;
}
@Override
public void insert(final byte[] key, final byte[] b) throws IOException {
// first ensure that the files do not exist anywhere
@ -265,32 +279,47 @@ public class Compressor implements BLOB {
if (MemoryControl.shortStatus()) flushAll();
}
/**
 * Removes the entry for the given key.
 * The key is deleted from the backend first and then removed from the queues of this object
 * via {@code removeFromQueues}. If that call returns a positive value, the value is
 * subtracted from {@code bufferlength}.
 *
 * @param key the key of the entry to remove
 * @throws IOException if the backend fails to delete the entry
 */
@Override
public synchronized void delete(final byte[] key) throws IOException {
    this.backend.delete(key);
    final long removedLength = removeFromQueues(key);
    if (removedLength <= 0) {
        // nothing was accounted for this key, so the length counter stays as it is
        return;
    }
    this.bufferlength -= removedLength;
}
/**
 * Returns the number of entries, computed as the backend size plus the buffer size.
 *
 * @return {@code backend.size() + buffer.size()}
 */
@Override
public synchronized int size() {
    final int backendEntries = this.backend.size();
    final int bufferedEntries = this.buffer.size();
    return backendEntries + bufferedEntries;
}
/**
 * Tests whether this BLOB holds no entries at all.
 * The backend is checked first; the buffer is only checked if the backend is empty.
 *
 * @return {@code true} only if both the backend and the buffer are empty
 */
@Override
public synchronized boolean isEmpty() {
    return this.backend.isEmpty() && this.buffer.isEmpty();
}
/**
 * Returns an iterator over the keys of the backend.
 * {@code flushAll()} is called before the backend is asked, presumably so that entries still
 * held in the buffer become visible to the backend iterator (flushAll is defined elsewhere
 * in this class).
 *
 * @param up passed unchanged to {@code backend.keys}
 * @param rotating passed unchanged to {@code backend.keys}
 * @return the key iterator produced by the backend
 * @throws IOException if the backend cannot create the iterator
 */
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
    flushAll();
    final CloneableIterator<byte[]> backendKeys = this.backend.keys(up, rotating);
    return backendKeys;
}
/**
 * Returns an iterator over the keys of the backend, positioned by a first key.
 * {@code flushAll()} is called before the backend is asked, presumably so that entries still
 * held in the buffer become visible to the backend iterator (flushAll is defined elsewhere
 * in this class).
 *
 * @param up passed unchanged to {@code backend.keys}
 * @param firstKey passed unchanged to {@code backend.keys}
 * @return the key iterator produced by the backend
 * @throws IOException if the backend cannot create the iterator
 */
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
    flushAll();
    final CloneableIterator<byte[]> backendKeys = this.backend.keys(up, firstKey);
    return backendKeys;
}
/**
 * Iterates over the keys of this BLOB exactly like {@code keys(true, false)}.
 * <p>
 * The call is delegated to {@link #keys(boolean, boolean)} so that flushing and reading the
 * backend keys happen under the same monitor as for the other key enumerations of this class.
 *
 * @return the key iterator; if the backend fails with an {@link IOException} an empty
 *         iterator is returned instead of {@code null}, so that a for-each loop over this
 *         object cannot fail with a NullPointerException
 */
@Override
public Iterator<byte[]> iterator() {
    try {
        return keys(true, false);
    } catch (final IOException e) {
        // Iterable.iterator() cannot declare a checked exception; report "no keys" rather
        // than handing out null, which every for-each caller would dereference
        return java.util.Collections.<byte[]>emptyList().iterator();
    }
}
private boolean flushOne() {
if (this.buffer.isEmpty()) return false;
// depending on process case, write it to the file or compress it to the other queue
@ -312,6 +341,7 @@ public class Compressor implements BLOB {
}
}
@Override
public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
final byte[] b = get(key);
if (b == null) return 0;
@ -323,6 +353,7 @@ public class Compressor implements BLOB {
return reduction;
}
@Override
public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException {
final byte[] b = get(key);
if (b == null) return 0;
@ -334,4 +365,5 @@ public class Compressor implements BLOB {
return reduction;
}
}

@ -39,6 +39,8 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter;
@ -57,8 +59,8 @@ import net.yacy.kelondro.util.MemoryControl;
public class MapHeap implements Map<byte[], Map<String, String>> {
private BLOB blob;
private ARC<byte[], Map<String, String>> cache;
private final BLOB blob;
private final ARC<byte[], Map<String, String>> cache;
private final char fillchar;
@ -445,11 +447,10 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
* close the Map table
*/
public synchronized void close() {
    // cache and blob are declared final, so they cannot be set to null here;
    // dropping the cached entries is done by clearing the cache instead
    this.cache.clear();

    // close file
    if (this.blob != null) {
        this.blob.close(true);
    }
}
@Override
@ -516,6 +517,29 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
return set;
}
/**
 * Marker entry that keyQueue(int) puts into its queue after the last key.
 * NOTE(review): consumers presumably have to detect it by reference comparison (==),
 * because arrays do not compare by content with equals() - confirm against the callers.
 */
public final static byte[] POISON_QUEUE_ENTRY = "POISON".getBytes();
/**
 * Provides all keys of the blob through a bounded queue that is filled concurrently.
 * <p>
 * A new thread is started which enumerates {@code blob.keys(true, false)} and puts every key
 * into the queue; after the enumeration it puts {@link #POISON_QUEUE_ENTRY} as the final
 * element. This method returns right after starting that thread.
 * <p>
 * If the feeding thread is interrupted while putting a key, it stops enumerating but still
 * tries to put the poison entry. If the key enumeration fails with an IOException, the
 * exception is dropped and the poison entry is put as well. If the thread is interrupted
 * while putting the poison entry, the entry is not delivered.
 *
 * @param size the capacity of the queue
 * @return the queue that receives the keys followed by the poison entry
 */
public BlockingQueue<byte[]> keyQueue(final int size) {
    final ArrayBlockingQueue<byte[]> queue = new ArrayBlockingQueue<byte[]>(size);
    final Thread feeder = new Thread() {
        @Override
        public void run() {
            try {
                final Iterator<byte[]> keyIterator = MapHeap.this.blob.keys(true, false);
                boolean interrupted = false;
                while (!interrupted && keyIterator.hasNext()) {
                    try {
                        queue.put(keyIterator.next());
                    } catch (InterruptedException e) {
                        // stop feeding keys; the poison entry is still attempted below
                        interrupted = true;
                    }
                }
            } catch (final IOException e) {
                // the enumeration failed; fall through so that the consumer still gets the poison entry
            }
            try {
                queue.put(MapHeap.POISON_QUEUE_ENTRY);
            } catch (InterruptedException e) {
                // interrupted while signalling the end; nothing more is done
            }
        }
    };
    feeder.start();
    return queue;
}
@Override
public Collection<Map<String, String>> values() {
// this method shall not be used because it is not appropriate for this kind of data

@ -98,6 +98,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
is.close();
is = null;
assert this.index.size() == file.length() / (keylength + idxbytes);
trim();
}
public void trim() {
@ -415,6 +416,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
return this.result.get();
}
@Override
public final HandleMap call() throws IOException {
try {
finishloop: while (true) {
@ -439,7 +441,8 @@ public final class HandleMap implements Iterable<Row.Entry> {
}
}
public Iterator<Row.Entry> iterator() {
@Override
public Iterator<Row.Entry> iterator() {
return rows(true, null);
}
}

@ -473,6 +473,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
}
}
}
references.trim();
System.out.println("CELL REFERENCE COLLECTION finished");
return references;
}

@ -187,6 +187,7 @@ public class Table implements Index, Iterable<Row.Entry> {
}
}
}
this.index.trim();
// open the file
this.file = new BufferedRecords(new Records(tablefile, rowdef.objectsize), this.buffersize);
@ -594,6 +595,7 @@ public class Table implements Index, Iterable<Row.Entry> {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public boolean put(final Entry row) throws IOException, RowSpaceExceededException {
assert row != null;
if (this.file == null || row == null) return true;
@ -702,6 +704,7 @@ public class Table implements Index, Iterable<Row.Entry> {
}
}
/**
 * Deletes the row with the given key by calling {@code remove(key)}.
 *
 * @param key the key of the row to delete
 * @return {@code true} if {@code remove(key)} returned a non-null value, {@code false} otherwise
 * @throws IOException if {@code remove(key)} fails
 */
@Override
public boolean delete(final byte[] key) throws IOException {
    if (remove(key) == null) {
        return false;
    }
    return true;
}

@ -556,7 +556,7 @@ public final class Switchboard extends serverSwitch
getDataPath(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT);
this.log.logInfo("HTCACHE Path = " + this.htCachePath.getAbsolutePath());
final long maxCacheSize =
1024 * 1024 * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte
1024L * 1024L * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte
Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize);
// create the surrogates directories

@ -109,8 +109,8 @@ public class SnippetProcess {
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);
this.result = new WeakPriorityBlockingQueue<ResultEntry>(Math.max(1000, 10 * query.itemsPerPage())); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new WeakPriorityBlockingQueue<MediaSnippet>(Math.max(1000, 10 * query.itemsPerPage()));
// snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search

@ -206,10 +206,10 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (de.anomic.crawler.Cache.has(url.hash())) {
// get the sentences from the cache
final Request request = loader.request(url, true, reindexing);
final Request request = loader == null ? null : loader.request(url, true, reindexing);
Response response;
try {
response = loader == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true);
response = loader == null || request == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true);
} catch (IOException e1) {
response = null;
}

Loading…
Cancel
Save