Merge branch 'master' of git://gitorious.org/yacy/rc1.git

pull/1/head
reger 13 years ago
commit a48f37bf70

@ -46,7 +46,7 @@
<classpathentry kind="lib" path="lib/httpclient-4.1.3.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.1.3.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.1.jar"/>
<classpathentry kind="lib" path="lib/apache-solr-solrj-3.6.0.jar"/>
<classpathentry kind="lib" path="lib/apache-solr-solrj-3.6.0.jar" sourcepath="/solrj/src"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="lib" path="lib/icu4j-core.jar"/>
<classpathentry kind="output" path="gen"/>

@ -698,6 +698,7 @@ crawler.http.acceptEncoding=gzip
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.http.maxFileSize=10485760
# redirect switch of the http crawler; the value must be a bare true or false
# (anything after the word, e.g. a trailing ';', becomes part of the value)
crawler.http.FollowRedirects=true
# ftp crawler specific settings; size in bytes
crawler.ftp.maxFileSize=10485760

@ -375,7 +375,7 @@ public class Balancer {
synchronized (this) {
byte[] failhash = null;
while (!this.urlFileIndex.isEmpty()) {
byte[] nexthash = getbest();
byte[] nexthash = getbest(robots);
if (nexthash == null) return null;
// check minimumDelta and if necessary force a sleep
@ -442,7 +442,7 @@ public class Balancer {
return crawlEntry;
}
private byte[] getbest() {
private byte[] getbest(final RobotsTxt robots) {
// check if we need to get entries from the file index
try {
@ -469,7 +469,23 @@ public class Balancer {
final byte[] n = entry.getValue().removeOne();
if (n == null) continue;
final long w = Latency.waitingRemainingGuessed(entry.getKey(), this.minimumLocalDelta, this.minimumGlobalDelta);
long w;
Row.Entry rowEntry;
try {
rowEntry=(n == null) ? null : this.urlFileIndex.get(n, false);
if (rowEntry == null) {
w = Latency.waitingRemainingGuessed(entry.getKey(), this.minimumLocalDelta, this.minimumGlobalDelta);
} else {
Request crawlEntry = new Request(rowEntry);
w = Latency.waitingRemaining(crawlEntry.url(), robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta);
//System.out.println("*** waitingRemaining = " + w + ", guessed = " + Latency.waitingRemainingGuessed(entry.getKey(), this.minimumLocalDelta, this.minimumGlobalDelta));
//System.out.println("*** explained: " + Latency.waitingRemainingExplain(crawlEntry.url(), robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta));
}
} catch (IOException e1) {
w = Latency.waitingRemainingGuessed(entry.getKey(), this.minimumLocalDelta, this.minimumGlobalDelta);
}
if (w < smallestWaiting) {
smallestWaiting = w;
besturlhash = n;

@ -82,8 +82,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@ -94,11 +94,11 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public void close() {
public synchronized void close() {
// wait for all workers to finish
for (final Loader w: this.workers.values()) {
w.interrupt();

@ -121,7 +121,7 @@ public final class CrawlStacker {
this.slowQueue.announceShutdown();
}
public void close() {
public synchronized void close() {
this.log.logInfo("Shutdown. waiting for remaining " + size() + " crawl stacker job entries. please wait.");
this.fastQueue.announceShutdown();
this.slowQueue.announceShutdown();

@ -541,7 +541,7 @@ public final class CrawlSwitchboard
return hasDoneSomething;
}
public void close() {
public synchronized void close() {
this.profilesActiveCrawlsCache.clear();
this.profilesActiveCrawls.close();
this.profilesPassiveCrawls.close();

@ -36,6 +36,8 @@ import net.yacy.kelondro.util.MemoryControl;
public class Latency {
private final static int DEFAULT_AVERAGE = 300;
// the map is a mapping from host names to host configurations
private static final ConcurrentHashMap<String, Host> map = new ConcurrentHashMap<String, Host>();
@ -57,7 +59,7 @@ public class Latency {
if (host == null) return;
Host h = map.get(host);
if (h == null) {
h = new Host(host, 3000);
h = new Host(host, DEFAULT_AVERAGE);
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
map.put(host, h);
} else {
@ -70,7 +72,7 @@ public class Latency {
if (host == null) return;
Host h = map.get(host);
if (h == null) {
h = new Host(host, 3000);
h = new Host(host, DEFAULT_AVERAGE);
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
map.put(host, h);
} else {
@ -121,31 +123,28 @@ public class Latency {
* which expresses how long the time is over the minimum waiting time.
*/
public static long waitingRemainingGuessed(final String hostname, final long minimumLocalDelta, final long minimumGlobalDelta) {
if (hostname == null) return 0;
final Host host = map.get(hostname);
if (host == null) return 0;
if (hostname == null) return Long.MIN_VALUE;
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc();
// first check if the domain was _ever_ accessed before
final Host host = map.get(hostname);
if (host == null) return Long.MIN_VALUE; // no delay if host is new
// find the minimum waiting time based on the network domain (local or global)
final boolean local = Domains.isLocal(hostname, null);
long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta;
if (local) return minimumLocalDelta;
long waiting = minimumGlobalDelta;
// if we have accessed the domain many times, get slower (the flux factor)
if (!local) waiting += host.flux(waiting);
waiting += host.flux(waiting);
// use the access latency as rule how fast we can access the server
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
waiting = Math.max(waiting, (local) ? host.average() / 2 : host.average() * 2);
waiting = Math.max(waiting, host.average() * 2);
// prevent a robots file from stopping our indexer completely
waiting = Math.min(60000, waiting);
// return time that is remaining
//System.out.println("Latency: " + (waiting - timeSinceLastAccess));
return Math.max(0, waiting - timeSinceLastAccess);
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc();
return Math.max(0, Math.min(60000, waiting) - timeSinceLastAccess);
}
/**
@ -169,10 +168,7 @@ public class Latency {
// find the minimum waiting time based on the network domain (local or global)
final boolean local = url.isLocal();
if (local) return minimumLocalDelta;
long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta;
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc();
long waiting = minimumGlobalDelta;
// for CGI accesses, we double the minimum time
// mostly there is a database access in the background
@ -180,33 +176,29 @@ public class Latency {
if (url.isCGI()) waiting = waiting * 2;
// if we have accessed the domain many times, get slower (the flux factor)
if (!local && host != null) waiting += host.flux(waiting);
// find the delay as given by robots.txt on target site
long robotsDelay = 0;
if (!local) {
RobotsTxtEntry robotsEntry;
try {
robotsEntry = robots.getEntry(url, thisAgents);
} catch (final IOException e) {
robotsEntry = null;
}
robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis();
if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return 0; // no limits if granted exclusively for this peer
}
waiting = Math.max(waiting, robotsDelay);
waiting += host.flux(waiting);
// use the access latency as rule how fast we can access the server
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
waiting = Math.max(waiting, (local) ? host.average() / 2 : host.average() * 2);
waiting = Math.max(waiting, host.average() * 2);
// prevent a robots file from stopping our indexer completely
waiting = Math.min(60000, waiting);
// find the delay as given by robots.txt on target site
long robotsDelay = 0;
RobotsTxtEntry robotsEntry;
try {
robotsEntry = robots.getEntry(url, thisAgents);
} catch (final IOException e) {
robotsEntry = null;
}
robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis();
if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return 0; // no limits if granted exclusively for this peer
// return time that is remaining
//System.out.println("Latency: " + (waiting - timeSinceLastAccess));
return Math.max(0, waiting - timeSinceLastAccess);
waiting = Math.max(waiting, robotsDelay);
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc();
return Math.max(0, Math.min(60000, waiting) - timeSinceLastAccess);
}
@ -214,46 +206,51 @@ public class Latency {
// first check if the domain was _ever_ accessed before
final Host host = host(url);
if (host == null) return "host " + host + " never accessed before -> 0"; // no delay if host is new
if (host == null) return "host " + host + " never accessed before -> Long.MIN_VALUE"; // no delay if host is new
final StringBuilder s = new StringBuilder(50);
// find the minimum waiting time based on the network domain (local or global)
final boolean local = url.isLocal();
final long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta;
if (local) return "local host -> minimum local: " + minimumLocalDelta;
long waiting = minimumGlobalDelta;
s.append("minimumDelta = ").append(waiting);
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = (host == null) ? 0 : System.currentTimeMillis() - host.lastacc();
s.append(", timeSinceLastAccess = ").append(timeSinceLastAccess);
// for CGI accesses, we double the minimum time
// mostly there is a database access in the background
// which creates a lot of unwanted IO on target site
if (url.isCGI()) s.append(", isCGI = true -> double");
if (url.isCGI()) { waiting = waiting * 2; s.append(", isCGI = true -> double"); }
// if we have accessed the domain many times, get slower (the flux factor)
if (!local && host != null) s.append(", flux = ").append(host.flux(waiting));
long flux = host.flux(waiting);
waiting += flux;
s.append(", flux = ").append(flux);
// use the access latency as rule how fast we can access the server
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
s.append(", host.average = ").append(host.average());
waiting = Math.max(waiting, host.average() * 2);
// find the delay as given by robots.txt on target site
long robotsDelay = 0;
if (!local) {
RobotsTxtEntry robotsEntry;
try {
robotsEntry = robots.getEntry(url, thisAgents);
} catch (final IOException e) {
robotsEntry = null;
}
robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis();
if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return "no waiting for exclusive granted peer"; // no limits if granted exclusively for this peer
RobotsTxtEntry robotsEntry;
try {
robotsEntry = robots.getEntry(url, thisAgents);
} catch (final IOException e) {
robotsEntry = null;
}
s.append(", robots.delay = ").append(robotsDelay);
robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis();
if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return "no waiting for exclusive granted peer"; // no limits if granted exclusively for this peer
// use the access latency as rule how fast we can access the server
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
if (host != null) s.append(", host.average = ").append(host.average());
waiting = Math.max(waiting, robotsDelay);
s.append(", robots.delay = ").append(robotsDelay);
// the time since last access to the domain is the basis of the remaining calculation
final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc();
s.append(", ((waitig = ").append(waiting);
s.append(") - (timeSinceLastAccess = ").append(timeSinceLastAccess).append(")) = ");
s.append(waiting - timeSinceLastAccess);
return s.toString();
}

@ -91,7 +91,7 @@ public class NoticedURL {
this.noloadStack.clear();
}
public void close() {
public synchronized void close() {
Log.logInfo("NoticedURL", "CLOSING ALL STACKS");
if (this.coreStack != null) {
this.coreStack.close();

@ -37,6 +37,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.cora.services.federated.solr.SolrShardingConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
@ -49,6 +50,7 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.index.SolrConfiguration;
import de.anomic.crawler.retrieval.Request;
public class ZURL implements Iterable<ZURL.Entry> {
@ -79,15 +81,18 @@ public class ZURL implements Iterable<ZURL.Entry> {
private Index urlIndex;
private final Queue<byte[]> stack;
private final SolrConnector solrConnector;
private final SolrConfiguration solrConfiguration;
public ZURL(
final SolrConnector solrConnector,
final SolrConfiguration solrConfiguration,
final File cachePath,
final String tablename,
final boolean startWithEmptyFile,
final boolean useTailCache,
final boolean exceed134217727) {
this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in a file
cachePath.mkdirs();
final File f = new File(cachePath, tablename);
@ -109,8 +114,10 @@ public class ZURL implements Iterable<ZURL.Entry> {
this.stack = new LinkedBlockingQueue<byte[]>();
}
public ZURL(final SolrShardingConnector solrConnector) {
public ZURL(final SolrShardingConnector solrConnector,
final SolrConfiguration solrConfiguration) {
this.solrConnector = solrConnector;
this.solrConfiguration = solrConfiguration;
// creates a new ZURL in RAM
this.urlIndex = new RowSet(rowdef);
this.stack = new LinkedBlockingQueue<byte[]>();
@ -156,7 +163,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
if (this.solrConnector != null && (failCategory == FailCategory.TEMPORARY_NETWORK_FAILURE || failCategory == FailCategory.FINAL_ROBOTS_RULE)) {
// send the error to solr
try {
this.solrConnector.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
SolrDoc errorDoc = this.solrConfiguration.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
this.solrConnector.add(errorDoc);
} catch (final IOException e) {
Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
}

@ -38,6 +38,7 @@ import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.Latency;
@ -127,13 +128,15 @@ public final class HTTPLoader {
client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
final byte[] responseBody = client.GETbytes(url, maxFileSize);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
if (code > 299 && code < 310) {
// redirection (content may be empty)
// send request
final byte[] responseBody = client.GETbytes(url, maxFileSize);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
if (code > 299 && code < 310) {
// redirection (content may be empty)
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
if (header.containsKey(HeaderFramework.LOCATION)) {
// getting redirection URL
String redirectionUrlString = header.get(HeaderFramework.LOCATION);
@ -172,40 +175,45 @@ public final class HTTPLoader {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided", code);
throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
}
} else if (responseBody == null) {
// no response, reject file
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", code);
throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
} else if (code == 200 || code == 203) {
// the transfer is ok
// we write the new cache entry to file system directly
final long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
// check length again in case it was not possible to get the length before loading
if (maxFileSize > 0 && contentLength > maxFileSize) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", code);
throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
}
// create a new cache entry
final CrawlProfile profile = this.sb.crawler.getActive(request.profileHandle().getBytes());
response = new Response(
request,
requestHeader,
header,
Integer.toString(code),
profile,
responseBody
);
return response;
} else {
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", code);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
} else {
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", code);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
}
} else if (responseBody == null) {
// no response, reject file
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", code);
throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
} else if (code == 200 || code == 203) {
// the transfer is ok
// we write the new cache entry to file system directly
final long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
// check length again in case it was not possible to get the length before loading
if (maxFileSize > 0 && contentLength > maxFileSize) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", code);
throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
}
// create a new cache entry
final CrawlProfile profile = this.sb.crawler.getActive(request.profileHandle().getBytes());
response = new Response(
request,
requestHeader,
header,
Integer.toString(code),
profile,
responseBody
);
return response;
} else {
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", code);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
}
}
public static Response load(final Request request) throws IOException {

@ -88,7 +88,7 @@ public class BlogBoard {
return database.containsKey(UTF8.getBytes(key));
}
public void close() {
public synchronized void close() {
database.close();
}

@ -81,7 +81,7 @@ public class BlogBoardComments {
return this.database.size();
}
public void close() {
public synchronized void close() {
this.database.close();
}

@ -53,7 +53,7 @@ public class BookmarkDate {
this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
public void close() {
public synchronized void close() {
this.datesTable.close();
}

@ -113,7 +113,7 @@ public class BookmarksDB {
// bookmarksDB's functions for 'destructing' the class
// -----------------------------------------------------
public void close(){
public synchronized void close(){
this.bookmarks.close();
this.tags.clear();
this.dates.close();
@ -764,4 +764,4 @@ public class BookmarksDB {
/**
 * Looks up the entry that the dates store holds for the given date string.
 *
 * @param date the date key that is passed on to the dates store
 * @return the entry delivered by the dates store for that key
 */
public BookmarkDate.Entry getDate(final String date) {
    // pure delegation: the result of the dates store is handed back unchanged
    final BookmarkDate.Entry dateEntry = this.dates.getDate(date);
    return dateEntry;
}
}
}

@ -67,7 +67,7 @@ public class MessageBoard {
return database.size();
}
public void close() {
public synchronized void close() {
database.close();
}

@ -78,7 +78,7 @@ public final class UserDB {
}
}
public void close() {
public synchronized void close() {
userTable.close();
}

@ -100,7 +100,7 @@ public class WikiBoard {
/**
* Closes database files.
*/
public void close() {
public synchronized void close() {
datbase.close();
bkpbase.close();
}

@ -62,7 +62,7 @@ import java.io.UnsupportedEncodingException;
*/
public class ChunkedInputStream extends InputStream {
/** The inputstream that we're wrapping */
private InputStream in;
private final InputStream in;
/** The chunk size */
private int chunkSize;
@ -87,7 +87,7 @@ public class ChunkedInputStream extends InputStream {
* @throws IOException If an IO error occurs
*/
public ChunkedInputStream(final InputStream in) throws IOException {
if (in == null) {
throw new IllegalArgumentException("InputStream parameter may not be null");
}
@ -95,37 +95,38 @@ public class ChunkedInputStream extends InputStream {
this.pos = 0;
}
/**
* <p> Returns all the data in a chunked stream in coalesced form. A chunk
* is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
* is detected.</p>
*
*
* <p> Trailer headers are read automatically at the end of the stream and
* can be obtained with the getResponseFooters() method.</p>
*
* @return -1 if the end of the stream has been reached or the next data
* byte
* @throws IOException If an IO problem occurs
*
*
* @see HttpMethod#getResponseFooters()
*/
@Override
public int read() throws IOException {
if (closed) {
if (this.closed) {
throw new IOException("Attempted read from closed stream.");
}
if (eof) {
if (this.eof) {
return -1;
}
if (pos >= chunkSize) {
}
if (this.pos >= this.chunkSize) {
nextChunk();
if (eof) {
if (this.eof) {
return -1;
}
}
pos++;
return in.read();
this.pos++;
return this.in.read();
}
/**
@ -139,20 +140,21 @@ public class ChunkedInputStream extends InputStream {
* @see java.io.InputStream#read(byte[], int, int)
* @throws IOException if an IO problem occurs.
*/
@Override
public int read(byte[] b, int off, int len) throws IOException {
if (closed) throw new IOException("Attempted read from closed stream.");
if (eof) return -1;
if (pos >= chunkSize) {
if (this.closed) throw new IOException("Attempted read from closed stream.");
if (this.eof) return -1;
if (this.pos >= this.chunkSize) {
nextChunk();
if (eof) {
if (this.eof) {
return -1;
}
}
len = Math.min(len, chunkSize - pos);
int count = in.read(b, off, len);
pos += count;
len = Math.min(len, this.chunkSize - this.pos);
int count = this.in.read(b, off, len);
this.pos += count;
return count;
}
@ -164,6 +166,7 @@ public class ChunkedInputStream extends InputStream {
* @see java.io.InputStream#read(byte[])
* @throws IOException if an IO problem occurs.
*/
@Override
public int read(byte[] b) throws IOException {
    // offer the complete array to the ranged read and report its result
    final int capacity = b.length;
    return read(b, 0, capacity);
}
@ -173,9 +176,9 @@ public class ChunkedInputStream extends InputStream {
* @throws IOException If an IO error occurs.
*/
private void readCRLF() throws IOException {
int cr = in.read();
int cr = this.in.read();
if (cr != '\r') throw new IOException("CRLF expected at end of chunk: cr != " + cr);
int lf = in.read();
int lf = this.in.read();
if (lf != '\n') throw new IOException("CRLF expected at end of chunk: lf != " + lf);
}
@ -185,12 +188,12 @@ public class ChunkedInputStream extends InputStream {
* @throws IOException If an IO error occurs.
*/
private void nextChunk() throws IOException {
if (!bof) readCRLF();
chunkSize = getChunkSizeFromInputStream(in);
bof = false;
pos = 0;
if (chunkSize == 0) {
eof = true;
if (!this.bof) readCRLF();
this.chunkSize = getChunkSizeFromInputStream(this.in);
this.bof = false;
this.pos = 0;
if (this.chunkSize == 0) {
this.eof = true;
skipTrailerHeaders();
}
}
@ -203,24 +206,24 @@ public class ChunkedInputStream extends InputStream {
* @param in The new input stream.
* @param required <tt>true</tt> if a valid chunk must be present,
* <tt>false</tt> otherwise.
*
*
* @return the chunk size as integer
*
*
* @throws IOException when the chunk size could not be parsed
*/
private static int getChunkSizeFromInputStream(final InputStream in)
private static int getChunkSizeFromInputStream(final InputStream in)
throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
int state = 0;
int state = 0;
while (state != -1) {
int b = in.read();
if (b == -1) {
if (b == -1) {
throw new IOException("chunked stream ended unexpectedly");
}
switch (state) {
case 0:
case 0:
switch (b) {
case '\r':
state = 1;
@ -286,7 +289,7 @@ public class ChunkedInputStream extends InputStream {
*
* @param data the byte array to be encoded
* @return The string representation of the byte array
*
*
* @since 3.0
*/
private static String getAsciiString(final byte[] data) throws IOException {
@ -300,14 +303,14 @@ public class ChunkedInputStream extends InputStream {
throw new IOException("HttpClient requires ASCII support");
}
}
/**
* Reads and discards the trailer headers.
* @throws IOException If an IO problem occurs
*/
private void skipTrailerHeaders() throws IOException {
for (; ;) {
String line = readLine(in, "US-ASCII");
String line = readLine(this.in, "US-ASCII");
if ((line == null) || (line.trim().length() < 1)) break;
}
}
@ -324,7 +327,7 @@ public class ChunkedInputStream extends InputStream {
*
* @throws IOException if an I/O problem occurs
* @return a line from the stream
*
*
* @since 3.0
*/
private static String readLine(InputStream inputStream, String charset) throws IOException {
@ -348,7 +351,7 @@ public class ChunkedInputStream extends InputStream {
final String result = getString(rawdata, 0, len - offset, charset);
return result;
}
/**
* Converts the byte array of HTTP content characters to a string. If
@ -357,16 +360,16 @@ public class ChunkedInputStream extends InputStream {
*
* @param data the byte array to be encoded
* @param offset the index of the first byte to encode
* @param length the number of bytes to encode
* @param length the number of bytes to encode
* @param charset the desired character encoding
* @return The result of the conversion.
*
*
* @since 3.0
*/
private static String getString(
final byte[] data,
int offset,
int length,
final byte[] data,
int offset,
int length,
String charset
) {
@ -384,12 +387,12 @@ public class ChunkedInputStream extends InputStream {
return new String(data, offset, length);
}
}
/**
* Return byte array from an (unchunked) input stream.
* Stop reading when <tt>"\n"</tt> terminator encountered
* Stop reading when <tt>"\n"</tt> terminator encountered
* If the stream ends before the line terminator is found,
* the last part of the string will still be returned.
* the last part of the string will still be returned.
* If no input data available, <code>null</code> is returned.
*
* @param inputStream the stream to read from
@ -412,22 +415,23 @@ public class ChunkedInputStream extends InputStream {
}
return buf.toByteArray();
}
/**
* Upon close, this reads the remainder of the chunked message,
* leaving the underlying socket at a position to start reading the
* next response without scanning.
* @throws IOException If an IO problem occurs.
*/
public void close() throws IOException {
if (!closed) {
@Override
public synchronized void close() throws IOException {
if (!this.closed) {
try {
if (!eof) {
if (!this.eof) {
exhaustInputStream(this);
}
} finally {
eof = true;
closed = true;
this.eof = true;
this.closed = true;
}
}
}

@ -1,4 +1,4 @@
//httpChunkedOutputStream.java
//httpChunkedOutputStream.java
//-------------------------------------
//part of YACY
//(C) by Michael Peter Christen; mc@yacy.net
@ -33,21 +33,21 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.server.serverCore;
public final class ChunkedOutputStream extends FilterOutputStream {
private boolean finished = false;
private boolean finished = false;
/**
 * Creates a chunked output stream on top of the given stream.
 *
 * @param out the stream that is handed to the FilterOutputStream superclass;
 *            the write methods of this class send their chunks to it
 */
public ChunkedOutputStream(final OutputStream out) {
super(out);
}
public void close() throws IOException {
@Override
public synchronized void close() throws IOException {
if (!this.finished) this.finish();
this.out.close();
}
public void finish() throws IOException {
if (!this.finished) {
this.out.write((byte) 48);
@ -57,55 +57,58 @@ public final class ChunkedOutputStream extends FilterOutputStream {
this.finished = true;
}
}
@Override
public void write(final byte[] b) throws IOException {
if (this.finished) throw new IOException("ChunkedOutputStream already finalized.");
if (this.finished) throw new IOException("ChunkedOutputStream already finalized.");
if (b.length == 0) return;
this.out.write(ASCII.getBytes(Integer.toHexString(b.length)));
this.out.write(serverCore.CRLF);
this.out.write(b);
this.out.write(serverCore.CRLF);
this.out.flush();
}
/**
 * Writes {@code len} bytes of {@code b}, starting at index {@code off}, as one chunk:
 * the chunk length in hexadecimal, CRLF, the payload, CRLF. The underlying stream is
 * flushed afterwards. A request with {@code len == 0} writes nothing at all.
 *
 * @param b   buffer holding the payload
 * @param off index of the first payload byte in {@code b}
 * @param len number of payload bytes
 * @throws IOException if this stream was already finished, or if the underlying stream fails
 */
@Override
public void write(final byte[] b, final int off, final int len) throws IOException {
    if (this.finished) {
        throw new IOException("ChunkedOutputStream already finalized.");
    }
    if (len != 0) {
        // chunk header: payload size as hex digits, terminated by CRLF
        final byte[] chunkSize = ASCII.getBytes(Integer.toHexString(len));
        this.out.write(chunkSize);
        this.out.write(serverCore.CRLF);

        // chunk payload, again terminated by CRLF
        this.out.write(b, off, len);
        this.out.write(serverCore.CRLF);

        this.out.flush();
    }
}
/**
 * Writes {@code len} bytes taken from the given {@link ByteBuffer}, starting at
 * {@code off}, as one chunk: the chunk length in hexadecimal, CRLF, the payload, CRLF.
 * The underlying stream is flushed afterwards. A request with {@code len == 0}
 * writes nothing at all.
 *
 * @param b   buffer holding the payload
 * @param off offset of the first payload byte inside {@code b}
 * @param len number of payload bytes
 * @throws IOException if this stream was already finished, or if the underlying stream fails
 */
public void write(final ByteBuffer b, final int off, final int len) throws IOException {
    if (this.finished) {
        throw new IOException("ChunkedOutputStream already finalized.");
    }
    if (len != 0) {
        // chunk header: payload size as hex digits, terminated by CRLF
        final byte[] chunkSize = ASCII.getBytes(Integer.toHexString(len));
        this.out.write(chunkSize);
        this.out.write(serverCore.CRLF);

        // the payload is extracted only after the header went out, as before
        final byte[] payload = b.getBytes(off, len);
        this.out.write(payload);
        this.out.write(serverCore.CRLF);

        this.out.flush();
    }
}
public void write(final InputStream b) throws IOException {
if (this.finished) throw new IOException("ChunkedOutputStream already finalized.");
final int len = b.available();
if (len == 0) return;
this.out.write(ASCII.getBytes(Integer.toHexString(len)));
this.out.write(serverCore.CRLF);
FileUtils.copy(b, out, len);
FileUtils.copy(b, this.out, len);
this.out.write(serverCore.CRLF);
this.out.flush();
}
@Override
public void write(final int b) throws IOException {
if (this.finished) throw new IOException("ChunkedOutputStream already finalized.");
this.out.write(UTF8.getBytes("1"));
this.out.write(serverCore.CRLF);
this.out.write(b);

@ -62,12 +62,12 @@ import java.io.InputStream;
* @since 2.0
*/
public class ContentLengthInputStream extends InputStream {
/**
* The maximum number of bytes that can be read from the stream. Subsequent
* read operations will return -1.
*/
private long contentLength;
private final long contentLength;
/** The current position */
private long pos = 0;
@ -86,7 +86,7 @@ public class ContentLengthInputStream extends InputStream {
* @param in The stream to wrap
* @param contentLength The maximum number of bytes that can be read from
* the stream. Subsequent read operations will return -1.
*
*
* @since 3.0
*/
public ContentLengthInputStream(InputStream in, long contentLength) {
@ -102,14 +102,15 @@ public class ContentLengthInputStream extends InputStream {
* primed to parse the next response.</p>
* @throws IOException If an IO problem occurs.
*/
public void close() throws IOException {
if (!closed) {
@Override
public synchronized void close() throws IOException {
if (!this.closed) {
try {
ChunkedInputStream.exhaustInputStream(this);
} finally {
// close after above so that we don't throw an exception trying
// to read after closed!
closed = true;
this.closed = true;
}
}
}
@ -121,15 +122,16 @@ public class ContentLengthInputStream extends InputStream {
* @throws IOException If an IO problem occurs
* @see java.io.InputStream#read()
*/
@Override
public int read() throws IOException {
if (closed) {
if (this.closed) {
throw new IOException("Attempted read from closed stream.");
}
if (pos >= contentLength) {
if (this.pos >= this.contentLength) {
return -1;
}
pos++;
this.pos++;
return this.wrappedStream.read();
}
@ -145,20 +147,21 @@ public class ContentLengthInputStream extends InputStream {
*
* @throws java.io.IOException Should an error occur on the wrapped stream.
*/
@Override
public int read (byte[] b, int off, int len) throws java.io.IOException {
if (closed) {
if (this.closed) {
throw new IOException("Attempted read from closed stream.");
}
if (pos >= contentLength) {
if (this.pos >= this.contentLength) {
return -1;
}
if (pos + len > contentLength) {
len = (int) (contentLength - pos);
if (this.pos + len > this.contentLength) {
len = (int) (this.contentLength - this.pos);
}
int count = this.wrappedStream.read(b, off, len);
pos += count;
this.pos += count;
return count;
}
@ -170,6 +173,7 @@ public class ContentLengthInputStream extends InputStream {
* @throws IOException If an IO problem occurs
* @see java.io.InputStream#read(byte[])
*/
@Override
public int read(byte[] b) throws IOException {
    // Fill as much of the array as the ranged read allows; all limit and
    // closed-stream handling lives in read(byte[], int, int).
    final int wholeArray = b.length;
    return this.read(b, 0, wholeArray);
}
@ -182,20 +186,22 @@ public class ContentLengthInputStream extends InputStream {
* @throws IOException If an error occurs while skipping bytes.
* @see InputStream#skip(long)
*/
@Override
public long skip(long n) throws IOException {
// make sure we don't skip more bytes than are
// make sure we don't skip more bytes than are
// still available
long length = Math.min(n, contentLength - pos);
long length = Math.min(n, this.contentLength - this.pos);
// skip and keep track of the bytes actually skipped
length = this.wrappedStream.skip(length);
// only add the skipped bytes to the current position
// if bytes were actually skipped
if (length > 0) {
pos += length;
this.pos += length;
}
return length;
}
@Override
public int available() throws IOException {
if (this.closed) {
return 0;
@ -204,7 +210,7 @@ public class ContentLengthInputStream extends InputStream {
if (this.pos + avail > this.contentLength ) {
avail = (int)(this.contentLength - this.pos);
}
return avail;
return avail;
}
}

@ -560,7 +560,7 @@ public final class serverCore extends AbstractBusyThread implements BusyThread {
return this.stopped;
}
public void close() {
public synchronized void close() {
// closing the socket to the client
if (this.controlSocket != null) try {
this.controlSocket.close();

@ -117,7 +117,7 @@ public class TripleStore {
return this.store.keyIterator();
}
public void close() {
public synchronized void close() {
this.store.close();
}

@ -24,8 +24,10 @@
package net.yacy.cora.order;
import java.io.Serializable;
public interface ByteOrder extends Order<byte[]> {
public interface ByteOrder extends Order<byte[]>, Serializable {
@Override
public boolean wellformed(byte[] a);

@ -50,6 +50,7 @@ import net.yacy.cora.plugin.ClassProvider;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.storage.KeyList;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
public class Domains {
@ -468,8 +469,8 @@ public class Domains {
noLocalCheck = v;
}
public static void close() {
if (globalHosts != null) try {globalHosts.close();} catch (final IOException e) {}
public static synchronized void close() {
if (globalHosts != null) try {globalHosts.close();} catch (final IOException e) {Log.logException(e);}
}
/**

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -74,16 +72,6 @@ public interface SolrConnector {
public void add(final SolrDoc solrdoc) throws IOException, SolrException;
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException;
/**
* register an entry as error document
* @param digestURI
* @param failReason
* @param httpstatus
* @throws IOException
*/
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException;
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -1,620 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file was part of the solrj package and used the apache http client 3.1
* It was modified and adopted to work with the apache http client 4.1
* using the net.yacy.cora connection package of YaCy
* Code modifications (C) under Apache License 2.0 by Michael Christen, 14.4.2011
*/
package net.yacy.cora.services.federated.solr;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.InputStreamBody;
import org.apache.http.entity.mime.content.StringBody;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
/**
* The {@link SolrHTTPClient} uses the Apache Commons HTTP Client to connect to solr.
* <pre class="prettyprint" >SolrServer server = new CommonsHttpSolrServer( url );</pre>
*
* @version $Id: CommonsHttpSolrServer.java 1067552 2011-02-05 23:52:42Z koji $
* @since solr 1.3
*/
public class SolrHTTPClient extends SolrServer {
private static final long serialVersionUID = -4532572298724852268L;
/**
* User-Agent String as identified by the HTTP request by the {@link
* org.apache.commons.httpclient.HttpClient HttpClient} to the Solr
* server from the client.
*/
public static final String AGENT = "Solr["+SolrHTTPClient.class.getName()+"] 1.0";
public final static Charset utf8;
static {
utf8 = Charset.forName("UTF-8");
}
/**
* The URL of the Solr server.
*/
protected String _baseURL, host, solraccount, solrpw;
protected int port;
/**
* Default value: null / empty. <p/>
* Parameters that are added to every request regardless. This may be a place to add
* something like an authentication token.
*/
protected ModifiableSolrParams _invariantParams;
/**
* Default response parser is BinaryResponseParser <p/>
* This parser represents the default Response Parser chosen to
* parse the response if the parser were not specified as part of
* the request.
* @see org.apache.solr.client.solrj.impl.BinaryResponseParser
*/
protected ResponseParser _parser;
/**
* The RequestWriter used to write all requests to Solr
* @see org.apache.solr.client.solrj.request.RequestWriter
*/
protected RequestWriter requestWriter = new RequestWriter();
/**
* @param solrServerUrl The URL of the Solr server. For
* example, "<code>http://localhost:8983/solr/</code>"
* if you are using the standard distribution Solr webapp
* on your local machine.
*/
public SolrHTTPClient(final String solrServerUrl) throws MalformedURLException {
this(new URL(solrServerUrl));
}
/**
* @param baseURL The URL of the Solr server. For example,
* "<code>http://localhost:8983/solr/</code>" if you are using the
* standard distribution Solr webapp on your local machine.
*/
public SolrHTTPClient(final URL baseURL)
{
this(baseURL, new BinaryResponseParser());
}
/**
* @see #useMultiPartPost
* @see #_parser
*/
public SolrHTTPClient(final URL baseURL, final ResponseParser parser) {
this._baseURL = baseURL.toExternalForm();
if( this._baseURL.endsWith( "/" ) ) {
this._baseURL = this._baseURL.substring( 0, this._baseURL.length()-1 );
}
if( this._baseURL.indexOf( '?' ) >=0 ) {
throw new RuntimeException( "Invalid base url for solrj. The base URL must not contain parameters: "+this._baseURL );
}
MultiProtocolURI u;
try {
u = new MultiProtocolURI(this._baseURL.toString());
this.host = u.getHost();
this.port = u.getPort();
final String userinfo = u.getUserInfo();
if (userinfo == null || userinfo.length() == 0) {
this.solraccount = ""; this.solrpw = "";
} else {
final int p = userinfo.indexOf(':');
if (p < 0) {
this.solraccount = userinfo; this.solrpw = "";
} else {
this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1);
}
}
} catch (final MalformedURLException e) {
this.solraccount = ""; this.solrpw = "";
this.host = ""; this.port = -1;
}
this._parser = parser;
}
//------------------------------------------------------------------------
//------------------------------------------------------------------------
/**
* Process the request. If {@link org.apache.solr.client.solrj.SolrRequest#getResponseParser()} is null, then use
* {@link #getParser()}
* @param request The {@link org.apache.solr.client.solrj.SolrRequest} to process
* @return The {@link org.apache.solr.common.util.NamedList} result
* @throws SolrServerException
* @throws IOException
*
* @see #request(org.apache.solr.client.solrj.SolrRequest, org.apache.solr.client.solrj.ResponseParser)
*/
@Override
public NamedList<Object> request( final SolrRequest request ) throws SolrServerException, IOException
{
ResponseParser responseParser = request.getResponseParser();
if (responseParser == null) {
responseParser = this._parser;
}
return request(request, responseParser);
}
public NamedList<Object> request(final SolrRequest request, final ResponseParser processor) throws SolrServerException, IOException {
SolrParams params = request.getParams();
final Collection<ContentStream> streams = this.requestWriter.getContentStreams(request);
String path = this.requestWriter.getPath(request);
if( path == null || !path.startsWith( "/" ) ) {
path = "/select";
}
// The parser 'wt=' and 'version=' params are used instead of the original params
ResponseParser parser = request.getResponseParser();
if( parser == null ) {
parser = this._parser;
}
final ModifiableSolrParams wparams = new ModifiableSolrParams();
wparams.set( CommonParams.WT, parser.getWriterType() );
wparams.set( CommonParams.VERSION, parser.getVersion());
if( params == null ) {
params = wparams;
}
else {
params = SolrParams.wrapDefaults(wparams, params);
}
if( this._invariantParams != null ) {
params = SolrParams.wrapDefaults( this._invariantParams, params );
}
byte[] result = null;
final HTTPClient client = new HTTPClient();
if (this.solraccount.length() > 0 && this.solrpw.length() > 0 && this.host.length() > 0) {
HTTPClient.setAuth(this.host, this.port, this.solraccount, this.solrpw);
}
if (SolrRequest.METHOD.POST == request.getMethod()) {
final boolean isMultipart = ( streams != null && streams.size() > 1 );
if (streams == null || isMultipart) {
String url = this._baseURL + path;
final HashMap<String, ContentBody> parts = new HashMap<String, ContentBody>();
final Iterator<String> iter = params.getParameterNamesIterator();
while (iter.hasNext()) {
final String p = iter.next();
final String[] vals = params.getParams(p);
if (vals != null) {
for (final String v : vals) {
if (isMultipart) {
parts.put(p, new StringBody(v, utf8));
} else {
if (url.indexOf('?') >= 0) url += "&" + p + "=" + v; else url += "?" + p + "=" + v;
}
}
}
}
if (isMultipart) {
for (final ContentStream content : streams) {
parts.put(content.getName(), new InputStreamBody(content.getStream(), content.getContentType(), null));
}
}
try {
result = client.POSTbytes(url, parts, true);
} finally {
client.finish();
}
} else {
// It has one stream, this is the post body, put the params in the URL
final String pstr = ClientUtils.toQueryString(params, false);
final String url = this._baseURL + path + pstr;
// Single stream as body
// Using a loop just to get the first one
final ContentStream[] contentStream = new ContentStream[1];
for (final ContentStream content : streams) {
contentStream[0] = content;
break;
}
result = client.POSTbytes(url, contentStream[0].getStream(), contentStream[0].getStream().available());
}
} else if (SolrRequest.METHOD.GET == request.getMethod()) {
result = client.GETbytes( this._baseURL + path + ClientUtils.toQueryString( params, false ));
} else {
throw new SolrServerException("Unsupported method: "+request.getMethod() );
}
final int statusCode = client.getStatusCode();
if (statusCode != 200) {
throw new IOException("bad status code: " + statusCode + ", " + client.getHttpResponse().getStatusLine() + ", url = " + this._baseURL + path);
}
// Read the contents
//System.out.println("SOLR RESPONSE: " + UTF8.String(result));
final InputStream respBody = new ByteArrayInputStream(result);
return processor.processResponse(respBody, "UTF-8");
}
/*
* The original code for the request method
public NamedList<Object> request(final SolrRequest request, ResponseParser processor) throws SolrServerException, IOException {
HttpMethod method = null;
InputStream is = null;
SolrParams params = request.getParams();
Collection<ContentStream> streams = requestWriter.getContentStreams(request);
String path = requestWriter.getPath(request);
if( path == null || !path.startsWith( "/" ) ) {
path = "/select";
}
ResponseParser parser = request.getResponseParser();
if( parser == null ) {
parser = _parser;
}
// The parser 'wt=' and 'version=' params are used instead of the original params
ModifiableSolrParams wparams = new ModifiableSolrParams();
wparams.set( CommonParams.WT, parser.getWriterType() );
wparams.set( CommonParams.VERSION, parser.getVersion());
if( params == null ) {
params = wparams;
}
else {
params = new DefaultSolrParams( wparams, params );
}
if( _invariantParams != null ) {
params = new DefaultSolrParams( _invariantParams, params );
}
int tries = _maxRetries + 1;
try {
while( tries-- > 0 ) {
// Note: since we aren't do intermittent time keeping
// ourselves, the potential non-timeout latency could be as
// much as tries-times (plus scheduling effects) the given
// timeAllowed.
try {
if( SolrRequest.METHOD.GET == request.getMethod() ) {
if( streams != null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "GET can't send streams!" );
}
method = new GetMethod( _baseURL + path + ClientUtils.toQueryString( params, false ) );
}
else if( SolrRequest.METHOD.POST == request.getMethod() ) {
String url = _baseURL + path;
boolean isMultipart = ( streams != null && streams.size() > 1 );
if (streams == null || isMultipart) {
PostMethod post = new PostMethod(url);
post.getParams().setContentCharset("UTF-8");
if (!this.useMultiPartPost && !isMultipart) {
post.addRequestHeader("Content-Type",
"application/x-www-form-urlencoded; charset=UTF-8");
}
List<Part> parts = new LinkedList<Part>();
Iterator<String> iter = params.getParameterNamesIterator();
while (iter.hasNext()) {
String p = iter.next();
String[] vals = params.getParams(p);
if (vals != null) {
for (String v : vals) {
if (this.useMultiPartPost || isMultipart) {
parts.add(new StringPart(p, v, "UTF-8"));
} else {
post.addParameter(p, v);
}
}
}
}
if (isMultipart) {
int i = 0;
for (ContentStream content : streams) {
final ContentStream c = content;
String charSet = null;
String transferEncoding = null;
parts.add(new PartBase(c.getName(), c.getContentType(),
charSet, transferEncoding) {
@Override
protected long lengthOfData() throws IOException {
return c.getSize();
}
@Override
protected void sendData(OutputStream out)
throws IOException {
InputStream in = c.getStream();
try {
IOUtils.copy(in, out);
} finally {
in.close();
}
}
});
}
}
if (parts.size() > 0) {
post.setRequestEntity(new MultipartRequestEntity(parts
.toArray(new Part[parts.size()]), post.getParams()));
}
method = post;
}
// It is has one stream, it is the post body, put the params in the URL
else {
String pstr = ClientUtils.toQueryString(params, false);
PostMethod post = new PostMethod(url + pstr);
// Single stream as body
// Using a loop just to get the first one
final ContentStream[] contentStream = new ContentStream[1];
for (ContentStream content : streams) {
contentStream[0] = content;
break;
}
if (contentStream[0] instanceof RequestWriter.LazyContentStream) {
post.setRequestEntity(new RequestEntity() {
public long getContentLength() {
return -1;
}
public String getContentType() {
return contentStream[0].getContentType();
}
public boolean isRepeatable() {
return false;
}
public void writeRequest(OutputStream outputStream) throws IOException {
((RequestWriter.LazyContentStream) contentStream[0]).writeTo(outputStream);
}
}
);
} else {
is = contentStream[0].getStream();
post.setRequestEntity(new InputStreamRequestEntity(is, contentStream[0].getContentType()));
}
method = post;
}
}
else {
throw new SolrServerException("Unsupported method: "+request.getMethod() );
}
}
catch( NoHttpResponseException r ) {
// This is generally safe to retry on
method.releaseConnection();
method = null;
if(is != null) {
is.close();
}
// If out of tries then just rethrow (as normal error).
if( ( tries < 1 ) ) {
throw r;
}
//log.warn( "Caught: " + r + ". Retrying..." );
}
}
}
catch( IOException ex ) {
throw new SolrServerException("error reading streams", ex );
}
method.setFollowRedirects( _followRedirects );
method.addRequestHeader( "User-Agent", AGENT );
if( _allowCompression ) {
method.setRequestHeader( new Header( "Accept-Encoding", "gzip,deflate" ) );
}
try {
// Execute the method.
//System.out.println( "EXECUTE:"+method.getURI() );
int statusCode = _httpClient.executeMethod(method);
if (statusCode != HttpStatus.SC_OK) {
StringBuilder msg = new StringBuilder();
msg.append( method.getStatusLine().getReasonPhrase() );
msg.append( "\n\n" );
msg.append( method.getStatusText() );
msg.append( "\n\n" );
msg.append( "request: "+method.getURI() );
throw new SolrException(statusCode, java.net.URLDecoder.decode(msg.toString(), "UTF-8") );
}
// Read the contents
String charset = "UTF-8";
if( method instanceof HttpMethodBase ) {
charset = ((HttpMethodBase)method).getResponseCharSet();
}
InputStream respBody = method.getResponseBodyAsStream();
// Jakarta Commons HTTPClient doesn't handle any
// compression natively. Handle gzip or deflate
// here if applicable.
if( _allowCompression ) {
Header contentEncodingHeader = method.getResponseHeader( "Content-Encoding" );
if( contentEncodingHeader != null ) {
String contentEncoding = contentEncodingHeader.getValue();
if( contentEncoding.contains( "gzip" ) ) {
//log.debug( "wrapping response in GZIPInputStream" );
respBody = new GZIPInputStream( respBody );
}
else if( contentEncoding.contains( "deflate" ) ) {
//log.debug( "wrapping response in InflaterInputStream" );
respBody = new InflaterInputStream(respBody);
}
}
else {
Header contentTypeHeader = method.getResponseHeader( "Content-Type" );
if( contentTypeHeader != null ) {
String contentType = contentTypeHeader.getValue();
if( contentType != null ) {
if( contentType.startsWith( "application/x-gzip-compressed" ) ) {
//log.debug( "wrapping response in GZIPInputStream" );
respBody = new GZIPInputStream( respBody );
}
else if ( contentType.startsWith("application/x-deflate") ) {
//log.debug( "wrapping response in InflaterInputStream" );
respBody = new InflaterInputStream(respBody);
}
}
}
}
}
return processor.processResponse(respBody, charset);
}
catch (HttpException e) {
throw new SolrServerException( e );
}
catch (IOException e) {
throw new SolrServerException( e );
}
finally {
method.releaseConnection();
if(is != null) {
is.close();
}
}
}
*/
//-------------------------------------------------------------------
//-------------------------------------------------------------------
/**
* Retrieve the default list of parameters are added to every request regardless.
*
* @see #_invariantParams
*/
public ModifiableSolrParams getInvariantParams()
{
return this._invariantParams;
}
public String getBaseURL() {
return this._baseURL;
}
public void setBaseURL(final String baseURL) {
this._baseURL = baseURL;
}
public ResponseParser getParser() {
return this._parser;
}
/**
* Note: This setter method is <b>not thread-safe</b>.
* @param processor Default Response Parser chosen to parse the response if the parser were not specified as part of the request.
* @see org.apache.solr.client.solrj.SolrRequest#getResponseParser()
*/
public void setParser(final ResponseParser processor) {
this._parser = processor;
}
public void setRequestWriter(final RequestWriter requestWriter) {
this.requestWriter = requestWriter;
}
/**
* Adds the documents supplied by the given iterator.
*
* @param docIterator the iterator which returns SolrInputDocument instances
*
* @return the response from the SolrServer
*/
public UpdateResponse add(final Iterator<SolrInputDocument> docIterator)
throws SolrServerException, IOException {
final UpdateRequest req = new UpdateRequest();
req.setDocIterator(docIterator);
return req.process(this);
}
/**
* Adds the beans supplied by the given iterator.
*
* @param beanIterator the iterator which returns Beans
*
* @return the response from the SolrServer
*/
public UpdateResponse addBeans(final Iterator<?> beanIterator)
throws SolrServerException, IOException {
final UpdateRequest req = new UpdateRequest();
req.setDocIterator(new Iterator<SolrInputDocument>() {
@Override
public boolean hasNext() {
return beanIterator.hasNext();
}
@Override
public SolrInputDocument next() {
final Object o = beanIterator.next();
if (o == null) return null;
return getBinder().toSolrInputDocument(o);
}
@Override
public void remove() {
beanIterator.remove();
}
});
return req.process(this);
}
}

@ -5,8 +5,6 @@ import java.util.Collection;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -105,11 +103,6 @@ public class SolrMultipleConnector implements SolrConnector {
}
}
/**
 * Registers an entry as error document by passing the call, with unchanged
 * arguments, to the connector held in {@code this.solr}.
 *
 * @param digestURI  the URL the error belongs to
 * @param failReason the reason of the failure
 * @param httpstatus the HTTP status code to record
 * @throws IOException if the underlying connector fails
 */
@Override
public void err(DigestURI digestURI, String failReason, int httpstatus) throws IOException {
// direct delegation; NOTE(review): unlike the neighbouring code this does not appear to be queued - confirm
this.solr.err(digestURI, failReason, httpstatus);
}
@Override
public SolrDocumentList get(String querystring, int offset, int count) throws IOException {
return this.solr.get(querystring, offset, count);

@ -28,8 +28,6 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
@ -44,7 +42,7 @@ public class SolrRetryConnector implements SolrConnector {
}
@Override
public void close() {
public synchronized void close() {
this.solrConnector.close();
}
@ -128,21 +126,6 @@ public class SolrRetryConnector implements SolrConnector {
for (SolrDoc d: solrdocs) add(d);
}
/**
 * Registers an entry as error document, retrying on any failure until
 * {@code retryMaxTime} milliseconds have passed. Between two attempts the
 * thread sleeps for 10 milliseconds.
 *
 * <p>If the thread is interrupted while waiting, the interrupt flag is restored and
 * retrying stops; the last failure is then reported. If the retry window is already
 * over before the first attempt, no attempt is made and the method returns normally.</p>
 *
 * @param digestURI  the URL the error belongs to
 * @param failReason the reason of the failure
 * @param httpstatus the HTTP status code to record
 * @throws IOException the last failure; a non-IOException failure is wrapped as cause
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final long deadline = System.currentTimeMillis() + this.retryMaxTime;
    Throwable lastFailure = null;
    while (System.currentTimeMillis() < deadline) {
        try {
            this.solrConnector.err(digestURI, failReason, httpstatus);
            return;
        } catch (final Throwable e) {
            lastFailure = e;
        }
        try {
            Thread.sleep(10);
        } catch (final InterruptedException interrupted) {
            // keep the interrupt visible to the caller; with the flag set every further
            // sleep would fail immediately, so retrying would degrade into a busy loop
            Thread.currentThread().interrupt();
            break;
        }
    }
    if (lastFailure instanceof IOException) {
        throw (IOException) lastFailure;
    }
    if (lastFailure != null) {
        // preserve the original exception as cause instead of only its message
        throw new IOException(lastFailure.getMessage(), lastFailure);
    }
}
@Override
public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;

@ -31,7 +31,6 @@ import java.util.Collection;
import java.util.List;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@ -57,7 +56,7 @@ public class SolrShardingConnector implements SolrConnector {
}
@Override
public void close() {
public synchronized void close() {
for (final SolrConnector connector: this.connectors) connector.close();
}
@ -128,19 +127,6 @@ public class SolrShardingConnector implements SolrConnector {
for (final SolrDoc doc: docs) add(doc);
}
/**
 * Registers an entry as error document. The connector that receives the entry is
 * picked by the sharding method from the normalized form of the URL.
 *
 * @param digestURI  the URL the error belongs to
 * @param failReason the reason of the failure
 * @param httpstatus the HTTP status code to record
 * @throws IOException if the selected connector fails
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final String normalizedUrl = digestURI.toNormalform(true, false);
    final SolrConnector shard = this.connectors.get(this.sharding.selectURL(normalizedUrl));
    shard.err(digestURI, failReason, httpstatus);
}
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -31,10 +31,8 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import org.apache.http.HttpHost;
@ -115,7 +113,7 @@ public class SolrSingleConnector implements SolrConnector {
}
@Override
public void close() {
public synchronized void close() {
try {
this.server.commit();
} catch (SolrServerException e) {
@ -205,6 +203,7 @@ public class SolrSingleConnector implements SolrConnector {
}
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>();
for (SolrDoc d: solrdocs) l.add(d);
@ -217,27 +216,6 @@ public class SolrSingleConnector implements SolrConnector {
}
}
/**
 * Registers an entry as error document. The document carries the URL hash as id,
 * the normalized URL, the IP address and host name when available, the elements of
 * the URL path, the failure reason and the HTTP status code, and is stored with
 * {@code add}.
 *
 * @param digestURI  the URL the error belongs to
 * @param failReason the reason of the failure
 * @param httpstatus the HTTP status code to record
 * @throws IOException if storing the document fails
 */
@Override
public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity of the document
    errorDoc.addField("id", ASCII.String(digestURI.hash()));
    errorDoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);

    // network location, only as far as it is known
    final InetAddress ip = digestURI.getInetAddress();
    if (ip != null) {
        errorDoc.addField("ip_s", ip.getHostAddress());
    }
    if (digestURI.getHost() != null) {
        errorDoc.addField("host_s", digestURI.getHost());
    }

    // path elements of link
    final String urlPath = digestURI.getPath();
    final String[] segments = (urlPath == null) ? null : urlPath.split("/");
    if (segments != null && segments.length != 0) {
        errorDoc.addField("attr_paths", segments);
    }

    // the actual error information
    errorDoc.addField("failreason_t", failReason);
    errorDoc.addField("httpstatus_i", httpstatus);

    this.add(errorDoc);
}
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -91,7 +91,7 @@ public class KeyList implements Iterable<String> {
}
}
public void close() throws IOException {
public synchronized void close() throws IOException {
synchronized (this.raf) {
this.raf.close();
}

@ -0,0 +1,91 @@
package net.yacy.cora.storage;
import java.io.File;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
 * Read-only {@link Map} view on the entries of an existing ZIP file, keyed by entry name.
 * The entry names are collected once when the reader is created; the entries themselves
 * are looked up in the underlying {@link ZipFile}. All modifying map operations are
 * unsupported. The reader keeps the ZIP file open until {@link #close()} is called.
 */
public class ZIPReader extends AbstractMap<String, ZipEntry> implements Map<String, ZipEntry>, Iterable<Map.Entry<String, ZipEntry>>, java.io.Closeable {

    private final Set<String> filenames;
    private final ZipFile zipFile;

    /**
     * Opens the given ZIP file and collects the names of all its entries.
     *
     * @param file an existing ZIP file
     * @throws IOException if the file does not exist or cannot be opened as ZIP file
     */
    public ZIPReader(File file) throws IOException {
        super();
        if (!file.exists()) throw new IOException("ZIPReader can only be used for existing files: " + file);
        this.zipFile = new ZipFile(file);

        // read all entries
        this.filenames = new HashSet<String>();
        final Enumeration<? extends ZipEntry> e = this.zipFile.entries();
        while (e.hasMoreElements()) {
            ZipEntry z = e.nextElement();
            this.filenames.add(z.getName());
        }
    }

    /**
     * @return an iterator over (name, entry) pairs in the order the ZIP file enumerates
     *         its entries; {@code remove()} is not supported
     */
    @Override
    public Iterator<java.util.Map.Entry<String, ZipEntry>> iterator() {
        final Enumeration<? extends ZipEntry> e = this.zipFile.entries();
        return new Iterator<java.util.Map.Entry<String, ZipEntry>>() {

            @Override
            public boolean hasNext() {
                return e.hasMoreElements();
            }

            @Override
            public java.util.Map.Entry<String, ZipEntry> next() {
                ZipEntry z = e.nextElement();
                return new AbstractMap.SimpleImmutableEntry<String, ZipEntry>(z.getName(), z);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

        };
    }

    /** @return the number of entries in the ZIP file */
    @Override
    public int size() {
        return this.zipFile.size();
    }

    @Override
    public boolean isEmpty() {
        return this.zipFile.size() == 0;
    }

    /** @return true if an entry with exactly this name was found when the file was opened */
    @Override
    public boolean containsKey(Object key) {
        return this.filenames.contains(key);
    }

    /**
     * @param key the entry name
     * @return the entry as delivered by {@link ZipFile#getEntry(String)}, or null if there
     *         is none or if the key is not a String
     */
    @Override
    public ZipEntry get(Object key) {
        // a non-String (or null) key can never name an entry; answer null instead of throwing
        if (!(key instanceof String)) return null;
        return this.zipFile.getEntry((String) key);
    }

    /** @return an unmodifiable view of the entry names */
    @Override
    public Set<String> keySet() {
        // the view protects the name index that containsKey() relies on
        return java.util.Collections.unmodifiableSet(this.filenames);
    }

    /**
     * @return a read-only set view backed by {@link #iterator()} and {@link #size()};
     *         it makes the operations inherited from {@link AbstractMap}
     *         (values, equals, hashCode, toString) usable
     */
    @Override
    public Set<java.util.Map.Entry<String, ZipEntry>> entrySet() {
        return new java.util.AbstractSet<java.util.Map.Entry<String, ZipEntry>>() {

            @Override
            public Iterator<java.util.Map.Entry<String, ZipEntry>> iterator() {
                return ZIPReader.this.iterator();
            }

            @Override
            public int size() {
                return ZIPReader.this.size();
            }

        };
    }

    /**
     * Closes the underlying ZIP file.
     *
     * @throws IOException if closing the file fails
     */
    @Override
    public void close() throws IOException {
        this.zipFile.close();
    }
}

@ -0,0 +1,62 @@
package net.yacy.cora.storage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
public class ZIPWriter extends AbstractMap<String, ZipEntry> implements Map<String, ZipEntry>, Iterable<Map.Entry<String, ZipEntry>> {
private final HashMap<String, ZipEntry> backup;
private final ZipOutputStream zos;
public ZIPWriter(File file) throws IOException {
super();
if (file.exists()) throw new IOException("ZIPWriter can only be used for new files");
this.backup = new HashMap<String, ZipEntry>();
this.zos = new ZipOutputStream(new FileOutputStream(file));
}
@Override
public ZipEntry put(String key, ZipEntry value) {
assert !this.backup.containsKey(key);
try {
this.zos.putNextEntry(value);
this.backup.put(key, value);
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
@Override
public ZipEntry get(Object key) {
return this.backup.get(key);
}
@Override
public Iterator<java.util.Map.Entry<String, ZipEntry>> iterator() {
return this.backup.entrySet().iterator();
}
@Override
public void clear() {
throw new UnsupportedOperationException();
}
@Override
public Set<java.util.Map.Entry<String, ZipEntry>> entrySet() {
return this.backup.entrySet();
}
public void close() throws IOException {
this.zos.close();
}
}

@ -702,7 +702,7 @@ dc_rights
}
}
public void close() {
public synchronized void close() {
if (this.text == null) return;
// try close the output stream

@ -142,7 +142,7 @@ public class SentenceReader implements Iterator<StringBuilder> {
throw new UnsupportedOperationException();
}
public void close() {
public synchronized void close() {
try {
raf.close();
} catch(IOException ioe) {

@ -82,7 +82,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
return r;
}
public void close() {
public synchronized void close() {
this.e.close();
}
@ -153,7 +153,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
return r;
}
public void close() {
public synchronized void close() {
this.e.close();
}
}

@ -85,7 +85,7 @@ public class DatabaseConnection {
}
}
public void close() {
public synchronized void close() {
if (connection != null) {
try {
connection.close();

@ -100,7 +100,7 @@ public class ImportDump {
close();
}
public void close() {
public synchronized void close() {
this.conn.close();
}

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -65,17 +65,19 @@ public class PhpBB3Dao implements Dao {
this.prefix = prefix;
this.users = new HashMap<Integer, String>();
}
@Override
protected void finalize() throws Throwable {
close();
}
@Override
public Date first() {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.statement();
rs = stmt.executeQuery("select min(post_time) from " + prefix + "posts");
stmt = this.conn.statement();
rs = stmt.executeQuery("select min(post_time) from " + this.prefix + "posts");
if (rs.next()) {
return new Date(rs.getLong(1) * 1000L);
}
@ -89,12 +91,13 @@ public class PhpBB3Dao implements Dao {
}
}
@Override
public Date latest() {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.statement();
rs = stmt.executeQuery("select max(post_time) from " + prefix + "posts");
stmt = this.conn.statement();
rs = stmt.executeQuery("select max(post_time) from " + this.prefix + "posts");
if (rs.next()) {
return new Date(rs.getLong(1) * 1000L);
}
@ -108,18 +111,21 @@ public class PhpBB3Dao implements Dao {
}
}
@Override
public int size() throws SQLException {
return this.conn.count(prefix + "posts");
return this.conn.count(this.prefix + "posts");
}
@Override
public DCEntry get(int item) {
return getOne("select * from " + prefix + "posts where post_id = " + item);
return getOne("select * from " + this.prefix + "posts where post_id = " + item);
}
@Override
public BlockingQueue<DCEntry> query(int from, int until, int queueSize) {
// define the sql query
final StringBuilder sql = new StringBuilder(256);
sql.append("select * from " + prefix + "posts where post_id >= ");
sql.append("select * from " + this.prefix + "posts where post_id >= ");
sql.append(from);
if (until > from) {
sql.append(" and post_id < ");
@ -130,24 +136,25 @@ public class PhpBB3Dao implements Dao {
// execute the query and push entries to a queue concurrently
return toQueue(sql, queueSize);
}
@Override
public BlockingQueue<DCEntry> query(Date from, int queueSize) {
// define the sql query
final StringBuilder sql = new StringBuilder(256);
sql.append("select * from " + prefix + "posts where post_time >= ");
sql.append("select * from " + this.prefix + "posts where post_time >= ");
sql.append(from.getTime() / 1000);
sql.append(" order by post_id");
// execute the query and push entries to a queue concurrently
return toQueue(sql, queueSize);
}
private DCEntry getOne(String sql) {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.statement();
stmt = this.conn.statement();
rs = stmt.executeQuery(sql);
if (rs.next()) {
try {
@ -165,16 +172,17 @@ public class PhpBB3Dao implements Dao {
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
}
}
private BlockingQueue<DCEntry> toQueue(final StringBuilder sql, int queueSize) {
// execute the query and push entries to a queue concurrently
final BlockingQueue<DCEntry> queue = new ArrayBlockingQueue<DCEntry>(queueSize);
Thread dbreader = new Thread() {
@Override
public void run() {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.statement();
stmt = PhpBB3Dao.this.conn.statement();
rs = stmt.executeQuery(sql.toString());
while (rs.next()) {
try {
@ -197,7 +205,7 @@ public class PhpBB3Dao implements Dao {
dbreader.start();
return queue;
}
protected DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException {
DigestURI url;
int item = rs.getInt("post_id");
@ -208,7 +216,7 @@ public class PhpBB3Dao implements Dao {
Date date = new Date(rs.getLong("post_time") * 1000L);
return new DCEntry(url, date, subject, user, text, 0.0f, 0.0f);
}
public static String xmlCleaner(String s) {
if (s == null) return null;
@ -217,10 +225,10 @@ public class PhpBB3Dao implements Dao {
for (int i = 0; i < s.length(); i++ ) {
c = s.charAt(i);
if ((c >= 0x0020 && c <= 0xD7FF) ||
if ((c >= 0x0020 && c <= 0xD7FF) ||
(c >= 0xE000 && c <= 0xFFFD) ||
c == 0x0009 ||
c == 0x000A ||
c == 0x000A ||
c == 0x000D ) {
sbOutput.append(c);
}
@ -231,14 +239,14 @@ public class PhpBB3Dao implements Dao {
private String getUser(int poster_id) {
String nick = this.users.get(poster_id);
if (nick != null) return nick;
StringBuilder sql = new StringBuilder(256);
sql.append("select * from " + prefix + "users where user_id = ");
sql.append("select * from " + this.prefix + "users where user_id = ");
sql.append(poster_id);
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.statement();
stmt = this.conn.statement();
rs = stmt.executeQuery(sql.toString());
if (rs.next()) nick = rs.getString("username");
if (nick == null) nick = "";
@ -252,7 +260,8 @@ public class PhpBB3Dao implements Dao {
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
}
}
@Override
public int writeSurrogates(
BlockingQueue<DCEntry> queue,
File targetdir,
@ -264,7 +273,7 @@ public class PhpBB3Dao implements Dao {
String targethost = new DigestURI(this.urlstub).getHost();
int fc = 0;
File outputfiletmp = null, outputfile = null;
// write the result from the query concurrently in a file
OutputStreamWriter osw = null;
DCEntry e;
@ -304,11 +313,12 @@ public class PhpBB3Dao implements Dao {
}
return 0;
}
public void close() {
@Override
public synchronized void close() {
this.conn.close();
}
public static void main(String[] args) {
PhpBB3Dao db;
try {
@ -331,5 +341,5 @@ public class PhpBB3Dao implements Dao {
Log.logException(e);
}
}
}

@ -564,7 +564,7 @@ public class MediawikiImporter extends Thread implements Importer {
return this.bb.getBytes();
}
public void close() {
public synchronized void close() {
try {
this.is.close();
} catch (final IOException e) {

@ -0,0 +1,390 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* this parser was copied and modified to fit into YaCy from the apache tika project
*/
package net.yacy.document.parser;
import java.io.InputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.poi.util.StringUtil;
/**
 * Parser stub for DWG (CAD drawing) files.
 * The property extraction logic taken from the apache tika project is not yet ported:
 * it is kept below as commented-out reference code. In its current state
 * {@link #parse(MultiProtocolURI, String, String, InputStream)} only performs the
 * memory check and then returns null.
 */
public class dwgParser extends AbstractParser implements Parser {

    private static final String HEADER_2000_PROPERTIES_MARKER_STR = "DWGPROPS COOKIE";
    private static final byte[] HEADER_2000_PROPERTIES_MARKER = new byte[HEADER_2000_PROPERTIES_MARKER_STR.length()];

    static {
        // fill the marker byte array from the marker string, one byte per character slot
        StringUtil.putCompressedUnicode(
                HEADER_2000_PROPERTIES_MARKER_STR,
                HEADER_2000_PROPERTIES_MARKER, 0);
    }

    /**
     * How far to skip after the last standard property, before
     * we find any custom properties that might be there.
     */
    private static final int CUSTOM_PROPERTIES_SKIP = 20;

    /**
     * Register the file extension and mime types this parser is responsible for.
     */
    public dwgParser() {
        super("DWG (CAD Drawing) parser (very basic)");
        this.SUPPORTED_EXTENSIONS.add("dwg");
        this.SUPPORTED_MIME_TYPES.add("application/dwg");
        // NOTE(review): "applications/vnd.dwg" looks like a typo for "application/vnd.dwg" - confirm before changing
        this.SUPPORTED_MIME_TYPES.add("applications/vnd.dwg");
    }

    /**
     * Parse a DWG document. Not implemented yet: after the memory check this method returns null.
     *
     * @param location the url of the document
     * @param mimeType the mime type of the document
     * @param charset the charset of the document
     * @param source the stream with the document content
     * @return currently always null
     * @throws Parser.Failure if the requested memory is not available
     * @throws InterruptedException declared by the parser interface
     */
    @Override
    public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {

        // check memory for parser
        if (!MemoryControl.request(200 * 1024 * 1024, true))
            throw new Parser.Failure("Not enough Memory available for dwg parser: " + MemoryControl.available(), location);
        return null;
        // First up, which version of the format are we handling?
        /*
        byte[] header = new byte[128];
        IOUtils.readFully(source, header);
        String version = new String(header, 0, 6, "US-ASCII");
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        if (version.equals("AC1015")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            if (skipTo2000PropertyInfoSection(stream, header)) {
                get2000Props(stream,metadata,xhtml);
            }
        } else if (version.equals("AC1018")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            if (skipToPropertyInfoSection(stream, header)) {
                get2004Props(stream,metadata,xhtml);
            }
        } else if (version.equals("AC1021") || version.equals("AC1024")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            if (skipToPropertyInfoSection(stream, header)) {
                get2007and2010Props(stream,metadata,xhtml);
            }
        } else {
            throw new TikaException(
                    "Unsupported AutoCAD drawing version: " + version);
        }
        xhtml.endDocument();
        String docTitle = null, docSubject = null, docAuthor = null, docPublisher = null, docKeywordStr = null;
        if (info != null) {
            docTitle = info.getTitle();
            docSubject = info.getSubject();
            docAuthor = info.getAuthor();
            docPublisher = info.getProducer();
            if (docPublisher == null || docPublisher.length() == 0) docPublisher = info.getCreator();
            docKeywordStr = info.getKeywords();
        }
        if (docTitle == null || docTitle.length() == 0) {
            docTitle = MultiProtocolURI.unescape(location.getFileName());
        }
        String[] docKeywords = null;
        if (docKeywordStr != null) {
            docKeywords = docKeywordStr.split(" |,");
        }
        if (docTitle == null) {
            docTitle = docSubject;
        }
        byte[] contentBytes;
        return new Document[]{new Document(
                location,
                mimeType,
                "UTF-8",
                this,
                null,
                docKeywords,
                docTitle,
                docAuthor,
                docPublisher,
                null,
                null,
                0.0f, 0.0f,
                contentBytes,
                null,
                null,
                null,
                false)};
        */
    }

    /*
    private void get2004Props(
            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
            throws IOException, TikaException, SAXException {
        // Standard properties
        for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
            String headerValue = read2004String(stream);
            handleHeader(i, headerValue, metadata, xhtml);
        }
        // Custom properties
        int customCount = skipToCustomProperties(stream);
        for (int i = 0; i < customCount; i++) {
            String propName = read2004String(stream);
            String propValue = read2004String(stream);
            if(propName.length() > 0 && propValue.length() > 0) {
                metadata.add(propName, propValue);
            }
        }
    }
    private String read2004String(InputStream stream) throws IOException, TikaException {
        int stringLen = EndianUtils.readUShortLE(stream);
        byte[] stringData = new byte[stringLen];
        IOUtils.readFully(stream, stringData);
        // Often but not always null terminated
        if (stringData[stringLen-1] == 0) {
            stringLen--;
        }
        String value = StringUtil.getFromCompressedUnicode(stringData, 0, stringLen);
        return value;
    }
    // Stored as UCS2, so 16 bit "unicode"
    private void get2007and2010Props(
            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
            throws IOException, TikaException, SAXException {
        // Standard properties
        for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
            String headerValue = read2007and2010String(stream);
            handleHeader(i, headerValue, metadata, xhtml);
        }
        // Custom properties
        int customCount = skipToCustomProperties(stream);
        for (int i = 0; i < customCount; i++) {
            String propName = read2007and2010String(stream);
            String propValue = read2007and2010String(stream);
            if(propName.length() > 0 && propValue.length() > 0) {
                metadata.add(propName, propValue);
            }
        }
    }
    private String read2007and2010String(InputStream stream) throws IOException, TikaException {
        int stringLen = EndianUtils.readUShortLE(stream);
        byte[] stringData = new byte[stringLen * 2];
        IOUtils.readFully(stream, stringData);
        String value = StringUtil.getFromUnicodeLE(stringData);
        // Some strings are null terminated
        if(value.charAt(value.length()-1) == 0) {
            value = value.substring(0, value.length()-1);
        }
        return value;
    }
    private void get2000Props(
            InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
            throws IOException, TikaException, SAXException {
        int propCount = 0;
        while(propCount < 30) {
            int propIdx = EndianUtils.readUShortLE(stream);
            int length = EndianUtils.readUShortLE(stream);
            int valueType = stream.read();
            if(propIdx == 0x28) {
                // This one seems not to follow the pattern
                length = 0x19;
            } else if(propIdx == 90) {
                // We think this means the end of properties
                break;
            }
            byte[] value = new byte[length];
            IOUtils.readFully(stream, value);
            if(valueType == 0x1e) {
                // Normal string, good
                String val = StringUtil.getFromCompressedUnicode(value, 0, length);
                // Is it one we can look up by index?
                if(propIdx < HEADER_2000_PROPERTIES_ENTRIES.length) {
                    metadata.add(HEADER_2000_PROPERTIES_ENTRIES[propIdx], val);
                    xhtml.element("p", val);
                } else if(propIdx == 0x012c) {
                    int splitAt = val.indexOf('=');
                    if(splitAt > -1) {
                        String propName = val.substring(0, splitAt);
                        String propVal = val.substring(splitAt+1);
                        metadata.add(propName, propVal);
                    }
                }
            } else {
                // No idea...
            }
            propCount++;
        }
    }
    private void handleHeader(
            int headerNumber, String value, Metadata metadata,
            XHTMLContentHandler xhtml) throws SAXException {
        if(value == null || value.length() == 0) {
            return;
        }
        String headerProp = HEADER_PROPERTIES_ENTRIES[headerNumber];
        if(headerProp != null) {
            metadata.set(headerProp, value);
        }
        xhtml.element("p", value);
    }
    // Grab the offset, then skip there
    private boolean skipToPropertyInfoSection(InputStream stream, byte[] header)
            throws IOException, TikaException {
        // The offset is stored in the header from 0x20 onwards
        long offsetToSection = EndianUtils.getLongLE(header, 0x20);
        long toSkip = offsetToSection - header.length;
        if(offsetToSection == 0){
            return false;
        }
        while (toSkip > 0) {
            byte[] skip = new byte[Math.min((int) toSkip, 0x4000)];
            IOUtils.readFully(stream, skip);
            toSkip -= skip.length;
        }
        return true;
    }
    //We think it can be anywhere...
    private boolean skipTo2000PropertyInfoSection(InputStream stream, byte[] header)
            throws IOException {
        int val = 0;
        while(val != -1) {
            val = stream.read();
            if(val == HEADER_2000_PROPERTIES_MARKER[0]) {
                boolean going = true;
                for(int i=1; i<HEADER_2000_PROPERTIES_MARKER.length && going; i++) {
                    val = stream.read();
                    if(val != HEADER_2000_PROPERTIES_MARKER[i]) going = false;
                }
                if(going) {
                    // Bingo, found it
                    return true;
                }
            }
        }
        return false;
    }
    private int skipToCustomProperties(InputStream stream)
            throws IOException, TikaException {
        // There should be 4 zero bytes next
        byte[] padding = new byte[4];
        IOUtils.readFully(stream, padding);
        if(padding[0] == 0 && padding[1] == 0 &&
                padding[2] == 0 && padding[3] == 0) {
            // Looks hopeful, skip on
            padding = new byte[CUSTOM_PROPERTIES_SKIP];
            IOUtils.readFully(stream, padding);
            // We should now have the count
            int count = EndianUtils.readUShortLE(stream);
            // Sanity check it
            if(count > 0 && count < 0x7f) {
                // Looks plausible
                return count;
            } else {
                // No properties / count is too high to trust
                return 0;
            }
        } else {
            // No padding. That probably means no custom props
            return 0;
        }
    }
    public static void main(final String[] args) {
        if (args.length > 0 && args[0].length() > 0) {
            // file
            final File dwgFile = new File(args[0]);
            if(dwgFile.canRead()) {
                System.out.println(dwgFile.getAbsolutePath());
                final long startTime = System.currentTimeMillis();
                // parse
                final AbstractParser parser = new dwgParser();
                Document document = null;
                try {
                    document = Document.mergeDocuments(null, "application/dwg", parser.parse(null, "application/dwg", null, new FileInputStream(dwgFile)));
                } catch (final Parser.Failure e) {
                    System.err.println("Cannot parse file " + dwgFile.getAbsolutePath());
                    Log.logException(e);
                } catch (final InterruptedException e) {
                    System.err.println("Interrupted while parsing!");
                    Log.logException(e);
                } catch (final NoClassDefFoundError e) {
                    System.err.println("class not found: " + e.getMessage());
                } catch (final FileNotFoundException e) {
                    Log.logException(e);
                }
                // statistics
                System.out.println("\ttime elapsed: " + (System.currentTimeMillis() - startTime) + " ms");
                // output
                if (document == null) {
                    System.out.println("\t!!!Parsing without result!!!");
                } else {
                    System.out.println("\tParsed text with " + document.getTextLength() + " chars of text and " + document.getAnchors().size() + " anchors");
                    try {
                        // write file
                        FileUtils.copy(document.getText(), new File("parsedPdf.txt"));
                    } catch (final IOException e) {
                        System.err.println("error saving parsed document");
                        Log.logException(e);
                    }
                }
            } else {
                System.err.println("Cannot read file "+ dwgFile.getAbsolutePath());
            }
        } else {
            System.out.println("Please give a filename as first argument.");
        }
    }
    */
}

@ -1,4 +1,4 @@
// AbstractTransformer.java
// AbstractTransformer.java
// ----------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
@ -37,15 +37,18 @@ public abstract class AbstractTransformer implements Transformer {
this.tags1 = tags1;
}
@Override
public boolean isTag0(final String tag) {
return tags0.contains(tag);
return this.tags0.contains(tag);
}
@Override
public boolean isTag1(final String tag) {
return tags1.contains(tag);
return this.tags1.contains(tag);
}
//the 'missing' method that shall be implemented:
@Override
public abstract char[] transformText(char[] text);
/* could be easily implemented as:
{
@ -54,18 +57,21 @@ public abstract class AbstractTransformer implements Transformer {
*/
// the other methods must take into account to construct the return value correctly
@Override
public char[] transformTag0(final String tagname, final Properties tagopts, final char quotechar) {
return TransformerWriter.genTag0(tagname, tagopts, quotechar);
}
@Override
public char[] transformTag1(final String tagname, final Properties tagopts, final char[] text, final char quotechar) {
return TransformerWriter.genTag1(tagname, tagopts, text, quotechar);
}
public void close() {
@Override
public synchronized void close() {
// free resources
tags0 = null;
tags1 = null;
this.tags0 = null;
this.tags1 = null;
}
}

@ -139,9 +139,9 @@ public class ContentTransformer extends AbstractTransformer implements Transform
}
@Override
public void close() {
public synchronized void close() {
// free resources
super.close();
}
}
}

@ -170,7 +170,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
}
@Override
public void close() throws IOException {
public synchronized void close() throws IOException {
if (this.writer != null) this.writer.close();
}

@ -62,7 +62,7 @@ public class BEncodedHeap implements MapStore {
/**
* produce or open a properties table
*
*
* @param location the file
* @param keylength length of access keys
* @param ordering ordering on the keys
@ -80,7 +80,7 @@ public class BEncodedHeap implements MapStore {
/**
* convenience method to open a properties table
*
*
* @param location the file
* @param keylength length of access keys
*/
@ -120,11 +120,11 @@ public class BEncodedHeap implements MapStore {
public CloneableIterator<byte[]> clone(Object modifier) {
return this;
}
};
}
}
public byte[] encodedKey(final String key) {
return Base64Order.enhancedCoder.encodeSubstring(Digest.encodeMD5Raw(key), this.table.keylength);
}
@ -207,7 +207,7 @@ public class BEncodedHeap implements MapStore {
/**
* the map is stored inside a file; this method may return the file
*
*
* @return the file where the map is stored
*/
public File getFile() {
@ -216,7 +216,7 @@ public class BEncodedHeap implements MapStore {
/**
* Return the number of key-value mappings in this map.
*
*
* @return the number of entries mappings in this map
*/
@Override
@ -234,7 +234,7 @@ public class BEncodedHeap implements MapStore {
/**
* check if a row with given key exists in the table
*
*
* @param name
* @return true if the row exists
*/
@ -244,7 +244,7 @@ public class BEncodedHeap implements MapStore {
/**
* check if a row with given key exists in the table This method is here to implement the Map interface
*
*
* @param name
* @return true if the row exists
*/
@ -267,7 +267,7 @@ public class BEncodedHeap implements MapStore {
/**
* get a map from the table
*
*
* @param name
* @return the map if one found or NULL if no entry exists or the entry is corrupt
* @throws RowSpaceExceededException
@ -283,7 +283,7 @@ public class BEncodedHeap implements MapStore {
/**
* get a map from the table this method is here to implement the Map interface
*
*
* @param name
* @return the map if one found or NULL if no entry exists or the entry is corrupt
*/
@ -305,7 +305,7 @@ public class BEncodedHeap implements MapStore {
/**
* convenience method to get a value from a map
*
*
* @param pk
* @param key
* @return the value
@ -324,7 +324,7 @@ public class BEncodedHeap implements MapStore {
/**
* select all rows from a table where a given matcher matches with elements in a given row this method
* makes a full-table scan of the whole table
*
*
* @param columnName the name of the column where the matcher shall match
* @param columnMatcher the matcher for the elements of the column
* @return a set of primary keys where the matcher matched
@ -351,7 +351,7 @@ public class BEncodedHeap implements MapStore {
/**
* select one row from a table where a given matcher matches with elements in a given row this method
* stops the full-table scan as soon as a first matcher was found
*
*
* @param columnName the name of the column where the matcher shall match
* @param columnMatcher the matcher for the elements of the column
* @return the row where the matcher matched the given column
@ -379,7 +379,7 @@ public class BEncodedHeap implements MapStore {
/**
* insert a map into the table this method shall be used in exchange of the get method if the previous
* entry value is not needed.
*
*
* @param name
* @param map
* @throws RowSpaceExceededException
@ -427,7 +427,7 @@ public class BEncodedHeap implements MapStore {
/**
* insert a map into the table
*
*
* @param name
* @param map
*/
@ -450,7 +450,7 @@ public class BEncodedHeap implements MapStore {
/**
* delete a map from the table
*
*
* @param name
* @throws IOException
*/
@ -460,7 +460,7 @@ public class BEncodedHeap implements MapStore {
/**
* delete a map from the table
*
*
* @param name
* @throws RowSpaceExceededException
* @throws IOException
@ -489,7 +489,7 @@ public class BEncodedHeap implements MapStore {
/**
* Copy all the mappings from the specified map to this map.
*
*
* @param m mappings to be stored in this map
*/
@Override
@ -522,7 +522,8 @@ public class BEncodedHeap implements MapStore {
* close the backend-file. Should be called explicitly to ensure that all data waiting in IO write buffers
* are flushed
*/
public void close() {
@Override
public synchronized void close() {
int s = this.size();
File f = this.table.heapFile;
this.table.close();
@ -532,7 +533,7 @@ public class BEncodedHeap implements MapStore {
/**
* Return a Set of the keys contained in this map. This may not be a useful method, if possible use the
* keys() method instead to iterate all keys from the backend-file
*
*
* @return a set view of the keys contained in this map
*/
@Override
@ -550,7 +551,7 @@ public class BEncodedHeap implements MapStore {
/**
* iterate all keys of the table
*
*
* @return an iterator of byte[]
* @throws IOException
*/
@ -562,7 +563,7 @@ public class BEncodedHeap implements MapStore {
* the values() method is not implemented in this class because it does not make sense to use such a
* method for file-based data structures. To get a collection view of all the entries, just use a entry
* iterator instead.
*
*
* @return nothing. The method throws always a UnsupportedOperationException
*/
@Override
@ -613,7 +614,7 @@ public class BEncodedHeap implements MapStore {
/**
* iterate all rows of the table. this is a static method that expects that the given file is not opened
* by any other application
*
*
* @param location
* @param keylen
* @return
@ -637,7 +638,7 @@ public class BEncodedHeap implements MapStore {
* Produce a list of column names from this table This method may be useful if the table shall be
* displayed as a table in GUIs. To show the first line of the table, the table header, a list of all
* column names is required. This can be generated with this method
*
*
* @return a list of column names
*/
public ArrayList<String> columns() {

@ -154,7 +154,7 @@ public class BEncodedHeapShard extends AbstractMapStore implements MapStore {
}
@Override
public void close() {
public synchronized void close() {
if (this.shard == null) return;
final Iterator<MapStore> i = this.shard.values().iterator();

@ -803,8 +803,8 @@ public class HeapReader {
}
}
public void close() {
if (this.is != null) try { this.is.close(); } catch (final IOException e) {}
public synchronized void close() {
if (this.is != null) try { this.is.close(); } catch (final IOException e) {Log.logException(e);}
this.is = null;
}

@ -99,7 +99,7 @@ public class Tables implements Iterable<String> {
heap.close();
}
public void close() {
public synchronized void close() {
for (final BEncodedHeap heap: this.tables.values()) heap.close();
this.tables.clear();
}

@ -20,6 +20,7 @@
package net.yacy.kelondro.data.citation;
import java.io.Serializable;
import java.util.Collection;
import net.yacy.cora.document.ASCII;
@ -33,10 +34,12 @@ import net.yacy.kelondro.order.MicroDate;
import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.util.ByteArray;
public class CitationReference implements Reference /*, Cloneable*/ {
public class CitationReference implements Reference, Serializable {
// this object stores citation attributes to URL references
private static final long serialVersionUID=1920200210928897131L;
public static final Row citationRow = new Row(new Column[]{
new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"),
new Column("m", Column.celltype_cardinal, Column.encoder_b256, 2, "lastModified"),

@ -20,11 +20,15 @@
package net.yacy.kelondro.data.citation;
import java.io.Serializable;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.ReferenceFactory;
public class CitationReferenceFactory implements ReferenceFactory<CitationReference> {
public class CitationReferenceFactory implements ReferenceFactory<CitationReference>, Serializable {
private static final long serialVersionUID=-1098504892965986149L;
@Override
public CitationReference produceSlow(final Entry e) {

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -26,21 +26,28 @@
package net.yacy.kelondro.data.image;
import java.io.Serializable;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.ReferenceFactory;
public class ImageReferenceFactory implements ReferenceFactory<ImageReference> {
public class ImageReferenceFactory implements ReferenceFactory<ImageReference>, Serializable {
private static final long serialVersionUID=-2209473508756878863L;
@Override
public ImageReference produceSlow(Entry e) {
return null; //new ImageReferenceRow(e);
}
@Override
public ImageReference produceFast(ImageReference r) {
if (r instanceof ImageReferenceVars) return r;
return new ImageReferenceVars(r);
}
@Override
public Row getRow() {
return ImageReferenceRow.urlEntryRow;
}

@ -26,6 +26,7 @@
package net.yacy.kelondro.data.image;
import java.io.Serializable;
import java.util.Collection;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
@ -38,9 +39,12 @@ import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.util.ByteArray;
public class ImageReferenceVars extends AbstractReference implements ImageReference, Reference, Cloneable {
public class ImageReferenceVars extends AbstractReference implements ImageReference, Reference, Cloneable, Serializable {
/**
private static final long serialVersionUID=3669156620967277347L;
/**
* object for termination of concurrent blocking queue processing
*/
public static final ImageReferenceVars poison = new ImageReferenceVars();

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -26,21 +26,28 @@
package net.yacy.kelondro.data.navigation;
import java.io.Serializable;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.ReferenceFactory;
public class NavigationReferenceFactory implements ReferenceFactory<NavigationReference> {
public class NavigationReferenceFactory implements ReferenceFactory<NavigationReference>, Serializable {
private static final long serialVersionUID=-3440570952034279619L;
/**
 * Wraps the given row entry in a new {@link NavigationReferenceRow}.
 *
 * @param e the row entry to wrap
 * @return a new {@code NavigationReferenceRow} constructed from {@code e}
 */
@Override
public NavigationReference produceSlow(final Entry e) {
return new NavigationReferenceRow(e);
}
/**
 * Returns the given reference as a {@link NavigationReferenceVars}.
 *
 * @param r the reference to convert
 * @return {@code r} itself when it already is a {@code NavigationReferenceVars},
 *         otherwise a new {@code NavigationReferenceVars} constructed from {@code r}
 */
@Override
public NavigationReference produceFast(final NavigationReference r) {
    return (r instanceof NavigationReferenceVars) ? r : new NavigationReferenceVars(r);
}
/**
 * @return the row definition {@code NavigationReferenceRow.navEntryRow}
 */
@Override
public Row getRow() {
return NavigationReferenceRow.navEntryRow;
}

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -26,6 +26,7 @@
package net.yacy.kelondro.data.navigation;
import java.io.Serializable;
import java.util.Collection;
import net.yacy.cora.document.ASCII;
@ -33,12 +34,14 @@ import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.AbstractReference;
import net.yacy.kelondro.rwi.Reference;
public class NavigationReferenceVars extends AbstractReference implements NavigationReference, Reference, Cloneable {
public class NavigationReferenceVars extends AbstractReference implements NavigationReference, Reference, Cloneable, Serializable {
private static final long serialVersionUID=2873544331190937594L;
public byte[] termhash, refhash;
public int hitcount, position;
byte flags;
public NavigationReferenceVars(
final byte[] termhash,
final byte[] refhash,
@ -52,7 +55,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
this.position = pos;
this.flags = flags;
}
public NavigationReferenceVars(final NavigationReference e) {
this.refhash = e.urlhash();
this.termhash = e.termHash();
@ -60,7 +63,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
this.position = e.position(0);
this.flags = e.flags();
}
@Override
public NavigationReferenceVars clone() {
final NavigationReferenceVars c = new NavigationReferenceVars(
@ -72,7 +75,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
);
return c;
}
public NavigationReferenceRow toRowEntry() {
return new NavigationReferenceRow(
this.termhash,
@ -81,50 +84,58 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
this.position,
this.flags);
}
/**
 * @return the property form of the row entry produced by {@link #toRowEntry()}
 */
@Override
public String toPropertyForm() {
return toRowEntry().toPropertyForm();
}
/**
 * @return the kelondro entry of the row entry produced by {@link #toRowEntry()}
 */
@Override
public Entry toKelondroEntry() {
return toRowEntry().toKelondroEntry();
}
/**
 * Builds the navigation hash of this reference.
 *
 * @return the term hash followed by the reference hash, both converted
 *         with {@code ASCII.String}
 */
@Override
public String navigationHash() {
    final String term = ASCII.String(this.termhash);
    final String ref = ASCII.String(this.refhash);
    return term + ref;
}
/**
 * @return the {@code refhash} array itself (not a copy)
 */
@Override
public byte[] urlhash() {
return this.refhash;
}
/**
 * @return the {@code termhash} array itself (not a copy)
 */
@Override
public byte[] termHash() {
return this.termhash;
}
/**
 * @return the stored hit count
 */
@Override
public int hitcount() {
return this.hitcount;
}
/**
 * Returns the single position stored in this reference.
 *
 * @param p position index; only {@code 0} is expected (checked by an assertion)
 * @return the stored position, regardless of {@code p} when assertions are disabled
 */
@Override
public int position(final int p) {
assert p == 0 : "p = " + p;
return this.position;
}
/**
 * @return the stored flags byte
 */
@Override
public byte flags() {
return this.flags;
}
/**
 * @return the same string as {@link #toPropertyForm()}
 */
@Override
public String toString() {
return toPropertyForm();
}
/**
 * @return the hash code of the string returned by {@link #navigationHash()}
 */
@Override
public int hashCode() {
return this.navigationHash().hashCode();
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
@ -133,24 +144,28 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
NavigationReferenceVars other = (NavigationReferenceVars) obj;
return this.navigationHash().equals(other.navigationHash());
}
/**
 * Age comparison is not implemented for navigation references.
 *
 * @param other ignored
 * @return always {@code false}
 */
@Override
public boolean isOlder(final Reference other) {
return false;
}
// unsupported operations:
/**
 * Not supported by this reference type.
 *
 * @param oe ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void join(final Reference oe) {
throw new UnsupportedOperationException();
}
/**
 * Not supported by this reference type.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public long lastModified() {
throw new UnsupportedOperationException();
}
/**
 * Not supported by this reference type; use {@link #position(int)} with index 0.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Collection<Integer> positions() {
throw new UnsupportedOperationException();
}
}

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -26,6 +26,7 @@
package net.yacy.kelondro.data.word;
import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
@ -39,13 +40,15 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.ByteBuffer;
public class WordReferenceFactory implements ReferenceFactory<WordReference> {
public class WordReferenceFactory implements ReferenceFactory<WordReference>, Serializable {
private static final long serialVersionUID=-7168706947127349876L;
/**
 * Wraps the given row entry in a new {@link WordReferenceRow}.
 *
 * @param e the row entry to wrap
 * @return a new {@code WordReferenceRow} constructed from {@code e}
 */
@Override
public WordReference produceSlow(final Entry e) {
return new WordReferenceRow(e);
}
@Override
public WordReference produceFast(final WordReference r) {
if (r instanceof WordReferenceVars) return r;
@ -134,7 +137,7 @@ public class WordReferenceFactory implements ReferenceFactory<WordReference> {
urlsb.append(dom);
url = urlsb.toString();
ci.trim(6);
peers = target.get(url);
if (peers == null) {
peers = new StringBuilder(24);

@ -111,7 +111,7 @@ public class BufferedObjectIndex implements Index, Iterable<Row.Entry> {
}
@Override
public void close() {
public synchronized void close() {
synchronized (this.backend) {
try {
flushBuffer();

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -27,9 +27,13 @@
package net.yacy.kelondro.index;
import java.io.Serializable;
import net.yacy.kelondro.util.kelondroException;
public final class Column {
public final class Column implements Serializable {
private static final long serialVersionUID=6558500565023465301L;
public static final int celltype_undefined = 0;
public static final int celltype_boolean = 1;
@ -37,12 +41,12 @@ public final class Column {
public static final int celltype_string = 3;
public static final int celltype_cardinal = 4;
public static final int celltype_bitfield = 5;
public static final int encoder_none = 0;
public static final int encoder_b64e = 1;
public static final int encoder_b256 = 2;
public static final int encoder_bytes = 3;
public int cellwidth;
public final String nickname;
protected final int celltype;
@ -65,7 +69,7 @@ public final class Column {
celldef = celldef.trim();
if (celldef.length() > 0 && celldef.charAt(0) == '<') celldef = celldef.substring(1);
if (celldef.endsWith(">")) celldef = celldef.substring(0, celldef.length() - 1);
// parse type definition
int p = celldef.indexOf(' ');
String typename = "";
@ -76,7 +80,7 @@ public final class Column {
} else {
typename = celldef.substring(0, p);
celldef = celldef.substring(p + 1).trim();
if (typename.equals("boolean")) {
this.celltype = celltype_boolean;
this.cellwidth = 1;
@ -109,9 +113,9 @@ public final class Column {
this.cellwidth = -1; // yet undefined
} else {
throw new kelondroException("kelondroColumn - undefined type def '" + typename + "'");
}
}
}
// parse length
p = celldef.indexOf('-');
if (p < 0) {
@ -144,7 +148,7 @@ public final class Column {
celldef = celldef.substring(q + 1);
}
}
// check length constraints
if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width given for " + this.nickname);
if (((typename.equals("boolean")) && (this.cellwidth > 1)) ||
@ -179,9 +183,9 @@ public final class Column {
if (this.celltype == celltype_cardinal) throw new kelondroException("kelondroColumn - encoder missing for cell " + this.nickname);
this.encoder = encoder_bytes;
}
assert (this.celltype != celltype_cardinal) || (this.encoder == encoder_b64e) || (this.encoder == encoder_b256);
// parse/check description
if (celldef.length() > 0 && celldef.charAt(0) == '"') {
p = celldef.indexOf('"', 1);
@ -195,43 +199,43 @@ public final class Column {
@Override
public final String toString() {
final StringBuilder s = new StringBuilder(20);
switch (celltype) {
switch (this.celltype) {
case celltype_undefined:
s.append(nickname);
s.append(this.nickname);
s.append('-');
s.append(cellwidth);
s.append(this.cellwidth);
break;
case celltype_boolean:
s.append("boolean ");
s.append(nickname);
s.append(this.nickname);
break;
case celltype_binary:
s.append("byte[] ");
s.append(nickname);
s.append(this.nickname);
s.append('-');
s.append(cellwidth);
s.append(this.cellwidth);
break;
case celltype_string:
s.append("String ");
s.append(nickname);
s.append(this.nickname);
s.append('-');
s.append(cellwidth);
s.append(this.cellwidth);
break;
case celltype_cardinal:
s.append("Cardinal ");
s.append(nickname);
s.append(this.nickname);
s.append('-');
s.append(cellwidth);
s.append(this.cellwidth);
break;
case celltype_bitfield:
s.append("Bitfield ");
s.append(nickname);
s.append(this.nickname);
s.append('-');
s.append(cellwidth);
s.append(this.cellwidth);
break;
}
switch (encoder) {
switch (this.encoder) {
case encoder_b64e:
s.append(" {b64e}");
break;
@ -249,11 +253,11 @@ public final class Column {
public final int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + celltype;
result = prime * result + cellwidth;
result = prime * result + encoder;
result = prime * result + this.celltype;
result = prime * result + this.cellwidth;
result = prime * result + this.encoder;
result = prime * result
+ ((nickname == null) ? 0 : nickname.hashCode());
+ ((this.nickname == null) ? 0 : this.nickname.hashCode());
return result;
}
@ -266,12 +270,12 @@ public final class Column {
if (obj == null) return false;
if (!(obj instanceof Column)) return false;
final Column other = (Column) obj;
if (celltype != other.celltype) return false;
if (cellwidth != other.cellwidth) return false;
if (encoder != other.encoder) return false;
if (nickname == null) {
if (this.celltype != other.celltype) return false;
if (this.cellwidth != other.cellwidth) return false;
if (this.encoder != other.encoder) return false;
if (this.nickname == null) {
if (other.nickname != null) return false;
} else if (!nickname.equals(other.nickname)) return false;
} else if (!this.nickname.equals(other.nickname)) return false;
return true;
}

@ -434,7 +434,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
return this.map;
}
public void close() {
public synchronized void close() {
this.map.close();
}
}

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -31,27 +31,34 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Iterator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.SetTools;
public final class HandleSet implements Iterable<byte[]>, Cloneable {
public final class HandleSet implements Iterable<byte[]>, Cloneable, Serializable {
private static final long serialVersionUID=444204785291174968L;
private final Row rowdef;
private RowSet index;
public HandleSet(final int keylength, final ByteOrder objectOrder, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key")}, objectOrder);
try {
this.index = new RowSet(rowdef, expectedspace);
this.index = new RowSet(this.rowdef, expectedspace);
} catch (RowSpaceExceededException e) {
try {
this.index = new RowSet(rowdef, 0);
this.index = new RowSet(this.rowdef, 0);
} catch (RowSpaceExceededException ee) {
Log.logException(ee);
this.index = null;
@ -73,18 +80,18 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
/**
 * Creates a new HandleSet from this set's row definition and a clone of its index.
 *
 * @return the new set; the row definition object is shared with this set
 */
public HandleSet clone() {
return new HandleSet(this.rowdef, this.index.clone());
}
public byte[] export() {
return index.exportCollection();
return this.index.exportCollection();
}
/**
* initialize a HandleSet with the content of a dump
* @param keylength
* @param objectOrder
* @param file
* @throws IOException
* @throws RowSpaceExceededException
* @throws IOException
* @throws RowSpaceExceededException
*/
public HandleSet(final int keylength, final ByteOrder objectOrder, final File file) throws IOException, RowSpaceExceededException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
@ -128,32 +135,32 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
os.close();
return c;
}
/**
 * @return the result of {@code smallestKey()} on the underlying index
 */
public final synchronized byte[] smallestKey() {
return this.index.smallestKey();
}
/**
 * @return the result of {@code largestKey()} on the underlying index
 */
public final synchronized byte[] largestKey() {
return this.index.largestKey();
}
/**
 * @return the object order of this set's row definition
 */
public ByteOrder comparator() {
return this.rowdef.objectOrder;
}
public final Row row() {
return index.row();
return this.index.row();
}
/**
 * Clears the underlying index.
 */
public final void clear() {
this.index.clear();
}
public final synchronized boolean has(final byte[] key) {
assert (key != null);
return index.has(key);
return this.index.has(key);
}
/**
 * Puts every key of the given set into this set via {@code put(byte[])}.
 *
 * @param aset the set whose keys are added
 * @throws RowSpaceExceededException if a {@code put} fails for lack of space
 */
public final void putAll(final HandleSet aset) throws RowSpaceExceededException {
    final Iterator<byte[]> keys = aset.iterator();
    while (keys.hasNext()) {
        put(keys.next());
    }
}
@ -161,36 +168,36 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
/**
* Adds the key to the set
* @param key
* @return true if this set did _not_ already contain the given key.
* @return true if this set did _not_ already contain the given key.
* @throws IOException
* @throws RowSpaceExceededException
*/
public final boolean put(final byte[] key) throws RowSpaceExceededException {
assert (key != null);
final Row.Entry newentry = index.row().newEntry(key);
return index.put(newentry);
final Row.Entry newentry = this.index.row().newEntry(key);
return this.index.put(newentry);
}
public final void putUnique(final byte[] key) throws RowSpaceExceededException {
assert (key != null);
final Row.Entry newentry = index.row().newEntry(key);
index.addUnique(newentry);
final Row.Entry newentry = this.index.row().newEntry(key);
this.index.addUnique(newentry);
}
public final boolean remove(final byte[] key) {
assert (key != null);
Row.Entry indexentry;
indexentry = index.remove(key);
indexentry = this.index.remove(key);
return indexentry != null;
}
public final synchronized byte[] removeOne() {
Row.Entry indexentry;
indexentry = index.removeOne();
indexentry = this.index.removeOne();
if (indexentry == null) return null;
return indexentry.getPrimaryKeyBytes();
}
/**
* get one entry; objects are taken from the end of the list
* a getOne(0) would return the same object as removeOne() would remove
@ -200,39 +207,40 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
public final synchronized byte[] getOne(int idx) {
if (idx >= this.size()) return null;
Row.Entry indexentry;
indexentry = index.get(this.size() - 1 - idx, true);
indexentry = this.index.get(this.size() - 1 - idx, true);
if (indexentry == null) return null;
return indexentry.getPrimaryKeyBytes();
}
public final synchronized boolean isEmpty() {
return index.isEmpty();
return this.index.isEmpty();
}
public final synchronized int size() {
return index.size();
return this.index.size();
}
public final synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
return index.keys(up, firstKey);
return this.index.keys(up, firstKey);
}
/**
 * @return the key iterator obtained from {@code keys(true, null)}
 */
@Override
public final Iterator<byte[]> iterator() {
return keys(true, null);
}
public final synchronized void close() {
index.close();
index = null;
this.index.close();
this.index = null;
}
/**
 * @return the string representation of the underlying index
 */
@Override
public final String toString() {
return this.index.toString();
}
// set tools
/**
 * Instance form of the static {@code joinConstructive(HandleSet, HandleSet)},
 * called with this set as the first argument.
 *
 * @param other the second set of the join
 * @return the result of the static join
 * @throws RowSpaceExceededException if thrown by the static join
 */
public HandleSet joinConstructive(final HandleSet other) throws RowSpaceExceededException {
return joinConstructive(this, other);
}
@ -299,26 +307,62 @@ public final class HandleSet implements Iterable<byte[]>, Cloneable {
/**
 * Instance form of the static {@code excludeDestructive(HandleSet, HandleSet)},
 * called with this set as the first argument.
 *
 * @param other the second set passed to the static method
 */
public void excludeDestructive(final HandleSet other) {
excludeDestructive(this, other);
}
/**
 * Dispatches the exclusion of {@code set2} from {@code set1} to one of two
 * strategies, chosen by which set is smaller. Does nothing when either
 * set is {@code null} or empty.
 *
 * @param set1 the first set; passed as {@code small} when it has fewer entries than {@code set2}, otherwise as {@code large}
 * @param set2 the second set; both sets are asserted to share the same comparator
 */
private static void excludeDestructive(final HandleSet set1, final HandleSet set2) {
    if (set1 == null || set2 == null) return;
    assert set1.comparator() == set2.comparator();
    if (set1.isEmpty()) return;
    if (set2.isEmpty()) return;
    final boolean firstIsSmaller = set1.size() < set2.size();
    if (firstIsSmaller) {
        excludeDestructiveByTestSmallInLarge(set1, set2);
    } else {
        excludeDestructiveByTestLargeInSmall(set1, set2);
    }
}
/**
 * Removes from {@code small} every key that is also contained in {@code large}.
 *
 * The matching keys are collected first and removed afterwards through
 * {@code small.remove(key)}. Removing through the iterator is not possible:
 * the key iterator handed out by a HandleSet throws
 * {@link UnsupportedOperationException} from {@code remove()}.
 *
 * @param small the set that is modified
 * @param large the set whose keys are excluded from {@code small}
 */
private static void excludeDestructiveByTestSmallInLarge(final HandleSet small, final HandleSet large) {
    final java.util.List<byte[]> matches = new java.util.ArrayList<byte[]>();
    final Iterator<byte[]> mi = small.iterator();
    while (mi.hasNext()) {
        final byte[] key = mi.next();
        if (large.has(key)) matches.add(key);
    }
    // remove only after the iteration has finished so that the iterator
    // never walks over an index that is being modified
    for (final byte[] key : matches) {
        small.remove(key);
    }
}
/**
 * Removes from {@code large} every key that occurs in {@code small}.
 *
 * @param large the set that is modified
 * @param small the set whose keys are removed from {@code large}
 */
private static void excludeDestructiveByTestLargeInSmall(final HandleSet large, final HandleSet small) {
    for (final byte[] key : small) {
        large.remove(key);
    }
}
/**
 * Manual smoke test for Java serialization of a HandleSet: puts two keys
 * into a set, writes the set to a temporary file, reads it back and prints
 * the keys of the restored set.
 *
 * The streams are closed in {@code finally} blocks and the temporary file
 * is removed afterwards, so a failure does not leak file handles or leave
 * files behind.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    HandleSet s = new HandleSet(8, NaturalOrder.naturalOrder, 100);
    try {
        s.put(UTF8.getBytes("Hello"));
        s.put(UTF8.getBytes("World"));
        // test Serializable
        File f = null;
        try {
            // write to file
            f = File.createTempFile("HandleSet", "stream");
            final FileOutputStream fos = new FileOutputStream(f);
            try {
                final ObjectOutputStream out = new ObjectOutputStream(fos);
                out.writeObject(s);
                out.close(); // flushes the object stream
            } finally {
                fos.close(); // releases the handle even if writing failed
            }
            // read from file
            final FileInputStream fis = new FileInputStream(f);
            final HandleSet s1;
            try {
                final ObjectInputStream in = new ObjectInputStream(fis);
                s1 = (HandleSet) in.readObject();
                in.close();
            } finally {
                fis.close(); // releases the handle even if reading failed
            }
            for (byte[] b: s1) {
                System.out.println(UTF8.String(b));
            }
            s1.close();
        } catch(IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            // do not leave the temporary file behind
            if (f != null && !f.delete()) f.deleteOnExit();
        }
    } catch (RowSpaceExceededException e) {
        e.printStackTrace();
    }
    s.close();
    Log.shutdown();
}
}

@ -82,6 +82,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return (int) ((this.rowdef.objectOrder.cardinal(row.bytes(), 0, row.getPrimaryKeyLength()) / 17) % (this.cluster.length));
}
@Override
public final byte[] smallestKey() {
final HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.cluster.length);
synchronized (this.cluster) {
@ -94,6 +95,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return keysort.smallestKey();
}
@Override
public final byte[] largestKey() {
final HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.cluster.length);
synchronized (this.cluster) {
@ -115,6 +117,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return r;
}
@Override
public final void addUnique(final Entry row) throws RowSpaceExceededException {
final int i = indexFor(row);
assert i >= 0 : "i = " + i;
@ -126,28 +129,37 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
for (final Entry row: rows) addUnique(row);
}
/**
 * Clears every non-null RAM index of the cluster while holding the
 * lock on the cluster array.
 */
@Override
public final void clear() {
    synchronized (this.cluster) {
        for (final RAMIndex part : this.cluster) {
            if (part == null) continue;
            part.clear();
        }
    }
}
public final void close() {
clear();
@Override
public final void close() {
synchronized (this.cluster) {
for (final RAMIndex c: this.cluster) if (c != null) c.close();
for (final RAMIndex c: this.cluster) {
if (c != null) {
//Log.logInfo("RAMIndexCluster", "Closing RAM index at " + c.getName() + " with " + c.size() + " entries ...");
c.close();
}
}
}
}
/**
 * No-op for this index.
 */
@Override
public final void deleteOnExit() {
// nothing to do here
}
/**
 * @return always {@code null}, because this index has no file name
 */
@Override
public final String filename() {
// we don't have a file name
return null;
}
@Override
public final Entry get(final byte[] key, final boolean forcecopy) {
final int i = indexFor(key);
if (i < 0) return null;
@ -156,8 +168,10 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return r.get(key, forcecopy);
}
@Override
public Map<byte[], Row.Entry> get(final Collection<byte[]> keys, final boolean forcecopy) throws IOException, InterruptedException {
final Map<byte[], Row.Entry> map = new TreeMap<byte[], Row.Entry>(row().objectOrder);
Row.Entry entry;
for (final byte[] key: keys) {
entry = get(key, forcecopy);
@ -166,6 +180,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return map;
}
@Override
public final boolean has(final byte[] key) {
final int i = indexFor(key);
if (i < 0) return false;
@ -174,6 +189,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return r.has(key);
}
@Override
public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
synchronized (this.cluster) {
final Collection<CloneableIterator<byte[]>> col = new ArrayList<CloneableIterator<byte[]>>();
@ -193,6 +209,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public final boolean put(final Entry row) throws RowSpaceExceededException {
final int i = indexFor(row);
assert i >= 0 : "i = " + i;
@ -200,18 +217,21 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return accessArray(i).put(row);
}
/**
 * Deletes the entry with the given key from the cluster member selected
 * by {@code indexFor(key)}.
 *
 * @param key the primary key of the entry to delete
 * @return {@code false} when {@code indexFor} yields a negative index,
 *         otherwise the result of the member's {@code delete}
 */
@Override
public final boolean delete(final byte[] key) {
    final int slot = indexFor(key);
    return slot >= 0 && accessArray(slot).delete(key);
}
/**
 * Removes the entry with the given key from the cluster member selected
 * by {@code indexFor(key)}.
 *
 * @param key the primary key of the entry to remove
 * @return {@code null} when {@code indexFor} yields a negative index,
 *         otherwise the result of the member's {@code remove}
 */
@Override
public final Entry remove(final byte[] key) {
    final int slot = indexFor(key);
    return (slot < 0) ? null : accessArray(slot).remove(key);
}
@Override
public final ArrayList<RowCollection> removeDoubles() throws RowSpaceExceededException {
final ArrayList<RowCollection> col = new ArrayList<RowCollection>();
synchronized (this.cluster) {
@ -225,6 +245,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return col;
}
@Override
public final Entry removeOne() {
synchronized (this.cluster) {
for (int i = 0; i < this.cluster.length; i++) {
@ -238,6 +259,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return null;
}
@Override
public List<Row.Entry> top(final int count) {
final List<Row.Entry> list = new ArrayList<Row.Entry>();
synchronized (this.cluster) {
@ -256,6 +278,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return list;
}
@Override
public final Entry replace(final Entry row) throws RowSpaceExceededException {
final int i = indexFor(row);
assert i >= 0 : "i = " + i;
@ -263,10 +286,12 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return accessArray(i).replace(row);
}
/**
 * @return the row definition of this cluster
 */
@Override
public final Row row() {
return this.rowdef;
}
@Override
@SuppressWarnings("unchecked")
public final CloneableIterator<Entry> rows(final boolean up, final byte[] firstKey) {
synchronized (this.cluster) {
@ -282,10 +307,12 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
}
}
/**
 * @return the iterator obtained from {@code rows(true, null)}
 */
@Override
public final CloneableIterator<Entry> rows() {
return rows(true, null);
}
@Override
public final int size() {
int c = 0;
synchronized (this.cluster) {
@ -294,6 +321,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return c;
}
@Override
public long mem() {
long m = 0;
synchronized (this.cluster) {
@ -302,6 +330,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return m;
}
@Override
public final boolean isEmpty() {
synchronized (this.cluster) {
for (final RAMIndex i: this.cluster) if (i != null && !i.isEmpty()) return false;
@ -309,6 +338,7 @@ public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Clonea
return true;
}
/**
 * @return the iterator obtained from {@code rows(true, null)}
 */
@Override
public final Iterator<Entry> iterator() {
return this.rows(true, null);
}

@ -27,6 +27,7 @@
package net.yacy.kelondro.index;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
@ -48,9 +49,10 @@ import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.kelondroException;
public final class Row {
public final class Row implements Serializable {
//private final static Pattern commaPattern = Pattern.compile(",");
private static final long serialVersionUID=-148412365988669116L;
protected final Column[] row;
public final int[] colstart;
@ -235,7 +237,9 @@ public final class Row {
}
public class Entry implements Comparable<Entry>, Comparator<Entry>, Cloneable {
public class Entry implements Comparable<Entry>, Comparator<Entry>, Cloneable, Serializable {
private static final long serialVersionUID=-2576312347345553495L;
private byte[] rowinstance;
private int offset; // the offset where the row starts within rowinstance
@ -257,7 +261,7 @@ public final class Row {
public Entry(final byte[] newrow, final int start, final boolean forceclone) {
if (forceclone || newrow.length - start < Row.this.objectsize) {
this.rowinstance = new byte[Row.this.objectsize];
System.arraycopy(newrow, start, this.rowinstance, 0, Row.this.objectsize);
System.arraycopy(newrow, start, this.rowinstance, 0, Math.min(newrow.length, Row.this.objectsize));
this.offset = 0;
} else {
this.rowinstance = newrow;
@ -632,7 +636,10 @@ public final class Row {
}
public final class EntryIndex extends Entry {
public final class EntryIndex extends Entry implements Serializable {
private static final long serialVersionUID=153069052590699231L;
private final int index;
public EntryIndex(final byte[] row, final int i) {
super(row, false);

@ -26,6 +26,7 @@ package net.yacy.kelondro.index;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -46,10 +47,11 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.kelondroException;
public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>, Cloneable {
public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>, Cloneable, Serializable {
private static final long serialVersionUID=-4670634138825982705L;
private static final byte[] EMPTY_CACHE = new byte[0];
private static final byte[] EMPTY_CACHE = new byte[0];
public static final long growfactorLarge100 = 140L;
public static final long growfactorSmall100 = 120L;
private static final int isortlimit = 20;
@ -134,6 +136,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.lastTimeWrote = lastTimeWrote;
}
/**
 * Creates a new RowCollection from this collection's row definition, chunk
 * cache, chunk count, sort bound and last-write time.
 * NOTE(review): whether the chunk cache is copied or shared depends on the
 * invoked constructor, which is not visible here - confirm.
 */
@Override
public RowCollection clone() {
return new RowCollection(this.rowdef, this.chunkcache, this.chunkcount, this.sortBound, this.lastTimeWrote);
}
@ -334,6 +337,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
return b;
}
@Override
public synchronized final Row.Entry get(final int index, final boolean clone) {
assert (index >= 0) : "get: access with index " + index + " is below zero";
assert (index < this.chunkcount) : "get: access with index " + index + " is above chunkcount " + this.chunkcount + "; sortBound = " + this.sortBound;
@ -478,6 +482,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
}
/**
 * Removes the row at the given position via {@code removeRow(p, true)}.
 * NOTE(review): the {@code true} flag presumably requests that the order of
 * the remaining rows is kept - confirm against removeRow.
 *
 * @param p index of the row to remove
 */
@Override
public final void delete(final int p) {
removeRow(p, true);
}
@ -533,6 +538,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.lastTimeWrote = System.currentTimeMillis();
}
/**
 * @return the current value of {@code chunkcount}
 */
@Override
public int size() {
return this.chunkcount;
}
@ -566,14 +572,17 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.keepOrderWhenRemoving = keepOrderWhenRemoving;
}
/**
 * @return {@code true} while the iterator position is below the chunk count
 *         of the enclosing collection
 */
@Override
public boolean hasNext() {
return this.p < RowCollection.this.chunkcount;
}
/**
 * Returns the key at the current position and advances the position by one.
 *
 * @return the result of {@code getKey} for the position before the increment
 */
@Override
public byte[] next() {
return getKey(this.p++);
}
@Override
public void remove() {
this.p--;
removeRow(this.p, this.keepOrderWhenRemoving);
@ -583,6 +592,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
/**
 * Returns an iterator over the row entries of this collection.
 *
 * @return a new {@code rowIterator}
 */
@Override
public Iterator<Row.Entry> iterator() {
// iterates kelondroRow.Entry - type entries
return new rowIterator();
}
@ -601,14 +611,17 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.p = 0;
}
/**
 * @return {@code true} while the iterator position is below the chunk count
 *         of the enclosing collection
 */
@Override
public boolean hasNext() {
return this.p < RowCollection.this.chunkcount;
}
/**
 * Returns the row at the current position and advances the position by one.
 *
 * @return the result of {@code get(position, true)}; {@code true} is the
 *         {@code clone} argument of {@code get}
 */
@Override
public Row.Entry next() {
return get(this.p++, true);
}
@Override
public void remove() {
this.p--;
removeRow(this.p, true);
@ -637,6 +650,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.S = S;
}
/**
 * Runs {@code partition(L, R, S, buffer)} on the collection {@code rc},
 * with a freshly allocated buffer of one row size.
 *
 * @return the partition result, boxed as an {@code Integer}
 * @throws Exception declared by the interface; not thrown explicitly here
 */
@Override
public Integer call() throws Exception {
    final byte[] buffer = new byte[this.rc.rowdef.objectsize];
    final int result = this.rc.partition(this.L, this.R, this.S, buffer);
    return Integer.valueOf(result);
}
@ -817,6 +831,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
return true;
}
@Override
public synchronized String toString() {
final StringBuilder s = new StringBuilder(80);
final Iterator<Row.Entry> i = iterator();

@ -25,6 +25,7 @@
package net.yacy.kelondro.index;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
@ -42,8 +43,9 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.MemoryControl;
public class RowSet extends RowCollection implements Index, Iterable<Row.Entry> {
public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>, Serializable {
private static final long serialVersionUID=-6036029762440788566L;
private static final int collectionReSortLimit = 3000;
public RowSet(final RowSet rs) {
@ -118,20 +120,24 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
super(rowdef, chunkcache, chunkcount, sortBound, lastTimeWrote);
}
/**
 * Creates a new RowSet from the inherited row definition, chunk cache,
 * chunk count, sort bound and last-write time.
 * NOTE(review): whether the chunk cache is copied or shared depends on the
 * invoked constructor chain, which is not fully visible here - confirm.
 */
@Override
public RowSet clone() {
return new RowSet(super.rowdef, super.chunkcache, super.chunkcount, super.sortBound, super.lastTimeWrote);
}
public void reset() {
@Override
public void reset() {
super.reset();
}
/**
 * Tests whether an entry with the given key exists in this set.
 *
 * @param key the primary key; asserted to have the primary key length of the row definition
 * @return {@code true} when {@code find(key, 0)} yields a non-negative index
 */
@Override
public final synchronized boolean has(final byte[] key) {
    assert key.length == this.rowdef.primaryKeyLength;
    return find(key, 0) >= 0;
}
@Override
public final synchronized Row.Entry get(final byte[] key, final boolean forcecopy) {
assert key.length == this.rowdef.primaryKeyLength;
final int index = find(key, 0);
@ -139,6 +145,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
return get(index, forcecopy);
}
@Override
public Map<byte[], Row.Entry> get(final Collection<byte[]> keys, final boolean forcecopy) throws IOException, InterruptedException {
final Map<byte[], Row.Entry> map = new TreeMap<byte[], Row.Entry>(row().objectOrder);
Row.Entry entry;
@ -156,6 +163,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public final boolean put(final Row.Entry entry) throws RowSpaceExceededException {
assert (entry != null);
final byte[] key = entry.getPrimaryKeyBytes();
@ -176,6 +184,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
@Override
public final Row.Entry replace(final Row.Entry entry) throws RowSpaceExceededException {
assert (entry != null);
final byte[] key = entry.getPrimaryKeyBytes();
@ -227,6 +236,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
* if the entry was found, return the entry, but delete the entry from the set
* if the entry was not found, return null.
*/
@Override
public final synchronized boolean delete(final byte[] a) {
boolean exists = false;
int index;
@ -258,6 +268,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
@Override
public final synchronized Row.Entry remove(final byte[] a) {
Row.Entry entry = null;
int index;
@ -346,6 +357,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
return super.keys(true);
}
/**
 * Creates a key iterator over this set.
 *
 * @param up iteration direction handed to the {@code keyIterator}
 * @param firstKey start key handed to the {@code keyIterator}
 * @return a new {@code keyIterator} built from the two arguments
 */
@Override
public final synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
return new keyIterator(up, firstKey);
}
@ -372,10 +384,12 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
public final keyIterator clone(final Object second) {
@Override
public final keyIterator clone(final Object second) {
return new keyIterator(this.up, (byte[]) second);
}
@Override
public final boolean hasNext() {
if (this.p < 0) return false;
if (this.p >= size()) return false;
@ -386,27 +400,32 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
/**
 * Returns the key at the current position and then moves the position by one:
 * forward when {@code up} is set, backward otherwise.
 *
 * @return the key found at the position held before the move
 */
@Override
public final byte[] next() {
    final byte[] current = getKey(this.p);
    if (this.up) {
        this.p++;
    } else {
        this.p--;
    }
    return current;
}
/**
 * Removal through this iterator is not supported.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final void remove() {
throw new UnsupportedOperationException();
}
}
/**
 * Returns an iterator over the {@code Row.Entry} elements of this set.
 * {@code sort()} is invoked first, then iteration is delegated to the superclass.
 *
 * @return the iterator supplied by {@code super.iterator()}
 */
@Override
public final synchronized Iterator<Row.Entry> iterator() {
// iterates kelondroRow.Entry - type entries
sort();
return super.iterator();
}
/**
 * Creates a row iterator over this set.
 *
 * @param up iteration direction handed to the {@code rowIterator}
 * @param firstKey start key handed to the {@code rowIterator}
 * @return a new {@code rowIterator} built from the two arguments
 */
@Override
public final synchronized CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
return new rowIterator(up, firstKey);
}
/**
 * Creates a row iterator with direction {@code true} (up) and no start key.
 *
 * @return a new {@code rowIterator(true, null)}
 */
@Override
public final synchronized CloneableIterator<Row.Entry> rows() {
return new rowIterator(true, null);
}
@ -432,10 +451,12 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
public final rowIterator clone(final Object second) {
@Override
public final rowIterator clone(final Object second) {
return new rowIterator(this.up, (byte[]) second);
}
@Override
public final boolean hasNext() {
if (this.p < 0) return false;
if (this.p >= size()) return false;
@ -446,12 +467,14 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
}
}
/**
 * Returns the row at the current position (fetched with the force-copy flag set)
 * and then moves the position by one: forward when {@code up} is set,
 * backward otherwise.
 *
 * @return the row found at the position held before the move
 */
@Override
public final Row.Entry next() {
    final Row.Entry current = get(this.p, true);
    if (this.up) {
        this.p++;
    } else {
        this.p--;
    }
    return current;
}
/**
 * Removal through this iterator is not supported.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public final void remove() {
throw new UnsupportedOperationException();
}
@ -677,10 +700,12 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
return randomHash(r.nextLong(), r.nextLong());
}
/**
 * Always returns {@code null}.
 *
 * @return {@code null}
 */
@Override
public String filename() {
return null;
}
/**
 * No-op: this implementation has no file that could be deleted.
 */
@Override
public void deleteOnExit() {
// do nothing, there is no file
}

@ -1,4 +1,4 @@
//httpByteCountinputStream.java
//httpByteCountinputStream.java
//-----------------------
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
@ -29,24 +29,23 @@ package net.yacy.kelondro.io;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
//import java.util.HashMap;
import net.yacy.kelondro.logging.Log;
public final class ByteCountInputStream extends FilterInputStream {
// private final static Object syncObject = new Object();
// private final static HashMap<String, Long> byteCountInfo = new HashMap<String, Long>(2);
// private static long globalByteCount = 0;
private boolean finished = false;
protected long byteCount;
private String byteCountAccountName = null;
private String byteCountAccountName = null;
/**
 * Creates a counting stream without an account name by delegating to the
 * two-argument constructor with {@code null} as the account name.
 *
 * @param inputStream the {@link InputStream} to read from
 */
protected ByteCountInputStream(final InputStream inputStream) {
this(inputStream, null);
}
/**
* Constructor of this class
* @param inputStream the {@link InputStream} to read from
@ -54,7 +53,7 @@ public final class ByteCountInputStream extends FilterInputStream {
public ByteCountInputStream(final InputStream inputStream, final String accountName) {
// delegate to the three-argument constructor with an initial byte count of 0
this(inputStream,0,accountName);
}
/**
* Constructor of this class
* @param inputStream the {@link InputStream} to read from
@ -64,14 +63,16 @@ public final class ByteCountInputStream extends FilterInputStream {
super(inputStream);
this.byteCount = initByteCount;
this.byteCountAccountName = accountName;
}
}
/**
 * Reads bytes into {@code b} and adds the number of bytes read to the byte counter.
 * A non-positive result (including the end-of-stream marker) leaves the counter unchanged.
 * <p>
 * NOTE(review): {@code FilterInputStream.read(byte[])} is specified to call
 * {@code read(b, 0, b.length)}, which this class overrides. If that override also
 * adds to {@code byteCount}, bytes read through this method are counted twice - confirm.
 *
 * @param b destination buffer
 * @return the value returned by {@code super.read(b)}
 * @throws IOException if the underlying read fails
 */
@Override
public final int read(final byte[] b) throws IOException {
    final int received = super.read(b);
    if (received <= 0) {
        return received;
    }
    this.byteCount += received;
    return received;
}
@Override
public final int read(final byte[] b, final int off, final int len) throws IOException {
try {
final int readCount = super.read(b, off, len);
@ -82,31 +83,33 @@ public final class ByteCountInputStream extends FilterInputStream {
}
}
/**
 * Reads a single byte from the underlying stream and counts it.
 * <p>
 * The counter is advanced only after a byte was actually delivered; an
 * end-of-stream result ({@code -1}) or a failing read leaves it unchanged.
 *
 * @return the byte read as a value in {@code 0..255}, or {@code -1} at end of stream
 * @throws IOException if the underlying read fails
 */
@Override
public final int read() throws IOException {
    final int value = super.read();
    // -1 signals end of stream: no byte was consumed, so nothing to count
    if (value >= 0) this.byteCount++;
    return value;
}
@Override
public final long skip(final long len) throws IOException {
final long skipCount = super.skip(len);
if (skipCount > 0) this.byteCount += skipCount;
if (skipCount > 0) this.byteCount += skipCount;
return skipCount;
}
/**
 * @return the current value of the byte counter
 */
public final long getCount() {
return this.byteCount;
}
/**
 * @return the account name this stream was created with; may be {@code null}
 */
public final String getAccountName() {
return this.byteCountAccountName;
}
// public final static long getGlobalCount() {
// synchronized (syncObject) {
// return globalByteCount;
// }
// }
// public final static long getAccountCount(final String accountName) {
// synchronized (syncObject) {
// if (byteCountInfo.containsKey(accountName)) {
@ -115,8 +118,9 @@ public final class ByteCountInputStream extends FilterInputStream {
// return 0;
// }
// }
public final void close() throws IOException {
@Override
public final synchronized void close() throws IOException {
try {
super.close();
} catch (OutOfMemoryError e) {
@ -124,10 +128,10 @@ public final class ByteCountInputStream extends FilterInputStream {
}
this.finish();
}
public final void finish() {
if (this.finished) return;
this.finished = true;
ByteCount.addAccountCount(this.byteCountAccountName, this.byteCount);
// synchronized (syncObject) {
@ -140,10 +144,10 @@ public final class ByteCountInputStream extends FilterInputStream {
// lastByteCount += this.byteCount;
// byteCountInfo.put(this.byteCountAccountName, Long.valueOf(lastByteCount));
// }
//
// }
//
// }
}
// public final static void resetCount() {
// synchronized (syncObject) {
// globalByteCount = 0;

@ -473,7 +473,7 @@ public final class CharBuffer extends Writer {
}
@Override
public void close() {
public synchronized void close() {
this.length = 0;
this.offset = 0;
this.buffer = null; // assist with garbage collection
@ -484,4 +484,4 @@ public final class CharBuffer extends Writer {
trimToSize();
}
}
}

@ -1,4 +1,4 @@
//ConsoleOutErrHandler.java
//ConsoleOutErrHandler.java
//-------------------------------------
//part of YACY
//(C) by Michael Peter Christen; mc@yacy.net
@ -42,37 +42,37 @@ public final class ConsoleOutErrHandler extends Handler {
private boolean ignoreCtrlChr = false;
private Level splitLevel = Level.WARNING;
private final Handler stdOutHandler;
private final Handler stdErrHandler;
private final Handler stdErrHandler;
public ConsoleOutErrHandler() {
this.stdOutHandler = new ConsoleOutHandler();
this.stdErrHandler = new ConsoleHandler();
this.stdErrHandler = new ConsoleHandler();
this.stdOutHandler.setLevel(Level.FINEST);
this.stdErrHandler.setLevel(Level.WARNING);
configure();
}
/**
* Get any configuration properties set
*/
private void configure() {
final LogManager manager = LogManager.getLogManager();
final String className = getClass().getName();
final String level = manager.getProperty(className + ".level");
setLevel((level == null) ? Level.INFO : Level.parse(level));
final Level levelStdOut = parseLevel(manager.getProperty(className + ".levelStdOut"));
final Level levelSplit = parseLevel(manager.getProperty(className + ".levelSplit"));
final Level levelStdErr = parseLevel(manager.getProperty(className + ".levelStdErr"));
setLevels(levelStdOut,levelSplit,levelStdErr);
final String filter = manager.getProperty(className + ".filter");
setFilter(makeFilter(filter));
final String formatter = manager.getProperty(className + ".formatter");
setFormatter(makeFormatter(formatter));
final String encoding = manager.getProperty(className + ".encoding");
try {
this.stdOutHandler.setEncoding(encoding);
@ -80,12 +80,12 @@ public final class ConsoleOutErrHandler extends Handler {
} catch (final UnsupportedEncodingException e) {
Log.logException(e);
}
final String ignoreCtrlChrStr = manager.getProperty(className + ".ignoreCtrlChr");
this.ignoreCtrlChr = (ignoreCtrlChrStr==null) ? false : "true".equalsIgnoreCase(ignoreCtrlChrStr);
}
}
private Level parseLevel(final String levelName) {
try {
return (levelName == null) ? Level.INFO : Level.parse(levelName);
@ -93,10 +93,10 @@ public final class ConsoleOutErrHandler extends Handler {
return Level.ALL;
}
}
private Filter makeFilter(final String name) {
if (name == null) return null;
Filter f = null;
try {
final Class<?> c = Class.forName(name);
@ -107,11 +107,11 @@ public final class ConsoleOutErrHandler extends Handler {
}
}
return f;
}
}
private Formatter makeFormatter(final String name) {
if (name == null) return null;
Formatter f = null;
try {
final Class<?> c = Class.forName(name);
@ -120,12 +120,13 @@ public final class ConsoleOutErrHandler extends Handler {
f = new SimpleFormatter();
}
return f;
}
}
@Override
public final void publish(final LogRecord record) {
if (!isLoggable(record)) return;
if (this.ignoreCtrlChr) {
String msg = record.getMessage();
if (msg != null) {
@ -133,8 +134,8 @@ public final class ConsoleOutErrHandler extends Handler {
}
record.setMessage(msg);
}
if (record.getLevel().intValue() >= splitLevel.intValue()) {
if (record.getLevel().intValue() >= this.splitLevel.intValue()) {
this.stdErrHandler.publish(record);
} else {
this.stdOutHandler.publish(record);
@ -142,27 +143,29 @@ public final class ConsoleOutErrHandler extends Handler {
flush();
}
/**
 * Flushes both wrapped handlers, the stdout handler first and the stderr handler second.
 */
@Override
public void flush() {
this.stdOutHandler.flush();
this.stdErrHandler.flush();
}
public void close() throws SecurityException {
this.stdOutHandler.close();
@Override
public synchronized void close() throws SecurityException {
this.stdOutHandler.close();
this.stdErrHandler.close();
}
/**
 * Sets the level of this handler by delegating to the superclass.
 * The levels of the two wrapped handlers are not touched here.
 *
 * @param newLevel the new level for this handler
 * @throws SecurityException as declared by the superclass method
 */
@Override
public synchronized void setLevel(final Level newLevel) throws SecurityException {
super.setLevel(newLevel);
}
/**
 * Configures the two wrapped handlers and the split threshold in one call.
 *
 * @param stdOutLevel level assigned to the stdout handler
 * @param splitLevel threshold stored in {@code this.splitLevel}
 * @param stdErrLevel level assigned to the stderr handler
 * @throws SecurityException if one of the wrapped handlers rejects the level change
 */
public void setLevels(final Level stdOutLevel, final Level splitLevel, final Level stdErrLevel) throws SecurityException {
this.stdOutHandler.setLevel(stdOutLevel);
this.splitLevel = splitLevel;
this.stdErrHandler.setLevel(stdErrLevel);
}
@Override
public void setFormatter(final Formatter newFormatter) throws SecurityException {
super.setFormatter(newFormatter);

@ -27,6 +27,7 @@
package net.yacy.kelondro.order;
import java.io.Serializable;
import java.util.Comparator;
import net.yacy.cora.document.UTF8;
@ -35,7 +36,9 @@ import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.Order;
public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Comparator<byte[]>, Cloneable {
public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Comparator<byte[]>, Cloneable, Serializable {
private static final long serialVersionUID=980647587445343851L;
public static final byte[] alpha_standard = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".getBytes();
public static final byte[] alpha_enhanced = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".getBytes();

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -24,88 +24,91 @@
package net.yacy.kelondro.order;
import java.io.Serializable;
public class Bitfield implements Cloneable {
public class Bitfield implements Cloneable, Serializable {
// the bitfield implements a binary array. Such arrays may be exported in a base64-String
private byte[] bb;
private static final long serialVersionUID=3605122793792478052L;
private byte[] bb;
/**
 * Creates an empty bitfield by delegating to {@code Bitfield(int)} with a byte length of 0.
 */
public Bitfield() {
this(0);
}
/**
 * Creates a bitfield backed by the given array. The array is used directly,
 * not copied. A {@code null} argument yields an empty (zero-length) bitfield.
 *
 * @param b the backing bytes, or {@code null}
 */
public Bitfield(final byte[] b) {
    this.bb = (b == null) ? new byte[0] : b;
}
public Bitfield(final int bytelength) {
this.bb= new byte[bytelength];
for (int i = 0 ; i < bytelength; i++) bb[i] = 0;
for (int i = 0 ; i < bytelength; i++) this.bb[i] = 0;
}
public Bitfield(final int bytelength, final String exported) {
// imports a b64-encoded bitfield
final byte[] b = Base64Order.enhancedCoder.decode(exported);
if (b.length == bytelength) {
bb = b;
this.bb = b;
} else {
bb = new byte[bytelength];
this.bb = new byte[bytelength];
assert (b.length <= bytelength) : "exported = " + exported + " has bytelength = " + b.length + " > " + bytelength;
System.arraycopy(b, 0, bb, 0, Math.min(b.length, bytelength));
System.arraycopy(b, 0, this.bb, 0, Math.min(b.length, bytelength));
}
}
/**
 * Creates an independent copy of this bitfield; the backing bytes are duplicated.
 *
 * @return a new {@code Bitfield} with the same content
 */
@Override
public Bitfield clone() {
    final byte[] copy = new byte[this.bb.length];
    System.arraycopy(this.bb, 0, copy, 0, copy.length);
    return new Bitfield(copy);
}
public void set(final int pos, final boolean value) {
assert (pos >= 0);
final int slot = pos >> 3; // /8
if (slot >= bb.length) {
if (slot >= this.bb.length) {
// extend capacity
byte[] nb = new byte[slot + 1];
System.arraycopy(bb, 0, nb, 0, bb.length);
for (int i = bb.length; i < nb.length; i++) nb[i] = 0;
bb = nb;
System.arraycopy(this.bb, 0, nb, 0, this.bb.length);
for (int i = this.bb.length; i < nb.length; i++) nb[i] = 0;
this.bb = nb;
}
if (value) {
bb[slot] = (byte) (bb[slot] | (1 << (pos % 8)));
this.bb[slot] = (byte) (this.bb[slot] | (1 << (pos % 8)));
} else {
bb[slot] = (byte) (bb[slot] & (0xff ^ (1 << (pos % 8))));
this.bb[slot] = (byte) (this.bb[slot] & (0xff ^ (1 << (pos % 8))));
}
}
public boolean get(final int pos) {
assert (pos >= 0);
final int slot = pos >> 3; // /8
if (slot >= bb.length) return false;
return (bb[slot] & (1 << (pos % 8))) > 0;
if (slot >= this.bb.length) return false;
return (this.bb[slot] & (1 << (pos % 8))) > 0;
}
public int length() {
return bb.length << 3;
return this.bb.length << 3;
}
public String exportB64() {
return Base64Order.enhancedCoder.encode(bb);
return Base64Order.enhancedCoder.encode(this.bb);
}
public byte[] bytes() {
return bb;
return this.bb;
}
/**
 * Renders the bitfield as a string of '1' and '0' characters, one per bit,
 * with the highest bit position first and position 0 last.
 */
@Override
public String toString() {
    final int bits = length();
    final StringBuilder out = new StringBuilder(bits);
    int pos = bits - 1;
    while (pos >= 0) {
        out.append(this.get(pos) ? '1' : '0');
        pos--;
    }
    return out.toString();
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
@ -116,17 +119,17 @@ public class Bitfield implements Cloneable {
for (int i = 0; i < this.bb.length; i++) if (this.bb[i] != other.bb[i]) return false;
return true;
}
/**
 * Hash code derived from the string produced by {@link #toString()}.
 */
@Override
public int hashCode() {
return this.toString().hashCode();
}
/**
 * Replaces the content of this bitfield with the bitwise AND of itself and {@code x}.
 * <p>
 * Bits of this field at positions that {@code x} does not cover are cleared,
 * because {@code get(int)} reports every position outside a field as {@code false}.
 * The length of this bitfield is not changed.
 *
 * @param x the other operand; it is not modified
 */
public void and(final Bitfield x) {
    final int own = this.length();
    final int common = Math.min(x.length(), own);
    for (int i = 0; i < common; i++) set(i, this.get(i) && x.get(i));
    // positions beyond the end of x count as unset bits in x, so the result is 0 there
    for (int i = common; i < own; i++) set(i, false);
}
public void or(final Bitfield x) {
final int c = Math.min(x.length(), this.length());
for (int i = 0; i < c; i++) set(i, this.get(i) || x.get(i));
@ -134,7 +137,7 @@ public class Bitfield implements Cloneable {
for (int i = c; i < x.length(); i++) set(i, x.get(i));
}
}
public void xor(final Bitfield x) {
final int c = Math.min(x.length(), this.length());
for (int i = 0; i < c; i++) set(i, this.get(i) != x.get(i));
@ -142,13 +145,13 @@ public class Bitfield implements Cloneable {
for (int i = c; i < x.length(); i++) set(i, x.get(i));
}
}
/**
 * Tests whether this bitfield and {@code x} have at least one set bit in common.
 * Only the positions covered by both fields are inspected.
 *
 * @param x the bitfield to compare with
 * @return {@code true} if some position is set in both fields
 */
public boolean anyOf(final Bitfield x) {
    final int common = Math.min(x.length(), this.length());
    for (int pos = 0; pos < common; pos++) {
        if (!x.get(pos)) continue;
        if (this.get(pos)) return true;
    }
    return false;
}
public boolean allOf(final Bitfield x) {
final int c = Math.min(x.length(), this.length());
for (int i = 0; i < c; i++) if ((x.get(i)) && (!(this.get(i)))) return false;
@ -157,7 +160,7 @@ public class Bitfield implements Cloneable {
}
return true;
}
public static void main(final String[] args) {
Bitfield test = new Bitfield(4);
final int l = test.length();
@ -166,19 +169,19 @@ public class Bitfield implements Cloneable {
for (int i = 0; i < l/2; i++) {
System.out.println(test.exportB64());
test.set(i, true);
System.out.println(i + ":" + test.toString());
System.out.println(i + ":" + test.toString());
}
for (int i = l/2; i < l; i++) {
System.out.println(test.exportB64());
test = new Bitfield(4, test.exportB64());
test.set(i, true);
System.out.println(i + ":" + test.toString());
System.out.println(i + ":" + test.toString());
}
System.out.println(test.exportB64());
for (int i = l - 1; i >= 0; i--) {
test.set(i, false);
System.out.println(i + ":" + test.toString());
System.out.println(i + ":" + test.toString());
}
System.out.println("after: " + test.toString());
System.out.println("after: " + test.toString());
}
}

@ -26,6 +26,7 @@
package net.yacy.kelondro.order;
import java.io.Serializable;
import java.util.Comparator;
import java.util.Iterator;
@ -34,8 +35,9 @@ import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.Order;
import net.yacy.kelondro.index.HandleSet;
public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrder, Comparator<byte[]>, Cloneable {
public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrder, Comparator<byte[]>, Cloneable, Serializable {
private static final long serialVersionUID=7170913936645013046L;
public static final ByteOrder naturalOrder = new NaturalOrder(true);
public static final Comparator<String> naturalComparator = new StringOrder(naturalOrder);
public NaturalOrder(final boolean ascending) {
@ -47,14 +49,17 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return new HandleSet(keylength, this, space);
}
/**
 * Always returns {@code true}; the argument is not inspected.
 */
@Override
public boolean wellformed(final byte[] a) {
return true;
}
/**
 * Always returns {@code true}; none of the arguments is inspected.
 */
@Override
public boolean wellformed(final byte[] a, final int astart, final int alength) {
return true;
}
@Override
public final Order<byte[]> clone() {
final NaturalOrder o = new NaturalOrder(this.asc);
o.rotate(this.zero);
@ -75,6 +80,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return null;
}
@Override
public final String signature() {
if (!this.asc) return "nd";
if ( this.asc) return "nu";
@ -92,6 +98,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return c;
}
@Override
public final long cardinal(final byte[] key) {
if (this.zero == null) return cardinalI(key, 0, key.length);
final long zeroCardinal = cardinalI(this.zero, 0, this.zero.length);
@ -100,6 +107,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return Long.MAX_VALUE - keyCardinal + zeroCardinal;
}
@Override
public long cardinal(final byte[] key, final int off, final int len) {
if (this.zero == null) return cardinalI(key, off, len);
final long zeroCardinal = cardinalI(this.zero, 0, this.zero.length);
@ -150,6 +158,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
// is less than, equal to, or greater than the second.
// two arrays are also equal if one array is a subset of the other's array
// with filled-up char(0)-values
@Override
public final int compare(final byte[] a, final byte[] b) {
if (a.length == b.length) {
return (this.asc) ? compare0(a, b, a.length) : compare0(b, 0, a, 0, a.length);
@ -165,10 +174,12 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return (a.length > b.length) ? -1 : 1;
}
/**
 * Compares the first {@code length} bytes of {@code a} and {@code b} through {@code compare0}.
 * For a descending order ({@code asc} unset) the operands are swapped.
 */
@Override
public final int compare(final byte[] a, final byte[] b, final int length) {
    if (this.asc) {
        return compare0(a, b, length);
    }
    return compare0(b, a, length);
}
/**
 * Compares {@code length} bytes of {@code a} starting at {@code aoffset} with
 * {@code length} bytes of {@code b} starting at {@code boffset} through {@code compare0}.
 * For a descending order ({@code asc} unset) the operands are swapped together with their offsets.
 */
@Override
public final int compare(final byte[] a, final int aoffset, final byte[] b, final int boffset, final int length) {
    if (this.asc) {
        return compare0(a, aoffset, b, boffset, length);
    }
    return compare0(b, boffset, a, aoffset, length);
}
@ -193,6 +204,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return sig(az - bz);
}
@Override
public final boolean equal(final byte[] a, final byte[] b) {
if ((a == null) && (b == null)) return true;
if ((a == null) || (b == null)) return false;
@ -206,6 +218,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return true;
}
@Override
public final boolean equal(final byte[] a, int astart, final byte[] b, int bstart, int length) {
if ((a == null) && (b == null)) return true;
if ((a == null) || (b == null)) return false;
@ -284,10 +297,12 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
this.b256Iterator = b256Iterator;
}
/**
 * Delegates to the wrapped {@code b256Iterator}.
 */
@Override
public boolean hasNext() {
return this.b256Iterator.hasNext();
}
@Override
public Long next() {
final byte[] b = this.b256Iterator.next();
assert (b != null);
@ -295,6 +310,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
return Long.valueOf(decodeLong(b));
}
/**
 * Delegates removal to the wrapped {@code b256Iterator}.
 */
@Override
public void remove() {
this.b256Iterator.remove();
}

@ -10,7 +10,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -27,24 +27,29 @@
package net.yacy.kelondro.order;
import java.io.Serializable;
import java.util.Comparator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.Order;
public class StringOrder implements Comparator<String> {
public class StringOrder implements Comparator<String>, Serializable {
private static final long serialVersionUID=-5443022063770309585L;
public ByteOrder baseOrder;
/**
 * Creates a string comparator on top of the given byte order.
 *
 * @param base the byte order stored in {@code baseOrder}
 */
public StringOrder(final ByteOrder base) {
this.baseOrder = base;
}
/**
 * Creates a string comparator on top of the given order.
 *
 * @param base the order to use; it is cast to {@code ByteOrder}, so passing any
 *        other implementation raises a {@code ClassCastException}
 */
public StringOrder(final Order<byte[]> base) {
this.baseOrder = (ByteOrder) base;
}
@Override
public final int compare(final String s1, final String s2) {
return baseOrder.compare(UTF8.getBytes(s1), UTF8.getBytes(s2));
return this.baseOrder.compare(UTF8.getBytes(s1), UTF8.getBytes(s2));
}
}

@ -54,6 +54,8 @@ import net.yacy.kelondro.order.Base64Order;
*/
public class ReferenceContainer<ReferenceType extends Reference> extends RowSet {
private static final long serialVersionUID=-540567425172727979L;
private byte[] termHash;
protected ReferenceFactory<ReferenceType> factory;
public static int maxReferences = 0; // overwrite this to enable automatic index shrinking. 0 means no shrinking
@ -255,16 +257,19 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
this.rowEntryIterator = iterator();
}
/**
 * Delegates to the underlying row entry iterator.
 */
@Override
public boolean hasNext() {
return this.rowEntryIterator.hasNext();
}
/**
 * Fetches the next row from the underlying iterator and converts it into a
 * reference through the container's factory ({@code produceSlow}).
 *
 * @return the converted reference, or {@code null} if the underlying iterator delivered {@code null}
 */
@Override
public ReferenceType next() {
    final Row.Entry row = this.rowEntryIterator.next();
    return (row == null) ? null : ReferenceContainer.this.factory.produceSlow(row);
}
/**
 * Delegates removal to the underlying row entry iterator.
 */
@Override
public void remove() {
this.rowEntryIterator.remove();
}
@ -562,10 +567,12 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
return pivot;
}
/**
 * Short description of this container: the term hash rendered as an ASCII string
 * followed by the number of entries.
 */
@Override
public synchronized String toString() {
return "C[" + ASCII.String(this.termHash) + "] has " + size() + " entries";
}
/**
 * Hash code computed from the term hash: the value decoded by
 * {@code Base64Order.enhancedCoder.decodeLong(termHash, 0, 4)}, narrowed to {@code int}.
 * NOTE(review): the arguments 0 and 4 are presumably offset and length - confirm against decodeLong.
 */
@Override
public int hashCode() {
return (int) Base64Order.enhancedCoder.decodeLong(this.termHash, 0, 4);
}

@ -75,7 +75,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
true);
}
public void close() {
public synchronized void close() {
this.array.close(true);
}

@ -101,7 +101,7 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
}
@Override
public void close() {
public synchronized void close() {
this.cache = null;
}

@ -83,7 +83,7 @@ public class ReferenceIterator <ReferenceType extends Reference> extends LookAhe
return null;
}
public void close() {
public synchronized void close() {
if (this.blobs != null) this.blobs.close();
this.blobs = null;
}

@ -108,23 +108,28 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
/**
 * Always returns 0.
 */
@Override
public long mem() {
return 0;
}
/**
 * Not implemented; always returns {@code null}.
 */
@Override
public byte[] smallestKey() {
return null;
}
/**
 * Not implemented; always returns {@code null}.
 */
@Override
public byte[] largestKey() {
return null;
}
/**
 * Returns a synthetic name: the prefix "dbtest." followed by the hash code of the database connection.
 * Dereferences {@code theDBConnection} without a null check.
 */
@Override
public String filename() {
return "dbtest." + this.theDBConnection.hashCode();
}
public void close() {
@Override
public synchronized void close() {
if (this.theDBConnection != null) try {
this.theDBConnection.close();
} catch (final SQLException e) {
@ -133,6 +138,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
this.theDBConnection = null;
}
@Override
public int size() {
int size = -1;
try {
@ -155,14 +161,17 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
/**
 * @return {@code true} only if {@link #size()} reports exactly 0
 */
@Override
public boolean isEmpty() {
return size() == 0;
}
/**
 * @return the row definition held in {@code rowdef}
 */
@Override
public Row row() {
return this.rowdef;
}
@Override
public boolean has(final byte[] key) {
try {
return (get(key, false) != null);
@ -171,10 +180,12 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
/**
 * Performs no de-duplication; always returns a new, empty list.
 */
@Override
public ArrayList<RowCollection> removeDoubles() {
return new ArrayList<RowCollection>();
}
@Override
public Row.Entry get(final byte[] key, final boolean forcecopy) throws IOException {
try {
final String sqlQuery = "SELECT value from test where hash = ?";
@ -199,6 +210,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
@Override
public Map<byte[], Row.Entry> get(final Collection<byte[]> keys, final boolean forcecopy) throws IOException, InterruptedException {
final Map<byte[], Row.Entry> map = new TreeMap<byte[], Row.Entry>(row().objectOrder);
Row.Entry entry;
@ -209,6 +221,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
return map;
}
@Override
public Row.Entry replace(final Row.Entry row) throws IOException {
try {
final Row.Entry oldEntry = remove(row.getPrimaryKeyBytes());
@ -231,6 +244,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
@Override
public boolean put(final Row.Entry row) throws IOException {
try {
final String sqlQuery = "INSERT INTO test (" +
@ -252,6 +266,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
/**
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public synchronized void addUnique(final Row.Entry row) throws IOException {
throw new UnsupportedOperationException();
}
@ -264,6 +279,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
throw new UnsupportedOperationException();
}
@Override
public Row.Entry remove(final byte[] key) throws IOException {
PreparedStatement sqlStatement = null;
try {
@ -292,23 +308,28 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
/**
 * Deletes the entry with the given key by calling {@code remove(key)}.
 *
 * @param key the key of the entry to delete
 * @return {@code true} if {@code remove(key)} returned a non-null entry
 * @throws IOException if {@code remove(key)} fails
 */
@Override
public boolean delete(final byte[] key) throws IOException {
return remove(key) != null;
}
/**
 * Not implemented; always returns {@code null} and removes nothing.
 */
@Override
public Row.Entry removeOne() {
return null;
}
/**
 * Not implemented; always returns {@code null} regardless of {@code count}.
 */
@Override
public List<Row.Entry> top(final int count) throws IOException {
return null;
}
/**
 * Not implemented; always returns {@code null} regardless of the arguments.
 */
@Override
public CloneableIterator<Row.Entry> rows(final boolean up, final byte[] startKey) throws IOException {
// Objects are of type kelondroRow.Entry
return null;
}
@Override
public Iterator<Entry> iterator() {
try {
return rows();
@ -317,10 +338,12 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
}
}
/**
 * Not implemented; always returns {@code null}.
 */
@Override
public CloneableIterator<Row.Entry> rows() throws IOException {
return null;
}
@Override
public CloneableIterator<byte[]> keys(final boolean up, final byte[] startKey) {
// Objects are of type byte[]
return null;
@ -362,10 +385,12 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
return new int[]{0,0,0,0,0,0,0,0,0,0};
}
/**
 * No-op in this implementation; no data is removed.
 */
@Override
public void clear() {
// do nothing
}
/**
 * No-op in this implementation.
 */
@Override
public void deleteOnExit() {
// do nothing
}

@ -689,7 +689,9 @@ public class Table implements Index, Iterable<Row.Entry> {
continue;
}
this.file.put(i, p, 0);
this.index.put(lr.getPrimaryKeyBytes(), i);
byte[] pk = lr.getPrimaryKeyBytes();
if (pk == null) continue;
this.index.put(pk, i);
break;
}
}

@ -161,7 +161,7 @@ public class XMLTables {
return null;
}
public void close() throws IOException {
public synchronized void close() throws IOException {
commit(true);
}

@ -122,7 +122,7 @@ public class NewsDB {
}
}
public void close() {
public synchronized void close() {
if (this.news != null) this.news.close();
this.news = null;
}
@ -329,4 +329,4 @@ public class NewsDB {
}
}
}
}

@ -98,7 +98,7 @@ public class NewsQueue implements Iterable<NewsDB.Record> {
}
}
public void close() {
public synchronized void close() {
if (this.queueStack != null) this.queueStack.close();
this.queueStack = null;
}
@ -224,4 +224,4 @@ public class NewsQueue implements Iterable<NewsDB.Record> {
}
}
}

@ -28,6 +28,7 @@
package net.yacy.peers.graphics;
import java.io.File;
import java.io.Serializable;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
@ -342,8 +343,9 @@ public class WebStructureGraph
hosthashes);
}
public static class HostReferenceFactory implements ReferenceFactory<HostReference>
{
public static class HostReferenceFactory implements ReferenceFactory<HostReference>, Serializable {
private static final long serialVersionUID=7461135579006223155L;
private static final Row hostReferenceRow = new Row(
"String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}",
@ -369,8 +371,9 @@ public class WebStructureGraph
}
public static class HostReference extends AbstractReference implements Reference
{
public static class HostReference extends AbstractReference implements Reference, Serializable {
private static final long serialVersionUID=-9170091435821206765L;
private final Row.Entry entry;
@ -749,7 +752,7 @@ public class WebStructureGraph
}
}
public void close() {
public synchronized void close() {
// finish dns resolving queue
if ( this.publicRefDNSResolvingWorker.isAlive() ) {
log.logInfo("Waiting for the DNS Resolving Queue to terminate");

@ -91,6 +91,7 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.cora.services.federated.solr.SolrDoc;
import net.yacy.cora.services.federated.solr.SolrShardingConnector;
import net.yacy.cora.services.federated.solr.SolrShardingSelection;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
@ -1551,7 +1552,7 @@ public final class Switchboard extends serverSwitch
return this.crawler.clear();
}
public void close() {
public synchronized void close() {
this.log.logConfig("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:");
MemoryTracker.stopSystemProfiling();
terminateAllThreads(true);
@ -2428,10 +2429,8 @@ public final class Switchboard extends serverSwitch
// in case that this happens it appears that the doc id is the right one
}
try {
this.indexSegments
.segment(Segments.Process.LOCALCRAWLING)
.getSolr()
.add(this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc));
SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().add(solrDoc);
} catch ( final IOException e ) {
Log.logWarning(
"SOLR",

@ -296,6 +296,7 @@ public final class SwitchboardConstants {
* <p>Name of the setting for the maximum number of crawler threads that may be active at the same time</p>
*/
public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads";
public static final String CRAWLER_FOLLOW_REDIRECTS = "crawler.http.FollowRedirects";
public static final String YACY_MODE_DEBUG = "yacyDebugMode";
/**

@ -243,7 +243,7 @@ public class DocumentIndex extends Segment
* close the index. This terminates all worker threads and then closes the segment.
*/
@Override
public void close() {
public synchronized void close() {
// send termination signal to worker threads
for ( @SuppressWarnings("unused")
final Worker element : this.worker ) {

@ -203,6 +203,7 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
}
public boolean exists(final byte[] urlHash) {
if (urlHash == null) return false;
try {
if (this.solr != null && this.solr.exists(ASCII.String(urlHash))) {
return true;

@ -303,7 +303,7 @@ public class Segment {
return refCount;
}
public void close() {
public synchronized void close() {
this.termIndex.close();
this.urlMetadata.close();
this.urlCitationIndex.close();

@ -176,7 +176,7 @@ public class Segments implements Iterable<Segment> {
segment(this.process_assignment.get(process)).close();
}
public void close() {
public synchronized void close() {
if (this.segments != null) for (final Segment s: this.segments.values()) s.close();
this.segments = null;
}

@ -26,6 +26,7 @@ package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList;
@ -36,6 +37,7 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework;
@ -491,6 +493,33 @@ public class SolrConfiguration extends ConfigurationSet {
return a;
}
/**
 * Builds the Solr document that describes a URL whose processing failed.
 * <p>
 * The returned document carries, in this order: the URL hash as {@code id}, the result of
 * {@code toNormalform(true, false)} as {@code sku}, the host address as {@code ip_s} (only if
 * an address is available), the host name as {@code host_s} (only if present), the path split
 * on {@code "/"} as {@code paths_txt} (only if a path is present and the split is non-empty),
 * and finally the fail reason and the HTTP status.
 * <p>
 * This method only assembles and returns the document; nothing in its body stores it anywhere.
 *
 * @param digestURI  the URL the error refers to
 * @param failReason text written to the {@code failreason_t} field
 * @param httpstatus status code written to the {@code httpstatus_i} field
 * @return the newly created error document
 * @throws IOException declared by the signature (NOTE(review): no visible statement obviously
 *         throws it; confirm against the called helpers)
 */
public SolrDoc err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
    final SolrDoc errorDoc = new SolrDoc();

    // identity fields: url hash and normal form of the url
    final String urlHash = ASCII.String(digestURI.hash());
    addSolr(errorDoc, SolrField.id, urlHash);
    final String normalForm = digestURI.toNormalform(true, false);
    addSolr(errorDoc, SolrField.sku, normalForm);

    // network location: ip address and host name, each only when available
    final InetAddress inetAddress = digestURI.getInetAddress();
    if (inetAddress != null) {
        addSolr(errorDoc, SolrField.ip_s, inetAddress.getHostAddress());
    }
    if (digestURI.getHost() != null) {
        addSolr(errorDoc, SolrField.host_s, digestURI.getHost());
    }

    // path elements of link
    final String urlPath = digestURI.getPath();
    if (urlPath != null) {
        // NOTE(review): if the path starts with "/", the first element is the empty string
        final String[] pathElements = urlPath.split("/");
        if (pathElements.length != 0) {
            addSolr(errorDoc, SolrField.paths_txt, pathElements);
        }
    }

    // the actual error information
    addSolr(errorDoc, SolrField.failreason_t, failReason);
    addSolr(errorDoc, SolrField.httpstatus_i, httpstatus);

    return errorDoc;
}
/*
standard solr schema

@ -436,7 +436,7 @@ public class TarBuffer {
* current block before closing.
* @throws IOException on error
*/
public void close() throws IOException {
public synchronized void close() throws IOException {
if (this.debug) {
System.err.println("TarBuffer.closeBuffer().");
}

@ -111,7 +111,7 @@ public class TarInputStream extends FilterInputStream {
* @throws IOException on error
*/
@Override
public void close() throws IOException {
public synchronized void close() throws IOException {
this.buffer.close();
}

@ -141,7 +141,7 @@ public class TarOutputStream extends FilterOutputStream {
* @throws IOException on error
*/
@Override
public void close() throws IOException {
public synchronized void close() throws IOException {
if (!closed) {
this.finish();
this.buffer.close();

Loading…
Cancel
Save