From 2f847071e15dc16002b4d31778b962c007c387a4 Mon Sep 17 00:00:00 2001 From: sixcooler Date: Wed, 2 Sep 2015 19:10:39 +0200 Subject: [PATCH 1/9] ignore /DATA (Eclipse Mars) --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f47c9a147..bca830555 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ lib/yacycore.jar libbuild/svnRevNr.jar libbuild/GitRevTask.jar gen/** -DATA/ +/DATA classes/ RELEASE/ /yacy.pid From 20e18d79f8927a8b9c7d7f9e389b82077495ced2 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 10 Nov 2015 01:29:13 +0100 Subject: [PATCH 2/9] harmonize document title for archive parsers --- source/net/yacy/document/parser/bzipParser.java | 5 +++-- source/net/yacy/document/parser/gzipParser.java | 4 +++- source/net/yacy/document/parser/sevenzipParser.java | 7 +++++-- source/net/yacy/document/parser/tarParser.java | 7 ++++--- source/net/yacy/document/parser/zipParser.java | 6 ++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java index 0dc0daad6..dca586f3c 100644 --- a/source/net/yacy/document/parser/bzipParser.java +++ b/source/net/yacy/document/parser/bzipParser.java @@ -34,6 +34,7 @@ import java.util.Date; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -92,7 +93,7 @@ public class bzipParser extends AbstractParser implements Parser { } zippedContent.close(); out.close(); - + final String filename = location.getFileName(); // create maindoc for this bzip container, register with supplied url & mime maindoc = new Document( location, @@ -101,7 +102,7 @@ public class bzipParser extends AbstractParser implements Parser { this, null, null, - null, + 
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java index 504dd1116..c8590530f 100644 --- a/source/net/yacy/document/parser/gzipParser.java +++ b/source/net/yacy/document/parser/gzipParser.java @@ -35,6 +35,7 @@ import java.util.zip.GZIPInputStream; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -90,6 +91,7 @@ public class gzipParser extends AbstractParser implements Parser { } zippedContent.close(); out.close(); + final String filename = location.getFileName(); // create maindoc for this gzip container, register with supplied url & mime maindoc = new Document( location, @@ -98,7 +100,7 @@ public class gzipParser extends AbstractParser implements Parser { this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java index ddfdd8153..c5cde469f 100644 --- a/source/net/yacy/document/parser/sevenzipParser.java +++ b/source/net/yacy/document/parser/sevenzipParser.java @@ -35,6 +35,7 @@ import java.io.OutputStream; import java.util.Date; import net.yacy.cora.document.id.AnchorURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.AbstractParser; import net.yacy.document.Document; @@ -62,6 +63,8 @@ public class sevenzipParser extends AbstractParser implements Parser { final String charset, final int timezoneOffset, final IInStream source) throws Parser.Failure, InterruptedException { + + final String filename = location.getFileName(); final Document doc = new Document( location, mimeType, @@ -69,12 +72,12 @@ public class sevenzipParser extends AbstractParser implements Parser { this, null, null, + AbstractParser.singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title, null, null, null, null, - null, - 0.0f, 0.0f, + 0.0d, 0.0d, (Object)null, null, null, diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java index c5a5fbd03..ae25b4776 100644 --- a/source/net/yacy/document/parser/tarParser.java +++ b/source/net/yacy/document/parser/tarParser.java @@ -72,7 +72,8 @@ public class tarParser extends AbstractParser implements Parser { final int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException { - final String ext = MultiProtocolURL.getFileExtension(location.getFileName()); + final String filename = location.getFileName(); + final String ext = MultiProtocolURL.getFileExtension(filename); if (ext.equals("gz") || ext.equals("tgz")) { try { source = new GZIPInputStream(source); @@ -84,14 +85,14 @@ public class tarParser extends AbstractParser implements Parser { final TarArchiveInputStream tis = new TarArchiveInputStream(source); // create maindoc for this bzip container - Document maindoc = new Document( + final Document maindoc = new Document( location, mimeType, charset, this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? 
location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java index 155d669ba..a6718aea1 100644 --- a/source/net/yacy/document/parser/zipParser.java +++ b/source/net/yacy/document/parser/zipParser.java @@ -33,6 +33,7 @@ import java.util.zip.ZipInputStream; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -79,15 +80,16 @@ public class zipParser extends AbstractParser implements Parser { ZipEntry entry; final ZipInputStream zis = new ZipInputStream(source); + final String filename = location.getFileName(); // create maindoc for this zip container with supplied url and mime - Document maindoc = new Document( + final Document maindoc = new Document( location, mimeType, charset, this, null, null, - null, + AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, From 4a905ec1345451ae030a5a538b607427a5b3f645 Mon Sep 17 00:00:00 2001 From: sixcooler Date: Tue, 10 Nov 2015 20:27:17 +0100 Subject: [PATCH 3/9] fix to not let the AccessTracker-Log grow too much, but have enough data to monitor. 
(+gitignore-correction) --- .../net/yacy/search/query/AccessTracker.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/source/net/yacy/search/query/AccessTracker.java b/source/net/yacy/search/query/AccessTracker.java index b050ee4ae..07c06b15d 100644 --- a/source/net/yacy/search/query/AccessTracker.java +++ b/source/net/yacy/search/query/AccessTracker.java @@ -48,7 +48,8 @@ import net.yacy.search.EventTracker; public class AccessTracker { - private final static long DUMP_PERIOD = 60000L; + private final static long DUMP_PERIOD = 3600000L; + private final static int DUMP_SIZE = 50000; private static final int minSize = 100; private static final int maxSize = 1000; @@ -89,6 +90,8 @@ public class AccessTracker { private static final LinkedList remoteSearches = new LinkedList(); private static final ArrayList log = new ArrayList(); private static long lastLogDump = System.currentTimeMillis(); + private static long localCount = 0; + private static long remoteCount = 0; private static File dumpFile = null; public static void setDumpFile(File f) { @@ -141,9 +144,9 @@ public class AccessTracker { return null; } - public static int size(final Location location) { - if (location == Location.local) synchronized (localSearches) {return localSearches.size();} - if (location == Location.remote) synchronized (remoteSearches) {return remoteSearches.size();} + public static long size(final Location location) { + if (location == Location.local) synchronized (localSearches) {return localCount + localSearches.size();} + if (location == Location.remote) synchronized (remoteSearches) {return remoteCount + remoteSearches.size();} return 0; } @@ -155,10 +158,6 @@ public class AccessTracker { public static void addToDump(String querystring, String resultcount) { addToDump(querystring, resultcount, new Date()); - if (lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) { - lastLogDump = System.currentTimeMillis(); - dumpLog(); - } } public static 
void addToDump(String querystring, String resultcount, Date d) { @@ -173,12 +172,21 @@ public class AccessTracker { synchronized (log) { log.add(sb.toString()); } + if (log.size() > DUMP_SIZE || lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) { + dumpLog(); + } } public static void dumpLog() { + lastLogDump = System.currentTimeMillis(); + localCount += localSearches.size(); while (!localSearches.isEmpty()) { addToDump(localSearches.removeFirst(), 0); } + remoteCount += remoteSearches.size(); + while (!remoteSearches.isEmpty()) { + addToDump(remoteSearches.removeFirst(), 0); + } Thread t = new Thread() { @Override public void run() { From fca353e5eb292c20c020e0d00e786a0e331327be Mon Sep 17 00:00:00 2001 From: sixcooler Date: Tue, 10 Nov 2015 20:32:05 +0100 Subject: [PATCH 4/9] set startuptype of most solr handlers to lazy --- defaults/solr/solrconfig.xml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml index a57b7339f..da58799fb 100644 --- a/defaults/solr/solrconfig.xml +++ b/defaults/solr/solrconfig.xml @@ -838,7 +838,7 @@ of SearchComponents (see below) and supports distributed queries across multiple shards --> - + @@ -902,7 +902,7 @@ - + explicit json @@ -923,7 +923,7 @@ also always fetch the complete index from the leader because a partial sync will not be possible in the absence of this handler. --> - + true json @@ -936,7 +936,7 @@ Do not change these defaults. --> - + {!xport} xsort @@ -966,7 +966,7 @@ This handler will pick a response format to match the input if the 'wt' parameter is not explicit --> - + - + solrpingquery @@ -1117,7 +1117,7 @@ - + explicit true @@ -1137,7 +1137,7 @@ https://wiki.apache.org/solr/SolrCloud/ --> - +