From 0c1b29f3c9055c211a70e0efbdc94fbcdeff5ddc Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 1 Jun 2011 19:31:56 +0000 Subject: [PATCH] - applied many small performance hacks - added a memory limitation in the zip parser and the pdf parser - added search throttling: if too many search queries are still waiting to be computed, then new requests are not accepted for some time. if after one second there is still no capacity to perform another search, the search terminates with no results. this case should only happen in DoS-like situations and under strong load on a peer, e.g. if it is integrated in metager. - added a search cache deletion process that removes search requests when throttling happens git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7766 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/data/ListManager.java | 10 +- .../http/server/ServerSideIncludes.java | 22 +- .../de/anomic/http/server/TemplateEngine.java | 47 +- source/de/anomic/search/ReferenceOrder.java | 170 +++-- source/de/anomic/search/ResultFetcher.java | 238 +++---- source/de/anomic/search/SearchEventCache.java | 114 ++-- .../yacy/cora/storage/ConcurrentScoreMap.java | 132 ++-- source/net/yacy/document/Document.java | 273 ++++---- source/net/yacy/document/TextParser.java | 165 ++--- .../document/parser/html/ContentScraper.java | 362 +++++------ .../yacy/document/parser/html/Evaluation.java | 87 +-- .../parser/html/TransformerWriter.java | 354 +++++----- .../net/yacy/document/parser/pdfParser.java | 92 +-- .../net/yacy/document/parser/zipParser.java | 55 +- source/net/yacy/kelondro/blob/ArrayStack.java | 2 +- source/net/yacy/kelondro/blob/Compressor.java | 222 +++---- source/net/yacy/kelondro/blob/MapHeap.java | 8 +- .../kelondro/data/word/WordReferenceVars.java | 182 +++--- source/net/yacy/kelondro/index/RowSet.java | 214 +++--- source/net/yacy/kelondro/table/Table.java | 611 +++++++++--------- .../net/yacy/repository/LoaderDispatcher.java | 192 
+++--- 21 files changed, 1789 insertions(+), 1763 deletions(-) diff --git a/source/de/anomic/data/ListManager.java b/source/de/anomic/data/ListManager.java index 56d254692..a506c44fc 100644 --- a/source/de/anomic/data/ListManager.java +++ b/source/de/anomic/data/ListManager.java @@ -40,10 +40,14 @@ import net.yacy.repository.BlacklistFile; import de.anomic.search.SearchEventCache; import de.anomic.search.Switchboard; import java.util.List; +import java.util.regex.Pattern; // The Naming of the functions is a bit strange... public class ListManager { + + private final static Pattern commaPattern = Pattern.compile(","); + public static Switchboard switchboard = null; public static File listsPath = null; @@ -143,7 +147,7 @@ public class ListManager { ArrayList list; if (string != null && string.length() > 0) { - list = new ArrayList(Arrays.asList(string.split(","))); + list = new ArrayList(Arrays.asList(commaPattern.split(string, 0))); } else { list = new ArrayList(); } @@ -161,7 +165,7 @@ public class ListManager { HashSet set; if (string != null) { - set = new HashSet(Arrays.asList(string.split(","))); + set = new HashSet(Arrays.asList(commaPattern.split(string, 0))); } else { set = new HashSet(); } @@ -180,7 +184,7 @@ public class ListManager { Vector v; if (string != null) { - v = new Vector(Arrays.asList(string.split(","))); + v = new Vector(Arrays.asList(commaPattern.split(string, 0))); } else { v = new Vector(); } diff --git a/source/de/anomic/http/server/ServerSideIncludes.java b/source/de/anomic/http/server/ServerSideIncludes.java index ee924faa1..3c599a23c 100644 --- a/source/de/anomic/http/server/ServerSideIncludes.java +++ b/source/de/anomic/http/server/ServerSideIncludes.java @@ -10,7 +10,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -31,7 
+31,7 @@ import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; -import net.yacy.cora.document.UTF8; +import net.yacy.cora.document.ASCII; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.util.ByteBuffer; @@ -42,12 +42,12 @@ public class ServerSideIncludes { public static void writeSSI(final ByteBuffer in, final OutputStream out, final String authorization, final String requesthost) throws IOException { writeSSI(in, 0, out, authorization, requesthost); } - + public static void writeSSI(final ByteBuffer in, int off, final OutputStream out, final String authorization, final String requesthost) throws IOException { - int p = in.indexOf(UTF8.getBytes(""), p + 10); + q = in.indexOf(ASCII.getBytes("-->"), p + 10); if (out instanceof ChunkedOutputStream) { ((ChunkedOutputStream) out).write(in, off, p - off); } else { @@ -55,7 +55,7 @@ public class ServerSideIncludes { } parseSSI(in, p, out, authorization, requesthost); off = q + 3; - p = in.indexOf(UTF8.getBytes("