From 461a0ce052eab5498d3b239aef52fca0731368a9 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 5 Jun 2012 20:03:43 +0200 Subject: [PATCH 1/5] removed warnings --- htroot/sharedBlacklist_p.java | 2 -- source/de/anomic/crawler/CrawlProfile.java | 1 - source/de/anomic/crawler/CrawlStacker.java | 4 ---- .../sorting/WeakPriorityBlockingQueue.java | 24 ++++++++++++------- .../yacy/document/content/dao/ImportDump.java | 3 --- .../importer/OAIListFriendsLoader.java | 9 +++++-- .../yacy/kelondro/blob/MapColumnIndex.java | 3 ++- source/net/yacy/kelondro/util/ByteArray.java | 2 -- source/net/yacy/search/query/RWIProcess.java | 3 --- .../net/yacy/search/query/SnippetProcess.java | 2 -- 10 files changed, 25 insertions(+), 28 deletions(-) diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index c78337521..2704cc9d5 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -210,7 +210,6 @@ public class sharedBlacklist_p { prop.put("page", "1"); //result page prop.put("status", STATUS_ENTRIES_ADDED); //list of added Entries - int count = 0;//couter of added entries PrintWriter pw = null; try { // open the blacklist file @@ -238,7 +237,6 @@ public class sharedBlacklist_p { // append the item to the file pw.println(newItem); - count++; if (Switchboard.urlBlacklist != null) { final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index c5a6e24bc..ecbffa3e1 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -34,7 +34,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; import de.anomic.server.serverObjects; -import de.anomic.server.servletProperties; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.MultiProtocolURI; 
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 1da8a6726..966ecabac 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -67,7 +67,6 @@ public final class CrawlStacker { private final Log log = new Log("STACKCRAWL"); private final WorkflowProcessor fastQueue, slowQueue; - private long dnsMiss; private final CrawlQueues nextQueue; private final CrawlSwitchboard crawler; private final Segment indexSegment; @@ -89,8 +88,6 @@ public final class CrawlStacker { this.crawler = cs; this.indexSegment = indexSegment; this.peers = peers; - //this.dnsHit = 0; - this.dnsMiss = 0; this.acceptLocalURLs = acceptLocalURLs; this.acceptGlobalURLs = acceptGlobalURLs; this.domainList = domainList; @@ -179,7 +176,6 @@ public final class CrawlStacker { } else { try { this.slowQueue.enQueue(entry); - this.dnsMiss++; } catch (final InterruptedException e) { Log.logException(e); } diff --git a/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java b/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java index 879e6e5ed..720b3674b 100644 --- a/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java +++ b/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java @@ -25,6 +25,7 @@ package net.yacy.cora.sorting; +import java.io.Serializable; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; @@ -39,10 +40,11 @@ import java.util.concurrent.TimeUnit; * If the queue gets larger that the given maxsize, then elements from the tail of the queue * are drained (deleted). 
*/ -public class WeakPriorityBlockingQueue { +public class WeakPriorityBlockingQueue implements Serializable { - - private final TreeSet> queue; // object within the stack, ordered using a TreeSet + private static final long serialVersionUID = 4573442576760691887L; + + private final TreeSet> queue; // object within the stack, ordered using a TreeSet private final Semaphore enqueued; // semaphore for elements in the stack private final ArrayList> drained; // objects that had been on the stack but had been removed protected int maxsize; @@ -274,7 +276,7 @@ public class WeakPriorityBlockingQueue { return this.drained.iterator(); } - public interface Element { + public interface Element extends Serializable { public long getWeight(); public E getElement(); public boolean equals(Element o); @@ -284,9 +286,11 @@ public class WeakPriorityBlockingQueue { public String toString(); } - protected abstract static class AbstractElement implements Element { + protected abstract static class AbstractElement implements Element, Serializable { + + private static final long serialVersionUID = -7026597258248026566L; - public long weight; + public long weight; public E element; public long getWeight() { @@ -318,7 +322,9 @@ public class WeakPriorityBlockingQueue { */ public static class NaturalElement extends AbstractElement implements Element, Comparable>, Comparator> { - public NaturalElement(final E element, final long weight) { + private static final long serialVersionUID = 6816543012966928794L; + + public NaturalElement(final E element, final long weight) { this.element = element; this.weight = weight; } @@ -347,7 +353,9 @@ public class WeakPriorityBlockingQueue { */ public static class ReverseElement extends AbstractElement implements Element, Comparable>, Comparator> { - public ReverseElement(final E element, final long weight) { + private static final long serialVersionUID = -8166724491837508921L; + + public ReverseElement(final E element, final long weight) { this.element = 
element; this.weight = weight; } diff --git a/source/net/yacy/document/content/dao/ImportDump.java b/source/net/yacy/document/content/dao/ImportDump.java index 4bb67575d..c33abbe27 100644 --- a/source/net/yacy/document/content/dao/ImportDump.java +++ b/source/net/yacy/document/content/dao/ImportDump.java @@ -59,7 +59,6 @@ public class ImportDump { FileUtils.copy(dump, baos); String s = UTF8.String(baos.toByteArray()); - int batchSize = 0; int p, q; String t; loop: while (s.length() > 0) { @@ -78,11 +77,9 @@ public class ImportDump { s = s.substring(p); //if (batchSize + t.length() >= maxBatch) { statement.executeBatch(); - batchSize = 0; //} System.out.println(t); statement.addBatch(t); - batchSize += t.length(); } statement.executeBatch(); } catch (SQLException e) { diff --git a/source/net/yacy/document/importer/OAIListFriendsLoader.java b/source/net/yacy/document/importer/OAIListFriendsLoader.java index 34d92a873..9331a82b1 100644 --- a/source/net/yacy/document/importer/OAIListFriendsLoader.java +++ b/source/net/yacy/document/importer/OAIListFriendsLoader.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.Serializable; import java.net.MalformedURLException; import java.util.HashMap; import java.util.Map; @@ -51,7 +52,7 @@ import org.xml.sax.helpers.DefaultHandler; import de.anomic.crawler.retrieval.Response; -public class OAIListFriendsLoader { +public class OAIListFriendsLoader implements Serializable { private static final long serialVersionUID = -8705115274655024604L; @@ -117,7 +118,7 @@ public class OAIListFriendsLoader { } // get a resumption token using a SAX xml parser from am input stream - private static class Parser extends DefaultHandler { + public static class Parser extends DefaultHandler { // class variables private final StringBuilder buffer; @@ -161,6 +162,10 @@ public class OAIListFriendsLoader { http://oai.repec.openlib.org/ */ + + public int 
getCounter() { + return this.recordCounter; + } public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException { if ("baseURL".equals(tag)) { diff --git a/source/net/yacy/kelondro/blob/MapColumnIndex.java b/source/net/yacy/kelondro/blob/MapColumnIndex.java index 34ed4ab93..c8e0f8b4b 100644 --- a/source/net/yacy/kelondro/blob/MapColumnIndex.java +++ b/source/net/yacy/kelondro/blob/MapColumnIndex.java @@ -25,6 +25,7 @@ package net.yacy.kelondro.blob; +import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -38,7 +39,7 @@ import net.yacy.kelondro.order.NaturalOrder; /** * a mapping from a column name to maps with the value of the columns to the primary keys where the entry exist in the table */ -public class MapColumnIndex { +public class MapColumnIndex implements Serializable { private static final long serialVersionUID=-424741536889467566L; diff --git a/source/net/yacy/kelondro/util/ByteArray.java b/source/net/yacy/kelondro/util/ByteArray.java index 6dc21e9d2..fe05ea19f 100644 --- a/source/net/yacy/kelondro/util/ByteArray.java +++ b/source/net/yacy/kelondro/util/ByteArray.java @@ -44,12 +44,10 @@ import net.yacy.cora.order.ByteOrder; public class ByteArray { private final byte[] buffer; - private final int hash; public ByteArray(final byte[] bb) { this.buffer = bb; - this.hash = 0; } public int length() { diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index c2aafb61b..5c19b2eb7 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -271,7 +271,6 @@ public final class RWIProcess extends Thread || pattern.equals("smb://.*") || pattern.equals("file://.*"); long remaining; - int count = 0; pollloop: while ( true ) { remaining = timeout - System.currentTimeMillis(); if (remaining <= 0) { @@ -287,8 +286,6 @@ public final class RWIProcess 
extends Thread break pollloop; } assert (iEntry.urlhash().length == index.row().primaryKeyLength); - //if (iEntry.urlHash().length() != index.row().primaryKeyLength) continue; - count++; // increase flag counts for ( int j = 0; j < 32; j++ ) { diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index 6a6188210..e1cf414f0 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -461,7 +461,6 @@ public class SnippetProcess { final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics",0) >= 0; try { //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis())); - int loops = 0; while (this.shallrun && System.currentTimeMillis() < this.timeout) { //Log.logInfo("SnippetProcess", "***** timeleft = " + (this.timeout - System.currentTimeMillis())); this.lastLifeSign = System.currentTimeMillis(); @@ -509,7 +508,6 @@ public class SnippetProcess { } } - loops++; resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0 if (resultEntry == null) { From 9ad84c5e9f2b97b1f7aa1dec1573138d2b8369a9 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 7 Jun 2012 12:36:05 +0200 Subject: [PATCH 2/5] fix for NPE in PerformanceMemory --- htroot/PerformanceMemory_p.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 4a1b5942f..e37d66b33 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -116,7 +116,9 @@ public class PerformanceMemory_p { prop.put("EcoList_" + c + "_tableKeyMem", Formatter.bytesToString(mem)); prop.put("EcoList_" + c + "_tableKeyChunkSize", mapx.get(Table.StatKeys.tableKeyChunkSize)); - mem = 
Long.parseLong(mapx.get(Table.StatKeys.tableValueMem)); + assert mapx.get(Table.StatKeys.tableValueMem) != null : mapx; + v = mapx.get(Table.StatKeys.tableValueMem); + mem = v == null ? 0 : Long.parseLong(v); totalmem += mem; prop.put("EcoList_" + c + "_tableValueMem", Formatter.bytesToString(mem)); prop.put("EcoList_" + c + "_tableValueChunkSize", mapx.get(Table.StatKeys.tableValueChunkSize)); From c8bbd180e46f7282a469d0d86d463860af298bac Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 7 Jun 2012 12:36:26 +0200 Subject: [PATCH 3/5] enhanced hint for debian package automatic update --- htroot/ConfigUpdate_p.html | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/htroot/ConfigUpdate_p.html b/htroot/ConfigUpdate_p.html index bb160760c..ea013848a 100644 --- a/htroot/ConfigUpdate_p.html +++ b/htroot/ConfigUpdate_p.html @@ -114,8 +114,9 @@ :: - You installed YaCy with a package manager. To update YaCy, use the package manager:

- Debian: apt-get update && apt-get install yacy

+

You installed YaCy with a package manager. To update YaCy, use the package manager:

+

manual update:
apt-get update && apt-get install yacy

+

automatic update: add the following line to /etc/crontab
0 6 * * * root apt-get update && apt-get -y --force-yes install yacy

:: #(/candeploy)# From a61f44f9e442790e17a6eee4924ea11bc8fb0950 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 7 Jun 2012 13:16:38 +0200 Subject: [PATCH 4/5] lazy initialization of the block rank table. As a result, the table is not initialized as long as no search is done. The effect is strongest if YaCy is started headless, because then no browser pop-up opens that would otherwise load the search page and thereby trigger the initialization of the table. --- htroot/yacyinteractive.java | 3 +++ htroot/yacysearch.java | 2 ++ htroot/yacysearch_location.java | 4 +++- source/net/yacy/search/ranking/BlockRank.java | 16 +++++++++++++--- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/htroot/yacyinteractive.java b/htroot/yacyinteractive.java index 959dbec48..845ca476b 100644 --- a/htroot/yacyinteractive.java +++ b/htroot/yacyinteractive.java @@ -27,6 +27,7 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.ranking.BlockRank; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -37,6 +38,8 @@ public class yacyinteractive { public static serverObjects respond(final RequestHeader header, serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); + BlockRank.ensureLoaded(); + prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0); final String promoteSearchPageGreeting = (env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ? 
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 182030eea..31fe9d53a 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -78,6 +78,7 @@ import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; +import net.yacy.search.ranking.BlockRank; import net.yacy.search.ranking.RankingProfile; import de.anomic.data.DidYouMean; import de.anomic.data.UserDB; @@ -96,6 +97,7 @@ public class yacysearch { final Switchboard sb = (Switchboard) env; sb.localSearchLastAccess = System.currentTimeMillis(); + BlockRank.ensureLoaded(); final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header); diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index 68b3ac080..2a4c1cbf4 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -31,6 +31,7 @@ import net.yacy.document.LibraryProvider; import net.yacy.document.geolocalization.GeoLocation; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.ranking.BlockRank; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -40,7 +41,8 @@ public class yacysearch_location { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); - + BlockRank.ensureLoaded(); + prop.put("kml", 0); if (header.get(HeaderFramework.CONNECTION_PROP_EXT, "").equals("kml") || diff --git a/source/net/yacy/search/ranking/BlockRank.java b/source/net/yacy/search/ranking/BlockRank.java index 24b146e22..4e4abeafc 100644 --- a/source/net/yacy/search/ranking/BlockRank.java +++ b/source/net/yacy/search/ranking/BlockRank.java @@ -56,7 +56,8 @@ import 
net.yacy.search.index.Segment; public class BlockRank { public static BinSearch[] ybrTables = null; // block-rank tables - + private static File rankingPath; + private static int count; /** * collect host index information from other peers. All peers in the seed database are asked @@ -214,6 +215,7 @@ public class BlockRank { byte[] hosth = new byte[6]; String hosths, hostn; HostStat hs; + ensureLoaded(); for (int ybr = 0; ybr < ybrTables.length; ybr++) { row: for (int i = 0; i < ybrTables[ybr].size(); i++) { hosth = ybrTables[ybr].get(i, hosth); @@ -240,11 +242,18 @@ public class BlockRank { * @param rankingPath * @param count */ - public static void loadBlockRankTable(final File rankingPath, final int count) { - if (!rankingPath.exists()) return; + public static void loadBlockRankTable(final File rankingPath0, final int count0) { + // lazy initialization to save memory during startup phase + rankingPath = rankingPath0; + count = count0; + } + + public static void ensureLoaded() { + if (ybrTables != null) return; ybrTables = new BinSearch[count]; String ybrName; File f; + Log.logInfo("BlockRank", "loading block rank table from " + rankingPath.toString()); try { for (int i = 0; i < count; i++) { ybrName = "YBR-4-" + Digest.encodeHex(i, 2) + ".idx"; @@ -287,6 +296,7 @@ public class BlockRank { * @return */ public static int ranking(final byte[] hash) { + ensureLoaded(); return ranking(hash, ybrTables); } From b0095c8d3c45347259eb25a1095b5112689bdc82 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 7 Jun 2012 19:42:33 +0200 Subject: [PATCH 5/5] flush the compressor cache when a cleanup is done --- source/de/anomic/crawler/Cache.java | 4 ++++ source/net/yacy/kelondro/blob/Compressor.java | 2 +- source/net/yacy/search/Switchboard.java | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/crawler/Cache.java b/source/de/anomic/crawler/Cache.java index f1867a7fa..d027eb8ab 100644 --- a/source/de/anomic/crawler/Cache.java 
+++ b/source/de/anomic/crawler/Cache.java @@ -143,6 +143,10 @@ public final class Cache { } } + public static void commit() { + fileDB.flushAll(); + } + /** * clear the cache */ diff --git a/source/net/yacy/kelondro/blob/Compressor.java b/source/net/yacy/kelondro/blob/Compressor.java index 5b9837cdb..e6fc380c6 100644 --- a/source/net/yacy/kelondro/blob/Compressor.java +++ b/source/net/yacy/kelondro/blob/Compressor.java @@ -335,7 +335,7 @@ public class Compressor implements BLOB, Iterable { } } - private void flushAll() { + public void flushAll() { while (!this.buffer.isEmpty()) { if (!flushOne()) break; } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 2b3bad5ab..1f5b4ee88 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -1892,7 +1892,7 @@ public final class Switchboard extends serverSwitch } public int cleanupJobSize() { - int c = 0; + int c = 1; // "es gibt immer was zu tun" if ( (this.crawlQueues.delegatedURL.stackSize() > 1000) ) { c++; } @@ -1909,6 +1909,9 @@ public final class Switchboard extends serverSwitch public boolean cleanupJob() { try { + // flush the document compressor cache + Cache.commit(); + // clear caches if necessary if ( !MemoryControl.request(8000000L, false) ) { for ( final Segment indexSegment : this.indexSegments ) {