From 734059d33e6cec664471ce2fda9192c605c45fd5 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 14 Sep 2011 23:34:05 +0000 Subject: [PATCH] performance hacks git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7955 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/transferRWI.java | 42 +- .../de/anomic/server/serverAccessTracker.java | 8 +- source/de/anomic/yacy/yacySeed.java | 24 +- .../yacy/cora/document/MultiProtocolURI.java | 520 +++++++++--------- .../kelondro/data/word/WordReferenceRow.java | 63 +++ .../kelondro/data/word/WordReferenceVars.java | 12 +- 6 files changed, 373 insertions(+), 296 deletions(-) diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 154eade91..5669c7898 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -1,4 +1,4 @@ -// transferRWI.java +// transferRWI.java // ----------------------- // part of the AnomicHTTPD caching proxy // (C) by Michael Peter Christen; mc@yacy.net @@ -35,12 +35,13 @@ import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.URIMetadataRow; +import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist; - import de.anomic.search.Segments; import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; @@ -56,7 +57,7 @@ import de.anomic.yacy.dht.FlatWordPartitionScheme; public final class transferRWI { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException { - + // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); @@ -77,7 +78,7 @@ public final class transferRWI { logWarning(contentType, "missing entryc"); return prop; } - + // request values final String iam = post.get("iam", ""); // seed hash of requester final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability @@ -89,13 +90,13 @@ public final class transferRWI { final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false); final long cachelimit = sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000); final yacySeed otherPeer = sb.peers.get(iam); - final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion())); - + final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion())); + // response values int pause = 0; String result = "ok"; final StringBuilder unknownURLs = new StringBuilder(6000); - + if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) { sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash); result = "wrong_target"; @@ -134,7 +135,7 @@ public final class transferRWI { // decode request System.out.println("STRINGS " + UTF8.String(indexes)); - Iterator it = FileUtils.strings(indexes); + final Iterator it = FileUtils.strings(indexes); // free memory indexes = null; @@ -151,10 +152,11 @@ public final class transferRWI { int received = 0; int blocked = 0; int receivedURL = 0; + final IndexCell cell = sb.indexSegments.termIndex(Segments.Process.DHTIN); while (it.hasNext()) { serverCore.checkInterruption(); estring = it.next(); - + // check if RWI entry is well-formed p = estring.indexOf('{'); if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) { @@ -165,14 +167,14 @@ public final class transferRWI { wordhashes.add(wordHash); iEntry = new WordReferenceRow(estring.substring(p)); urlHash = iEntry.urlhash(); - + // block blacklisted entries if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) { if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); blocked++; continue; } - + // check if the entry is in our network domain final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash); if (urlRejectReason != null) { @@ -181,11 +183,11 @@ public final class transferRWI { blocked++; continue; } - + // learn entry try { - sb.indexSegments.termIndex(Segments.Process.DHTIN).add(wordHash.getBytes(), iEntry); - } catch (Exception e) { + cell.add(wordHash.getBytes(), iEntry); + } catch (final Exception e) { Log.logException(e); } serverCore.checkInterruption(); @@ -208,7 +210,7 @@ public final class transferRWI { sb.peers.mySeed().incRI(received); // finally compose the unknownURL hash list - Iterator bit = unknownURL.iterator(); + final Iterator bit = unknownURL.iterator(); unknownURLs.ensureCapacity(unknownURL.size() * 25); while (bit.hasNext()) { unknownURLs.append(",").append(UTF8.String(bit.next())); @@ -217,14 +219,14 @@ public final class transferRWI { if (wordhashes.isEmpty() || received == 0) { sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs"); } else { - String firstHash = wordhashes.get(0); - String lastHash = wordhashes.get(wordhashes.size() - 1); + final String firstHash = wordhashes.get(0); + final String lastHash = wordhashes.get(wordhashes.size() - 1); final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2; - sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName); - yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash)); + sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName); + yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "], processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, " + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash)); } result = "ok"; - + pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time } diff --git a/source/de/anomic/server/serverAccessTracker.java b/source/de/anomic/server/serverAccessTracker.java index 8c37c3236..2d0701d95 100644 --- a/source/de/anomic/server/serverAccessTracker.java +++ b/source/de/anomic/server/serverAccessTracker.java @@ -62,10 +62,12 @@ public class serverAccessTracker { /* * remove all entries from the access tracker where the age of the last access is greater than the given timeout */ - private synchronized void cleanupAccessTracker() { + private void cleanupAccessTracker() { - if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues - this.lastCleanup = System.currentTimeMillis(); + synchronized (this) { + if (System.currentTimeMillis() - this.lastCleanup < cleanupCycle) return; // avoid too many scans of the queues + this.lastCleanup = System.currentTimeMillis(); + } // clear entries which had no entry for the maxTrackingTime time final Iterator>> i = this.accessTracker.entrySet().iterator(); diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 8bf830b29..ce6e63d10 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -175,7 +175,7 @@ public class yacySeed implements Cloneable, Comparable, Comparator dna; private String alternativeIP = null; - private final long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations. + private long birthdate; // keep this value in ram since it is often used and may cause lockings in concurrent situations. // use our own formatter to prevent concurrency locks with other processes private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); @@ -188,13 +188,7 @@ public class yacySeed implements Cloneable, Comparable, Comparator, Comparator milliseconds; } + public final long getBirthdate() { + if (this.birthdate > 0) return this.birthdate; + long b; + try { + b = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime(); + } catch (final ParseException e) { + b = System.currentTimeMillis(); + } + this.birthdate = b; + return this.birthdate; + } + /** @return the age of the seed in number of days */ public final int getAge() { - return (int) Math.abs((System.currentTimeMillis() - this.birthdate) / 1000 / 60 / 60 / 24); + return (int) Math.abs((System.currentTimeMillis() - getBirthdate()) / 1000 / 60 / 60 / 24); } public void setPeerTags(final Set keys) { diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index 18c569e4b..2c1cc1492 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -11,12 +11,12 @@ * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see . @@ -44,7 +44,6 @@ import java.util.regex.Pattern; import jcifs.smb.SmbException; import jcifs.smb.SmbFile; import jcifs.smb.SmbFileInputStream; - import net.yacy.cora.document.Punycode.PunycodeException; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.TimeoutRequest; @@ -58,10 +57,10 @@ import net.yacy.cora.protocol.http.HTTPClient; public class MultiProtocolURI implements Serializable, Comparable { public static final MultiProtocolURI POISON = new MultiProtocolURI(); // poison pill for concurrent link generators - + private static final long serialVersionUID = -1173233022912141884L; private static final long SMB_TIMEOUT = 5000; - + public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter private static final Pattern backPathPattern = Pattern.compile("(/[^/]+(? sessionIDnames = new ConcurrentHashMap(); - - public static final void initSessionIDNames(Set idNames) { + + public static final void initSessionIDNames(final Set idNames) { for (String s: idNames) { if (s == null) continue; s = s.trim(); if (s.length() > 0) sessionIDnames.put(s, PRESENT); } } - + // class variables protected final String protocol, userInfo; protected String host, path, quest, ref; protected int port; - + /** * initialization of a MultiProtocolURI to produce poison pills for concurrent blocking queues */ @@ -115,10 +114,10 @@ public class MultiProtocolURI implements Serializable, Comparable 1 && url.charAt(1) == ':') { // maybe a DOS drive path url = "file://" + url; } - + if (url.length() > 0 && url.charAt(0) == '/') { // maybe a unix/linux absolute path url = "file://" + url; } - + int p = url.indexOf(':'); if (p < 0) { url = "http://" + url; @@ -150,80 +149,80 @@ public class MultiProtocolURI implements Serializable, Comparable 0 && h.charAt(0) == '/') { - char c = h.charAt(2); + final char c = h.charAt(2); if (c == ':' || c == '|') - path = h.substring(1); + this.path = h.substring(1); else - path = h; + this.path = h; } else { - char c = h.charAt(1); + final char c = h.charAt(1); if (c == ':' || c == '|') - path = h; + this.path = h; else - path = "/" + h; + this.path = "/" + h; } } - userInfo = null; - port = -1; - quest = null; - ref = null; + this.userInfo = null; + this.port = -1; + this.quest = null; + this.ref = null; } else { throw new MalformedURLException("unknown protocol: " + url); } } - + // handle international domains - if (!Punycode.isBasic(host)) try { - final String[] domainParts = patternDot.split(host, 0); - StringBuilder buffer = new StringBuilder(80); + if (!Punycode.isBasic(this.host)) try { + final String[] domainParts = patternDot.split(this.host, 0); + final StringBuilder buffer = new StringBuilder(80); // encode each domain-part separately for(int i = 0; i < domainParts.length; i++) { final String part = domainParts[i]; @@ -236,15 +235,15 @@ public class MultiProtocolURI implements Serializable, Comparable end) break; @@ -358,7 +357,7 @@ public class MultiProtocolURI implements Serializable, Comparable @@ -368,42 +367,42 @@ public class MultiProtocolURI implements Serializable, Comparable */ private void escape() { - if (path != null && path.indexOf('%') == -1) escapePath(); - if (quest != null && quest.indexOf('%') == -1) escapeQuest(); - if (ref != null && ref.indexOf('%') == -1) escapeRef(); + if (this.path != null && this.path.indexOf('%') == -1) escapePath(); + if (this.quest != null && this.quest.indexOf('%') == -1) escapeQuest(); + if (this.ref != null && this.ref.indexOf('%') == -1) escapeRef(); } - + private void escapePath() { - final String[] pathp = patternSlash.split(path, -1); - StringBuilder ptmp = new StringBuilder(path.length() + 10); - for (int i = 0; i < pathp.length; i++) { + final String[] pathp = patternSlash.split(this.path, -1); + final StringBuilder ptmp = new StringBuilder(this.path.length() + 10); + for (final String element : pathp) { ptmp.append('/'); - ptmp.append(escape(pathp[i])); + ptmp.append(escape(element)); } - path = ptmp.substring((ptmp.length() > 0) ? 1 : 0); + this.path = ptmp.substring((ptmp.length() > 0) ? 1 : 0); } - + private void escapeRef() { - ref = escape(ref).toString(); + this.ref = escape(this.ref).toString(); } - + private void escapeQuest() { - final String[] questp = patternAmp.split(quest, -1); - StringBuilder qtmp = new StringBuilder(quest.length() + 10); - for (int i = 0; i < questp.length; i++) { - if (questp[i].indexOf('=') != -1) { + final String[] questp = patternAmp.split(this.quest, -1); + final StringBuilder qtmp = new StringBuilder(this.quest.length() + 10); + for (final String element : questp) { + if (element.indexOf('=') != -1) { qtmp.append('&'); - qtmp.append(escape(questp[i].substring(0, questp[i].indexOf('=')))); + qtmp.append(escape(element.substring(0, element.indexOf('=')))); qtmp.append('='); - qtmp.append(escape(questp[i].substring(questp[i].indexOf('=') + 1))); + qtmp.append(escape(element.substring(element.indexOf('=') + 1))); } else { qtmp.append('&'); - qtmp.append(escape(questp[i])); + qtmp.append(escape(element)); } } - quest = qtmp.substring((qtmp.length() > 0) ? 1 : 0); + this.quest = qtmp.substring((qtmp.length() > 0) ? 1 : 0); } - + private final static String[] hex = { "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", @@ -438,7 +437,7 @@ public class MultiProtocolURI implements Serializable, Comparable 0) this.port = Integer.parseInt(portStr); - else this.port = -1; + else this.port = -1; this.host = this.host.substring(0, r); } catch (final NumberFormatException e) { throw new MalformedURLException("wrong port in host fragment '" + this.host + "' of input url '" + inputURL + "'"); } } } - + private void identRef() { // identify ref in file - final int r = path.indexOf('#'); + final int r = this.path.indexOf('#'); if (r < 0) { this.ref = null; } else { - this.ref = path.substring(r + 1); - this.path = path.substring(0, r); + this.ref = this.path.substring(r + 1); + this.path = this.path.substring(0, r); } } - + private void identQuest() { // identify quest in file - final int r = path.indexOf('?'); + final int r = this.path.indexOf('?'); if (r < 0) { this.quest = null; } else { - this.quest = path.substring(r + 1); - this.path = path.substring(0, r); + this.quest = this.path.substring(r + 1); + this.path = this.path.substring(0, r); } } - + public String getFile() { return getFile(false, false); } - + public String getFile(final boolean excludeReference, final boolean removeSessionID) { // this is the path plus quest plus ref // if there is no quest and no ref the result is identical to getPath // this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile() - if (quest == null) { - if (excludeReference || ref == null) return path; - StringBuilder sb = new StringBuilder(120); - sb.append(path); + if (this.quest == null) { + if (excludeReference || this.ref == null) return this.path; + final StringBuilder sb = new StringBuilder(120); + sb.append(this.path); sb.append('#'); - sb.append(ref); + sb.append(this.ref); return sb.toString(); } - String q = quest; + String q = this.quest; if (removeSessionID) { - for (String sid: sessionIDnames.keySet()) { + for (final String sid: sessionIDnames.keySet()) { if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) { - int p = q.indexOf('&'); + final int p = q.indexOf('&'); if (p < 0) { - if (excludeReference || ref == null) return path; - StringBuilder sb = new StringBuilder(120); - sb.append(path); + if (excludeReference || this.ref == null) return this.path; + final StringBuilder sb = new StringBuilder(120); + sb.append(this.path); sb.append('#'); - sb.append(ref); + sb.append(this.ref); return sb.toString(); } q = q.substring(p + 1); continue; } - int p = q.toLowerCase().indexOf("&" + sid.toLowerCase() + "="); + final int p = q.toLowerCase().indexOf("&" + sid.toLowerCase() + "="); if (p < 0) continue; - int p1 = q.indexOf('&', p+1); + final int p1 = q.indexOf('&', p+1); if (p1 < 0) { q = q.substring(0, p); } else { @@ -633,34 +632,37 @@ public class MultiProtocolURI implements Serializable, Comparable= 0) && (host != null)) ? host + ":" + port : ((host != null) ? host : ""); + return ((this.port >= 0) && (this.host != null)) ? this.host + ":" + this.port : ((this.host != null) ? this.host : ""); } public String getHost() { - return host; + return this.host; } public int getPort() { - return port; + return this.port; } public String getProtocol() { - return protocol; + return this.protocol; } public String getRef() { - return ref; + return this.ref; } public void removeRef() { - ref = null; + this.ref = null; } - + public String getUserInfo() { - return userInfo; + return this.userInfo; } public String getQuery() { - return quest; + return this.quest; } @Override @@ -717,7 +719,7 @@ public class MultiProtocolURI implements Serializable, Comparable= '0' && c <='9') || (c >= 'a' && c <='z') || (c >= 'A' && c <='Z')) sb.append(c); else sb.append(' '); } t = sb.toString(); - + // remove all double-spaces int p; while ((p = t.indexOf(" ")) >= 0) t = t.substring(0, p) + t.substring(p + 1); // split the string into tokens and add all camel-case splitting - String[] u = t.split(" "); - Map token = new LinkedHashMap(); - for (String r: u) { + final String[] u = t.split(" "); + final Map token = new LinkedHashMap(); + for (final String r: u) { token.putAll(parseCamelCase(r)); } - + // construct a String again - for (String v: token.keySet()) if (v.length() > 1) s += " " + v; + for (final String v: token.keySet()) if (v.length() > 1) s += " " + v; return s; } - + public static enum CharType { low, high, number; } - + public static Map parseCamelCase(String s) { - Map token = new LinkedHashMap(); + final Map token = new LinkedHashMap(); if (s.length() == 0) return token; int p = 0; CharType type = charType(s.charAt(0)), nct = type; @@ -767,7 +769,7 @@ public class MultiProtocolURI implements Serializable, Comparable= 'a' && c <= 'z') return CharType.low; if (c >= '0' && c <= '9') return CharType.number; return CharType.high; } - + public String toNormalform(final boolean excludeReference, final boolean stripAmp) { return toNormalform(excludeReference, stripAmp, false, false); } - + private static final Pattern ampPattern = Pattern.compile("&"); public String toNormalform(final boolean excludeReference, final boolean stripAmp, final boolean resolveHost, final boolean removeSessionID) { - String result = toNormalform0(excludeReference, resolveHost, removeSessionID); + String result = toNormalform0(excludeReference, resolveHost, removeSessionID); if (stripAmp) { result = ampPattern.matcher(result).replaceAll("&"); } return result; } - + private String toNormalform0(final boolean excludeReference, final boolean resolveHost, final boolean removeSessionID) { // generates a normal form of the URL boolean defaultPort = false; @@ -814,22 +816,22 @@ public class MultiProtocolURI implements Serializable, Comparable 0); } public final boolean isCGI() { - final String ls = unescape(path.toLowerCase()); + final String ls = unescape(this.path.toLowerCase()); return ls.indexOf(".cgi") >= 0 || ls.indexOf(".exe") >= 0; } - + public final boolean isIndividual() { - final String q = unescape(path.toLowerCase()); - for (String sid: sessionIDnames.keySet()) { + final String q = unescape(this.path.toLowerCase()); + for (final String sid: sessionIDnames.keySet()) { if (q.startsWith(sid.toLowerCase() + "=")) return true; - int p = q.indexOf("&" + sid.toLowerCase() + "="); + final int p = q.indexOf("&" + sid.toLowerCase() + "="); if (p >= 0) return true; } int pos; @@ -912,16 +914,16 @@ public class MultiProtocolURI implements Serializable, Comparable 0 && host.length() - pos == 3) language = host.substring(pos + 1).toLowerCase(); + if (this.host == null) return language; + final int pos = this.host.lastIndexOf('.'); + if (pos > 0 && this.host.length() - pos == 3) language = this.host.substring(pos + 1).toLowerCase(); if (language.equals("uk")) language = "en"; return language; } // The MultiProtocolURI may be used to integrate File- and SMB accessed into one object // some extraction methods that generate File/SmbFile objects from the MultiProtocolURI - + /** * create a standard java URL. * Please call isHTTP(), isHTTPS() and isFTP() before using this class @@ -930,7 +932,7 @@ public class MultiProtocolURI implements Serializable, Comparable in; + private final BlockingQueue out; + Thread[] worker; + public ExternalParser(final int concurrency) { + this.in = new LinkedBlockingQueue(); + this.out = new LinkedBlockingQueue(); + for (int i = 0; i < concurrency; i++) { + this.worker[i] = new Thread() { + public void run() { + String s; + try { + while ((s = ExternalParser.this.in.take()) != PIN) { + ExternalParser.this.out.put(new WordReferenceRow(s)); + } + } catch (final InterruptedException e) { + } + } + }; + this.worker[i].start(); + } + } + public ExternalParser() { + this(Runtime.getRuntime().availableProcessors()); + } + public void put(final String s) { + try { + this.in.put(s); + } catch (final InterruptedException e) { + } + } + public void terminate() { + for (@SuppressWarnings("unused") final Thread w : this.worker) { + try { + this.in.put(PIN); + } catch (final InterruptedException e) { + } + } + for (final Thread w : this.worker) { + try { + if (w.isAlive()) w.join(); + } catch (final InterruptedException e) { + } + } + try { + this.out.put(poison); + } catch (final InterruptedException e) { + } + } + public WordReferenceRow take() { + WordReferenceRow row; + try { + row = this.out.take(); + } catch (final InterruptedException e) { + return poison; + } + return row; + } + } + public WordReferenceRow(final String external) { this.entry = urlEntryRow.newEntry(external, true); } diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index 65ed9d0a7..86730abe8 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -63,8 +63,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc public char type; public int hitcount, llocal, lother, phrasesintext, posinphrase, posofphrase, - urlcomps, urllength, virtualAge, + urlcomps, urllength, wordsintext, wordsintitle; + private int virtualAge; private final ConcurrentLinkedQueue positions; public double termFrequency; @@ -89,7 +90,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc final double termfrequency ) { if (language == null || language.length != 2) language = default_language; - final int mddlm = MicroDate.microDateDays(lastmodified); //final int mddct = MicroDate.microDateDays(updatetime); this.flags = flags; //this.freshUntil = Math.max(0, mddlm + (mddct - mddlm) * 2); @@ -107,7 +107,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.posofphrase = posofphrase; this.urlcomps = urlComps; this.urllength = urlLength; - this.virtualAge = mddlm; + this.virtualAge = -1; // compute that later this.wordsintext = wordcount; this.wordsintitle = titleLength; this.termFrequency = termfrequency; @@ -288,6 +288,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc } public int virtualAge() { + if (this.virtualAge > 0) return this.virtualAge; + this.virtualAge = MicroDate.microDateDays(this.lastModified); return this.virtualAge; } @@ -312,7 +314,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (this.hitcount > (v = other.hitcount)) this.hitcount = v; if (this.llocal > (v = other.llocal)) this.llocal = v; if (this.lother > (v = other.lother)) this.lother = v; - if (this.virtualAge > (v = other.virtualAge)) this.virtualAge = v; + if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v; if (other.positions != null) a(this.positions, min(this.positions, other.positions)); @@ -334,7 +336,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (this.hitcount < (v = other.hitcount)) this.hitcount = v; if (this.llocal < (v = other.llocal)) this.llocal = v; if (this.lother < (v = other.lother)) this.lother = v; - if (this.virtualAge < (v = other.virtualAge)) this.virtualAge = v; + if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v; if (other.positions != null) a(this.positions, max(this.positions, other.positions));