diff --git a/build.properties b/build.properties index 2c369a989..84bcfc09a 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.386 +releaseVersion=0.387 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java index c9cbb0fb1..aaa3b2ec4 100644 --- a/source/de/anomic/kelondro/kelondroStack.java +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -123,7 +123,7 @@ public class kelondroStack extends kelondroRecords { // expand the list at the end Node n = newNode(row); n.save(); - n.setOHHandle(new Handle[] {getHandle(toor),null}); + n.setOHHandle(new Handle[] {getHandle(toor), null}); Node n1 = getNode(getHandle(toor), null, 0); n1.setOHHandle(new Handle[] {n1.getOHHandle()[left], n.handle()}); // assign handles @@ -134,79 +134,111 @@ public class kelondroStack extends kelondroRecords { public synchronized byte[][] pop() throws IOException { // return row ontop of the stack and shrink stack by one - Handle h = getHandle(toor); - if (h == null) return null; - Node n = getNode(h, null, 0); - byte[][] ret = n.getValues(); - // shrink stack - Handle l = n.getOHHandle()[left]; - if (l == null) { - // the stack will be empty, write the root handle - setHandle(root, null); - } else { - // un-link the previous record - Node k = getNode(l, null, 0); - k.setOHHandle(new Handle[] {k.getOHHandle()[left], null}); - } - setHandle(toor, l); - deleteNode(h); - return ret; + return pop(0); } + public synchronized byte[][] pop(int dist) throws IOException { + // return row relative to top of the stack and remove addressed element + Node n = topNode(dist); + if (n == null) return null; + byte[][] ret = n.getValues(); + + // remove node + unlinkNode(n); + deleteNode(n.handle()); + + return ret; + } + public synchronized byte[][] top() throws IOException { // return row ontop of the stack - Handle h = getHandle(toor); - if (h == null) return null; - return getNode(h, null, 0).getValues(); + return top(0); } public synchronized byte[][] top(int dist) throws IOException { // return row ontop of the stack // with dist == 0 this is the same function as with top() - Handle h = getHandle(toor); - if (h == null) return null; - if (dist >= size()) return null; // that would exceed the stack - while (dist-- > 0) h = getNode(h, null, 0).getOHHandle()[left]; // track through elements - return getNode(h, null, 0).getValues(); + Node n = topNode(dist); + if (n == null) return null; + return n.getValues(); } public synchronized byte[][] pot() throws IOException { // return row on the bottom of the stack and remove record - Handle h = getHandle(root); - if (h == null) return null; - Node n = getNode(h, null, 0); + return pot(0); + } + + public synchronized byte[][] pot(int dist) throws IOException { + // return row relative to the bottom of the stack and remove addressed element + Node n = botNode(dist); + if (n == null) return null; byte[][] ret = n.getValues(); - // shrink stack - Handle r = n.getOHHandle()[right]; - if (r == null) { - // the stack will be empty, write the toor handle - setHandle(toor, null); - } else { - // un-link the next record - Node k = getNode(r, null, 0); - k.setOHHandle(new Handle[] {null, k.getOHHandle()[right]}); - } - setHandle(root, r); - deleteNode(h); + + // remove node + unlinkNode(n); + deleteNode(n.handle()); + return ret; } public synchronized byte[][] bot() throws IOException { // return row on the bottom of the stack - Handle h = getHandle(root); - if (h == null) return null; - return getNode(h, null, 0).getValues(); + return bot(0); } public synchronized byte[][] bot(int dist) throws IOException { // return row on bottom of the stack // with dist == 0 this is the same function as with bot() - Handle h = getHandle(root); + Node n = botNode(dist); + if (n == null) return null; + return n.getValues(); + } + + private void unlinkNode(Node n) throws IOException { + // join chaines over node + Handle l = n.getOHHandle()[left]; + Handle r = n.getOHHandle()[right]; + // look left + if (l == null) { + // reached the root on left side + setHandle(root, r); + } else { + // un-link the previous record + Node k = getNode(l, null, 0); + k.setOHHandle(new Handle[] {k.getOHHandle()[left], r}); + } + // look right + if (r == null) { + // reached the root on right side + setHandle(toor, l); + } else { + // un-link the following record + Node k = getNode(r, null, 0); + k.setOHHandle(new Handle[] {l, k.getOHHandle()[right]}); + } + } + + private Node topNode(int dist) throws IOException { + // return node ontop of the stack + return queueNode(dist, toor, left); + } + + private Node botNode(int dist) throws IOException { + // return node on bottom of the stack + return queueNode(dist, root, right); + } + + private Node queueNode(int dist, int side, int dir) throws IOException { + // with dist == 0 this is the same function as with getNode(getHandle(side), null, 0) + Handle h = getHandle(side); if (h == null) return null; if (dist >= size()) return null; // that would exceed the stack - while (dist-- > 0) h = getNode(h, null, 0).getOHHandle()[right]; // track through elements - return getNode(h, null, 0).getValues(); + while (dist-- > 0) h = getNode(h, null, 0).getOHHandle()[dir]; // track through elements + return getNode(h, null, 0); } + + + /* public synchronized byte[][] seekPop(byte[] key, long maxdepth) throws IOException { @@ -262,14 +294,13 @@ public class kelondroStack extends kelondroRecords { public void print() { super.print(false); - Handle h; Node n; try { Iterator it = iterator(); while (it.hasNext()) { - h = (Handle) it.next(); - n = getNode(h, null, 0); - System.out.println("> NODE " + hp(h) + + n = (Node) it.next(); + //n = getNode(h, null, 0); + System.out.println("> NODE " + hp(n.handle()) + "; left " + hp(n.getOHHandle()[left]) + ", right " + hp(n.getOHHandle()[right])); System.out.print(" KEY:'" + (new String(n.getValues()[0])).trim() + "'"); for (int j = 1; j < columns(); j++) @@ -323,6 +354,11 @@ public class kelondroStack extends kelondroRecords { } } ret = null; + } else if (args[0].equals("-g")) { + kelondroStack fm = new kelondroStack(new File(args[2]), 0x100000); + byte[][] ret2 = fm.pop(Integer.parseInt(args[1])); + ret = ((ret2 == null) ? null : ret2[1]); + fm.close(); } } else if (args.length == 4) { if (args[0].equals("-c")) { @@ -350,6 +386,14 @@ public class kelondroStack extends kelondroRecords { } public static void main(String[] args) { + // -c 10 20 test.stack + // -p a a1 test.stack + // -p b b1 test.stack + // -p c c1 test.stack + // -v test.stack + // -g test.stack + // -v test.stack + // -g 1 test.stack cmd(args); } diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index 6cc358d54..6066df400 100644 --- a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -202,7 +202,7 @@ public class plasmaCondenser { } words.put(word, sp); // we now have the unique handle of the word, put it into the sentence: - sentence = sentence + intString(wordHandle, numlength); + sentence = sentence + intString(wordHandle, numlength); // thread hang error here } } // finnish last sentence diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 35703de86..50641be30 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -162,34 +162,6 @@ public class plasmaCrawlLURL extends plasmaURL { } } - /* - public synchronized Entry addEntry(String propStr, boolean setGlobal, String initiatorHash, String executorHash, int stackType) { - if ((propStr.startsWith("{")) && (propStr.endsWith("}"))) { - //System.out.println("DEBUG: propStr=" + propStr); - try { - Entry e = new Entry(serverCodings.s2p(propStr.substring(1, propStr.length() - 1)), setGlobal); - if (initiatorHash == null) initiatorHash = dummyHash; - if (executorHash == null) executorHash = dummyHash; - switch (stackType) { - case 0: break; - case 1: externResultStack.add(e.urlHash + initiatorHash + executorHash); break; - case 2: searchResultStack.add(e.urlHash + initiatorHash + executorHash); break; - case 3: transfResultStack.add(e.urlHash + initiatorHash + executorHash); break; - case 4: proxyResultStack.add(e.urlHash + initiatorHash + executorHash); break; - case 5: lcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; - case 6: gcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; - } - return e; - } catch (Exception ex) { - System.out.println("INTERNAL ERROR in newEntry/2: " + ex.toString()); - return null; - } - } else { - return null; - } - } - */ - public void notifyGCrawl(String urlHash, String initiatorHash, String executorHash) { gcrawlResultStack.add(urlHash + initiatorHash + executorHash); } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d5c3a6413..d3cb26872 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -376,9 +376,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", peerPing = new serverInstantThread(yc, "peerPing", null), 2000); peerPing.setSyncObject(new Object()); + indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log, getConfig("allowDistributeIndex", "false").equals("true")); - indexDistribution.setCounts(100 /*indexCount*/, 1 /*peerCount*/, 8000); + indexDistribution.setCounts(100, 1, 8000); deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", new serverInstantThread(indexDistribution, "job", null), 120000); @@ -400,7 +401,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true); //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260); - + log.logSystem("Finished Switchboard Initialization"); } private static String ppRamString(int bytes) { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index adb1fa25c..039b6dc16 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -88,6 +88,24 @@ public final class plasmaWordIndexAssortmentCluster { } } + public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) { + // this tries to store the record. If the record does not fit, or a same hash already + // exists and would not fit together with the new record, then the record is deleted from + // the assortmen(s) and returned together with the newRecord. + // if storage was successful, NULL is returned. + if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit + plasmaWordIndexEntryContainer buffer; + while ((buffer = assortments[newContainer.size() - 1].remove(wordHash)) != null) { + newContainer.add(buffer); + if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit + } + // the assortment (newContainer.size() - 1) should now be empty. put it in there + assortments[newContainer.size() - 1].store(wordHash, newContainer); + // return null to show that we have stored the new Record successfully + return null; + } + + /* public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) { // this tries to store the record. If the record does not fit, or a same hash already // exists and would not fit together with the new record, then the record is deleted from @@ -105,6 +123,16 @@ public final class plasmaWordIndexAssortmentCluster { // return null to show that we have stored the new Record successfully return null; } + */ + + /* + public plasmaWordIndexEntryContainer removeFromOne(String wordHash, int assortment) { + // collect one container from a specific assortment + plasmaWordIndexEntryContainer container = assortments[assortment].remove(wordHash); + if (container == null) return new plasmaWordIndexEntryContainer(wordHash); + return container; + } + */ public plasmaWordIndexEntryContainer removeFromAll(String wordHash) { // collect all records from all the assortments and return them diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index 37c8712e7..a7486c495 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -358,6 +358,54 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { hashDate.deleteScore(key); } + // now decide where to flush that container + if (container.size() <= assortmentLimit) { + // this fits into the assortments + plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(key, container); + if (feedback == null) { + return container.size(); + } else if (reintegrate) { + // put assortmentRecord together with container back to ram + synchronized (cache) { + cache.put(key, feedback); + hashScore.setScore(key, feedback.size()); + hashDate.setScore(key, intTime(time)); + } + return container.size() - feedback.size(); + } else { + // *** should care about another option here *** + return backend.addEntries(feedback, time); + } + } else { + // store to back-end; this should be a rare case + return backend.addEntries(container, time); + } + + } + + /* + private int flushFromMem(String key, boolean reintegrate) { + // this method flushes indexes out from the ram to the disc. + // at first we check the singleton database and act accordingly + // if we we are to flush an index, but see also an entry in the singletons, we + // decide upn the 'reintegrate'-Flag: + // true: do not flush to disc, but re-Integrate the singleton to the RAM + // false: flush the singleton together with container to disc + + plasmaWordIndexEntryContainer container = null; + long time; + synchronized (cache) { + // get the container + container = (plasmaWordIndexEntryContainer) cache.get(key); + if (container == null) return 0; // flushing of nonexisting key + time = getUpdateTime(key); + + // remove it from the cache + cache.remove(key); + hashScore.deleteScore(key); + hashDate.deleteScore(key); + } + // now decide where to flush that container plasmaWordIndexEntryContainer flushedFromAssortment = assortmentCluster.removeFromAll(key); if ((flushedFromAssortment == null) || (flushedFromAssortment.size() == 0)) { @@ -394,6 +442,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } } } + */ private int intTime(long longTime) { return (int) ((longTime - startTime) / 1000); diff --git a/source/de/anomic/server/serverCodings.java b/source/de/anomic/server/serverCodings.java index 48e13d092..510a87eaa 100644 --- a/source/de/anomic/server/serverCodings.java +++ b/source/de/anomic/server/serverCodings.java @@ -46,6 +46,7 @@ import java.io.FileInputStream; import java.io.InputStream; import java.security.MessageDigest; import java.util.Properties; +import java.util.HashMap; import java.util.StringTokenizer; @@ -275,7 +276,24 @@ public final class serverCodings { } return p; } - + + public static HashMap string2map(String string) { + // this can be used to parse a Map.toString() into a Map again + if (string == null) return null; + HashMap map = new HashMap(); + int pos; + pos = string.indexOf("{"); if (pos >= 0) string = string.substring(pos + 1).trim(); + pos = string.lastIndexOf("}"); if (pos >= 0) string = string.substring(0, pos).trim(); + StringTokenizer st = new StringTokenizer(string, ","); + String token; + while (st.hasMoreTokens()) { + token = st.nextToken().trim(); + pos = token.indexOf("="); + if (pos > 0) map.put(token.substring(0, pos).trim(), token.substring(pos + 1).trim()); + } + return map; + } + public static void main(String[] s) { serverCodings b64 = new serverCodings(true); if (s.length == 0) {System.out.println("usage: -[ec|dc|es|ds] "); System.exit(0);} diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index 5d1362eb7..cda365d51 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -106,7 +106,8 @@ public class yacyCore { private plasmaSwitchboard switchboard; private static TimeZone GMTTimeZone = TimeZone.getTimeZone("America/Los_Angeles"); - public static SimpleDateFormat shortFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + public static String universalDatePattern = "yyyyMMddHHmmss"; + public static SimpleDateFormat shortFormatter = new SimpleDateFormat(universalDatePattern); public static long universalTime() { return universalDate().getTime(); @@ -117,7 +118,11 @@ public class yacyCore { } public static String universalDateShortString() { - return shortFormatter.format(universalDate()); + return universalDateShortString(universalDate()); + } + + public static String universalDateShortString(Date date) { + return shortFormatter.format(date); } public static Date parseUniversalDate(String remoteTimeString) { diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 66bec37fa..ee4c58150 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -349,18 +349,7 @@ public class yacySeed { if (seedStr == null) return null; String seed = crypt.simpleDecode(seedStr, key); if (seed == null) return null; - HashMap dna = new HashMap(); - int pos; - pos = seed.indexOf("{"); if (pos >= 0) seed = seed.substring(pos + 1).trim(); - pos = seed.lastIndexOf("}"); if (pos >= 0) seed = seed.substring(0, pos).trim(); - StringTokenizer st = new StringTokenizer(seed, ","); - String token; - while (st.hasMoreTokens()) { - token = st.nextToken().trim(); - //System.out.println("PARSED TOKEN: " + token); - pos = token.indexOf("="); - if (pos > 0) dna.put(token.substring(0, pos).trim(), token.substring(pos + 1).trim()); - } + HashMap dna = serverCodings.string2map(seed); String hash = (String) dna.remove("Hash"); return new yacySeed(hash, dna); } diff --git a/yacy.init b/yacy.init index de4cc37ae..1629c184e 100644 --- a/yacy.init +++ b/yacy.init @@ -399,7 +399,7 @@ xpstopw=true # and another idlesleep is performed 20_dhtdistribution_idlesleep=20000 20_dhtdistribution_busysleep=5000 -20_dhtdistribution_memprereq=5000000 +20_dhtdistribution_memprereq=3000000 30_peerping_idlesleep=120000 30_peerping_busysleep=120000 30_peerping_memprereq=20000 @@ -408,13 +408,13 @@ xpstopw=true 40_peerseedcycle_memprereq=2000000 50_localcrawl_idlesleep=1000 50_localcrawl_busysleep=200 -50_localcrawl_memprereq=4000000 +50_localcrawl_memprereq=2000000 61_globalcrawltrigger_idlesleep=2000 61_globalcrawltrigger_busysleep=200 -61_globalcrawltrigger_memprereq=4000000 +61_globalcrawltrigger_memprereq=2000000 62_remotetriggeredcrawl_idlesleep=10000 62_remotetriggeredcrawl_busysleep=200 -62_remotetriggeredcrawl_memprereq=5000000 +62_remotetriggeredcrawl_memprereq=3000000 70_cachemanager_idlesleep=1000 70_cachemanager_busysleep=0 70_cachemanager_memprereq=10000