From 123375bfbaf8703cd8401bd16a67174878295d5a Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 15 May 2011 22:57:31 +0000 Subject: [PATCH] added a new yacy protocol servlet 'idx'. This returns an index to one of the data entities that are stored in YaCy. This servlet currently only serves indexes of the web structure hosts. It can be tested by calling http://localhost:8090/yacy/idx.json?object=host This yacy protocol servlet is the first one that returns JSON code and that also shows index entries in a readable format. This will make the development of API applications much easier. This is also an example implementation for possible JSON versions of the other existing YaCy protocol interfaces. The main purpose of this new feature is to provide a distributed block rank collection feature. Creating a block rank is very difficult if the forward-link data is first collected and then one peer must create a backward-link index. This interface already provides a partial backward index and therefore a collection of all these indexes needs only to be joined, which is very easy. The result should be the computation of new block rank tables that all peers can perform. To reduce load on peers, this servlet buffers all data and refreshes it only once in 12 hours. This very slow update cycle is needed because the interface will be called round-robin from all peers once after start-up. 
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7724 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControlRWIs_p.java | 2 +- htroot/WatchWebStructure_p.java | 6 +- htroot/WebStructurePicture_p.java | 4 +- htroot/api/webstructure.java | 25 +- htroot/yacy/idx.java | 84 +++++ htroot/yacy/idx.json | 10 + htroot/yacy/search.java | 3 +- .../de/anomic/search/MetadataRepository.java | 44 --- source/de/anomic/search/SearchEvent.java | 3 +- source/de/anomic/search/Switchboard.java | 4 +- .../yacy/graphics/WebStructureGraph.java | 318 +++++++++++++----- source/de/anomic/yacy/yacyClient.java | 3 +- source/de/anomic/yacy/yacyNetwork.java | 2 +- .../data/citation/CitationReferenceRow.java | 2 +- .../data/image/ImageReferenceRow.java | 2 +- .../navigation/NavigationReferenceRow.java | 2 +- .../data/word/WordReferenceFactory.java | 99 ++++++ .../kelondro/data/word/WordReferenceRow.java | 8 +- .../kelondro/data/word/WordReferenceVars.java | 11 +- source/net/yacy/kelondro/index/Row.java | 14 +- .../yacy/kelondro/index/RowCollection.java | 2 +- .../yacy/kelondro/rwi/AbstractReference.java | 6 + .../yacy/kelondro/rwi/ReferenceContainer.java | 80 ----- 23 files changed, 480 insertions(+), 254 deletions(-) create mode 100644 htroot/yacy/idx.java create mode 100644 htroot/yacy/idx.json diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 804a131d3..08b15ee4d 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -279,7 +279,7 @@ public class IndexControlRWIs_p { } // make an indexContainerCache - ReferenceContainerCache icc = new ReferenceContainerCache(Segment.wordReferenceFactory, index.rowdef, Segment.wordOrder); + ReferenceContainerCache icc = new ReferenceContainerCache(Segment.wordReferenceFactory, index.row(), Segment.wordOrder); try { icc.add(index); } catch (RowSpaceExceededException e) { diff --git a/htroot/WatchWebStructure_p.java b/htroot/WatchWebStructure_p.java index 6bc988b98..715dd2418 100644 
--- a/htroot/WatchWebStructure_p.java +++ b/htroot/WatchWebStructure_p.java @@ -5,6 +5,7 @@ // import net.yacy.cora.protocol.RequestHeader; +import net.yacy.kelondro.data.meta.DigestURI; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.CrawlSwitchboard; @@ -65,7 +66,10 @@ public class WatchWebStructure_p { } // find start point - if ((host == null) || (host.length() == 0) || (host.equals("auto"))) { + if (host == null || + host.length() == 0 || + host.equals("auto") || + sb.webStructure.referencesCount(DigestURI.hosthash6(host)) == 0) { // find domain with most references besthost = sb.webStructure.hostWithMaxReferences(); } else { diff --git a/htroot/WebStructurePicture_p.java b/htroot/WebStructurePicture_p.java index 966ab461d..b40c2d887 100644 --- a/htroot/WebStructurePicture_p.java +++ b/htroot/WebStructurePicture_p.java @@ -140,7 +140,7 @@ public class WebStructurePicture_p { if (nextlayer == maxlayer) return mynodes; nextlayer++; final double radius = 1.0 / (1 << nextlayer); - WebStructureGraph.structureEntry sr = structure.outgoingReferences(centerhash); + WebStructureGraph.StructureEntry sr = structure.outgoingReferences(centerhash); final Map next = (sr == null) ? 
new HashMap() : sr.references; Map.Entry entry; String targethash, targethost; @@ -153,7 +153,7 @@ public class WebStructurePicture_p { while ((i.hasNext()) && (maxnodes > 0) && (System.currentTimeMillis() < timeout)) { entry = i.next(); targethash = entry.getKey(); - targethost = structure.resolveDomHash2DomString(targethash); + targethost = structure.hostHash2hostName(targethash); if (targethost == null) continue; thisrefs = entry.getValue().intValue(); targetrefs = structure.referencesCount(targethash); // can be cpu/time-critical diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java index bba5cbc5a..428575ea4 100644 --- a/htroot/api/webstructure.java +++ b/htroot/api/webstructure.java @@ -40,8 +40,7 @@ public class webstructure { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; - final boolean latest = ((post == null) ? false : post.containsKey("latest")); - String about = ((post == null) ? null : post.get("about", null)); + String about = post == null ? null : post.get("about", null); prop.put("out", 0); prop.put("in", 0); if (about != null) { @@ -55,7 +54,7 @@ public class webstructure { } } if (url != null && about != null) { - WebStructureGraph.structureEntry sentry = sb.webStructure.outgoingReferences(about); + WebStructureGraph.StructureEntry sentry = sb.webStructure.outgoingReferences(about); if (sentry != null) { reference(prop, "out", 0, sentry, sb.webStructure); prop.put("out_domains", 1); @@ -74,10 +73,12 @@ public class webstructure { prop.put("in", 1); } } - } else { - final Iterator i = sb.webStructure.structureEntryIterator(latest); + } else if (sb.adminAuthenticated(header) >= 2) { + // show a complete list of link structure informations in case that the user is authenticated + final boolean latest = ((post == null) ? 
false : post.containsKey("latest")); + final Iterator i = sb.webStructure.structureEntryIterator(latest); int c = 0; - WebStructureGraph.structureEntry sentry; + WebStructureGraph.StructureEntry sentry; while (i.hasNext()) { sentry = i.next(); reference(prop, "out", c, sentry, sb.webStructure); @@ -86,6 +87,10 @@ public class webstructure { prop.put("out_domains", c); prop.put("out", 1); if (latest) sb.webStructure.joinOldNew(); + } else { + // not-authenticated users show nothing + prop.put("out_domains", 0); + prop.put("out", 1); } prop.put("out_maxref", WebStructureGraph.maxref); prop.put("maxhosts", WebStructureGraph.maxhosts); @@ -94,9 +99,9 @@ public class webstructure { return prop; } - public static void reference(serverObjects prop, String prefix, int c, WebStructureGraph.structureEntry sentry, WebStructureGraph ws) { - prop.put(prefix + "_domains_" + c + "_hash", sentry.domhash); - prop.put(prefix + "_domains_" + c + "_domain", sentry.domain); + public static void reference(serverObjects prop, String prefix, int c, WebStructureGraph.StructureEntry sentry, WebStructureGraph ws) { + prop.put(prefix + "_domains_" + c + "_hash", sentry.hosthash); + prop.put(prefix + "_domains_" + c + "_domain", sentry.hostname); prop.put(prefix + "_domains_" + c + "_date", sentry.date); Iterator> k = sentry.references.entrySet().iterator(); Map.Entry refentry; @@ -106,7 +111,7 @@ public class webstructure { refloop: while (k.hasNext()) { refentry = k.next(); refhash = refentry.getKey(); - refdom = ws.resolveDomHash2DomString(refhash); + refdom = ws.hostHash2hostName(refhash); if (refdom == null) continue refloop; prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refhash", refhash); prop.put(prefix + "_domains_" + c + "_citations_" + d + "_refdom", refdom); diff --git a/htroot/yacy/idx.java b/htroot/yacy/idx.java new file mode 100644 index 000000000..09a4a4cce --- /dev/null +++ b/htroot/yacy/idx.java @@ -0,0 +1,84 @@ +/** + * idx + * Copyright 2011 by Michael Peter 
Christen + * First released 16.05.2011 at http://yacy.net + * + * $LastChangedDate: 2011-03-08 02:51:51 +0100 (Di, 08 Mrz 2011) $ + * $LastChangedRevision: 7567 $ + * $LastChangedBy: low012 $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +import java.util.Iterator; + +import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.kelondro.rwi.ReferenceContainer; +import net.yacy.kelondro.rwi.ReferenceContainerCache; +import de.anomic.search.Switchboard; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyNetwork; +import de.anomic.yacy.graphics.WebStructureGraph; +import de.anomic.yacy.graphics.WebStructureGraph.HostReference; + +public final class idx { + + // example: + // http://localhost:8090/yacy/idx.json?object=host + + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { + if (post == null || env == null) { return null; } + + // return variable that accumulates replacements + final Switchboard sb = (Switchboard) env; + final serverObjects prop = new serverObjects(); + + prop.put("list", 0); + prop.put("rowdef",""); + prop.put("name",""); + + if (sb.adminAuthenticated(header) < 2 && !yacyNetwork.authentifyRequest(post, env)) { + return prop; + } + + if (post.get("object", 
"").equals("host")) { + prop.put("name","host"); + ReferenceContainerCache idx = sb.webStructure.incomingReferences(); + prop.put("rowdef", WebStructureGraph.hostReferenceFacory.getRow().toString()); + int count = 0; + for (ReferenceContainer references: idx) { + prop.put("list_" + count + "_term", UTF8.String(references.getTermHash())); + Iterator referenceIterator = references.entries(); + StringBuilder s = new StringBuilder(); + HostReference reference; + while (referenceIterator.hasNext()) { + reference = referenceIterator.next(); + s.append(reference.toPropertyForm()); + if (referenceIterator.hasNext()) s.append(","); + } + prop.put("list_" + count + "_references", s.toString()); + prop.put("list_" + count + "_comma", 1); + count++; + } + prop.put("list_" + (count-1) + "_comma", 0); + prop.put("list", count); + } + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/idx.json b/htroot/yacy/idx.json new file mode 100644 index 000000000..9ce5b4d5f --- /dev/null +++ b/htroot/yacy/idx.json @@ -0,0 +1,10 @@ +{ +"version":"#[version]#", +"uptime":"#[uptime]#", +"name":"#[name]#", +"rowdef":"#[rowdef]#", +"idx":{ +#{list}#"#[term]#":[#[references]#]#(comma)#::,#(/comma)# +#{/list}# +} +} \ No newline at end of file diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 4fe5cc154..312b05434 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -45,6 +45,7 @@ import net.yacy.cora.storage.ScoreMap; import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.order.Bitfield; @@ -260,7 +261,7 @@ public final class search { indexabstract.append("indexabstract."); indexabstract.append(UTF8.String(wordhash)); indexabstract.append("="); - 
indexabstract.append(ReferenceContainer.compressIndex(container, null, 1000).toString()); + indexabstract.append(WordReferenceFactory.compressIndex(container, null, 1000).toString()); indexabstract.append(serverCore.CRLF_STRING); } } diff --git a/source/de/anomic/search/MetadataRepository.java b/source/de/anomic/search/MetadataRepository.java index 5c8334c33..6e7f49e83 100644 --- a/source/de/anomic/search/MetadataRepository.java +++ b/source/de/anomic/search/MetadataRepository.java @@ -35,11 +35,8 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.List; import java.util.Map; -import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.BlockingQueue; import de.anomic.crawler.CrawlStacker; @@ -59,7 +56,6 @@ import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.table.SplitTable; import net.yacy.repository.Blacklist; @@ -153,46 +149,6 @@ public final class MetadataRepository implements Iterable { return null; } } - - public void load(final WeakPriorityBlockingQueue obrwis, int maxcount, long maxtime, final BlockingQueue rows) { - if (urlIndexFile == null) return; - if (obrwis == null) return; - final Map> collector = new TreeMap>(Base64Order.enhancedCoder); - final List collectOrder = new ArrayList(); - int count = 0; - long timelimit = System.currentTimeMillis() + maxtime; - WeakPriorityBlockingQueue.Element obrwi; - byte[] urlHash; - while (System.currentTimeMillis() < timelimit && count < maxcount) { - try { - obrwi = obrwis.take(); - } catch (InterruptedException e) { - break; - } - if (obrwi != null) { - urlHash = obrwi.getElement().metadataHash(); - if (urlHash != null) { - collector.put(urlHash, obrwi); - collectOrder.add(urlHash); - 
count++; - } - } - } - - try { - Map resultmap = urlIndexFile.get(collector.keySet()); - } catch (final IOException e) { - return; - } catch (InterruptedException e) { - return; - } - - for (byte[] hash: collectOrder) { - WeakPriorityBlockingQueue.Element element = collector.get(hash); - if (element == null) continue; - - } - } public void store(final URIMetadataRow entry) throws IOException { // Check if there is a more recent Entry already in the DB diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 63d95af76..6eb0884a5 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -39,6 +39,7 @@ import net.yacy.cora.document.UTF8; import net.yacy.cora.storage.ScoreMap; import net.yacy.document.LargeNumberCache; import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.rwi.ReferenceContainer; @@ -185,7 +186,7 @@ public final class SearchEvent { IAneardhthash = wordhash; } IACount.put(wordhash, LargeNumberCache.valueOf(container.size())); - IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString()); + IAResults.put(wordhash, WordReferenceFactory.compressIndex(container, null, 1000).toString()); } EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankingProcess.searchContainerMap().size(), System.currentTimeMillis() - timer), false); } else { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index f0713b5a8..9156b82da 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -397,7 +397,7 @@ public final class Switchboard extends serverSwitch { this.proxyLastAccess = System.currentTimeMillis() - 10000; 
this.localSearchLastAccess = System.currentTimeMillis() - 10000; this.remoteSearchLastAccess = System.currentTimeMillis() - 10000; - this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map")); + this.webStructure = new WebStructureGraph(new File(queuesRoot, "webStructure.map")); // configuring list path if (!(listsPath.exists())) { @@ -938,7 +938,7 @@ public final class Switchboard extends serverSwitch { 10000); // create new web structure - this.webStructure = new WebStructureGraph(log, new File(queuesRoot, "webStructure.map")); + this.webStructure = new WebStructureGraph(new File(queuesRoot, "webStructure.map")); // load domainList diff --git a/source/de/anomic/yacy/graphics/WebStructureGraph.java b/source/de/anomic/yacy/graphics/WebStructureGraph.java index 8a2578187..216144094 100644 --- a/source/de/anomic/yacy/graphics/WebStructureGraph.java +++ b/source/de/anomic/yacy/graphics/WebStructureGraph.java @@ -29,6 +29,9 @@ package de.anomic.yacy.graphics; import java.io.File; import java.io.IOException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -47,38 +50,46 @@ import net.yacy.cora.document.UTF8; import net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.index.Row; +import net.yacy.kelondro.index.Row.Entry; +import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.Base64Order; +import net.yacy.kelondro.order.MicroDate; +import net.yacy.kelondro.rwi.AbstractReference; +import net.yacy.kelondro.rwi.Reference; +import net.yacy.kelondro.rwi.ReferenceContainer; +import net.yacy.kelondro.rwi.ReferenceContainerCache; +import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.LookAheadIterator; public 
class WebStructureGraph { - public static int maxCRLDump = 500000; - public static int maxCRGDump = 200000; public static int maxref = 300; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia) public static int maxhosts = 20000; // maximum number of hosts in web structure map - private final Log log; + private final static Log log = new Log("WebStructureGraph"); + private final File structureFile; private final TreeMap structure_old; // ',' to {}* private final TreeMap structure_new; private final BlockingQueue publicRefDNSResolvingQueue; - private final publicRefDNSResolvingProcess publicRefDNSResolvingWorker; + private final PublicRefDNSResolvingProcess publicRefDNSResolvingWorker; private final static leanrefObject leanrefObjectPOISON = new leanrefObject(null, null); private static class leanrefObject { - public final DigestURI url; - public final Set globalRefURLs; - public leanrefObject(final DigestURI url, final Set globalRefURLs) { + private final DigestURI url; + private final Set globalRefURLs; + private leanrefObject(final DigestURI url, final Set globalRefURLs) { this.url = url; this.globalRefURLs = globalRefURLs; } } - public WebStructureGraph(final Log log, final File structureFile) { - this.log = log; + public WebStructureGraph(final File structureFile) { this.structure_old = new TreeMap(); this.structure_new = new TreeMap(); this.structureFile = structureFile; @@ -110,12 +121,12 @@ public class WebStructureGraph { delcount--; } } - this.publicRefDNSResolvingWorker = new publicRefDNSResolvingProcess(); + this.publicRefDNSResolvingWorker = new PublicRefDNSResolvingProcess(); this.publicRefDNSResolvingWorker.start(); } - private class publicRefDNSResolvingProcess extends Thread { - public publicRefDNSResolvingProcess() { + private class PublicRefDNSResolvingProcess extends Thread { + private PublicRefDNSResolvingProcess() { } public void run() { leanrefObject lro; @@ -155,7 +166,7 @@ public class 
WebStructureGraph { } } - public void learnrefs(final leanrefObject lro) { + private void learnrefs(final leanrefObject lro) { final StringBuilder cpg = new StringBuilder(240); assert cpg.length() % 12 == 0 : "cpg.length() = " + cpg.length() + ", cpg = " + cpg.toString(); final String refhashp = UTF8.String(lro.url.hash(), 6, 6); // ref hash part @@ -224,20 +235,20 @@ public class WebStructureGraph { return s.toString(); } - public structureEntry outgoingReferences(final String domhash) { - // returns a map with a domhash(String):refcount(Integer) relation - assert domhash.length() == 6; + public StructureEntry outgoingReferences(final String hosthash) { + // returns a map with a hosthash(String):refcount(Integer) relation + assert hosthash.length() == 6; SortedMap tailMap; Map h = new HashMap(); - String domain = ""; + String hostname = ""; String date = ""; String ref; synchronized (structure_old) { - tailMap = structure_old.tailMap(domhash); + tailMap = structure_old.tailMap(hosthash); if (!tailMap.isEmpty()) { final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { - domain = key.substring(7); + if (key.startsWith(hosthash)) { + hostname = key.substring(7); ref = tailMap.get(key); date = ref.substring(0, 8); h = refstr2map(ref); @@ -245,82 +256,220 @@ public class WebStructureGraph { } } synchronized (structure_new) { - tailMap = structure_new.tailMap(domhash); + tailMap = structure_new.tailMap(hosthash); if (!tailMap.isEmpty()) { final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { + if (key.startsWith(hosthash)) { ref = tailMap.get(key); - if (domain.length() == 0) domain = key.substring(7); + if (hostname.length() == 0) hostname = key.substring(7); if (date.length() == 0) date = ref.substring(0, 8); - assert domain.equals(key.substring(7)) : "domain = " + domain + ", key = " + key; h.putAll(refstr2map(ref)); } } } if (h.isEmpty()) return null; - return new structureEntry(domhash, domain, date, h); + return new 
StructureEntry(hosthash, hostname, date, h); } - public structureEntry incomingReferences(final String domhash) { - String host = resolveDomHash2DomString(domhash); - if (host == null) return null; + public StructureEntry incomingReferences(final String hosthash) { + String hostname = hostHash2hostName(hosthash); + if (hostname == null) return null; // collect the references - WebStructureGraph.structureEntry sentry; - HashMap domhashes = new HashMap(); - Iterator i = structureEntryIterator(false); + WebStructureGraph.StructureEntry sentry; + HashMap hosthashes = new HashMap(); + Iterator i = new StructureIterator(false); while (i.hasNext()) { sentry = i.next(); - if (sentry.references.containsKey(domhash)) domhashes.put(sentry.domhash, sentry.references.get(domhash)); + if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); } - i = structureEntryIterator(true); + i = new StructureIterator(true); while (i.hasNext()) { sentry = i.next(); - if (sentry.references.containsKey(domhash)) domhashes.put(sentry.domhash, sentry.references.get(domhash)); + if (sentry.references.containsKey(hosthash)) hosthashes.put(sentry.hosthash, sentry.references.get(hosthash)); } // construct a new structureEntry Object - return new structureEntry( - domhash, - host, + return new StructureEntry( + hosthash, + hostname, GenericFormatter.SHORT_DAY_FORMATTER.format(), - domhashes); + hosthashes); + } + + public static class HostReferenceFactory implements ReferenceFactory { + + private static final Row hostReferenceRow = new Row("String h-6, Cardinal m-4 {b256}, Cardinal c-4 {b256}", Base64Order.enhancedCoder); + + public HostReferenceFactory() { + } + + public Row getRow() { + return hostReferenceRow; + } + + public HostReference produceSlow(Entry e) { + return new HostReference(e); + } + + public HostReference produceFast(HostReference e) { + return e; + } + + } + + public static class HostReference extends AbstractReference 
implements Reference { + + private final Row.Entry entry; + + public HostReference(final byte[] hostHash, final long modified, final int count) { + assert (hostHash.length == 6) : "hostHash = " + UTF8.String(hostHash); + this.entry = hostReferenceFacory.getRow().newEntry(); + this.entry.setCol(0, hostHash); + this.entry.setCol(1, MicroDate.microDateDays(modified)); + this.entry.setCol(2, count); + } + + public HostReference(Row.Entry entry) { + this.entry = entry; + } + + public String toPropertyForm() { + return this.entry.toPropertyForm(':', true, true, false, true); + } + + public Entry toKelondroEntry() { + return this.entry; + } + + public byte[] metadataHash() { + return this.entry.getPrimaryKeyBytes(); + } + + public int count() { + return (int) this.entry.getColLong(2); + } + + public long lastModified() { + return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(1)); + } + + public void join(final Reference r) { + // joins two entries into one entry + HostReference oe = (HostReference) r; + + // combine date + long o = oe.lastModified(); + if (this.lastModified() < o) this.entry.setCol(1, MicroDate.microDateDays(o)); + + // combine count + int c = oe.count(); + if (this.count() < c) this.entry.setCol(2, c); + } + + public Collection positions() { + return new ArrayList(0); + } } - public HashMap incomingDomains(final String domhash) { + public static final HostReferenceFactory hostReferenceFacory = new HostReferenceFactory(); + public static ReferenceContainerCache hostReferenceIndexCache = null; + public static long hostReferenceIndexCacheTime = 0; + public static final long hostReferenceIndexCacheTTL = 1000 * 60 * 60 * 12; // 12 hours time to live for cache + + public synchronized ReferenceContainerCache incomingReferences() { + // we return a cache if the cache is filled and not stale + if (hostReferenceIndexCache != null && + hostReferenceIndexCacheTime + hostReferenceIndexCacheTTL > System.currentTimeMillis()) return hostReferenceIndexCache; 
+ // collect the references - WebStructureGraph.structureEntry sentry; - HashMap domains = new HashMap(); - Iterator i = structureEntryIterator(false); - while (i.hasNext()) { - sentry = i.next(); - if (sentry.references.containsKey(domhash)) domains.put(sentry.domain, sentry.references.get(domhash)); + HostReferenceFactory hostReferenceFactory = new HostReferenceFactory(); + ReferenceContainerCache idx = new ReferenceContainerCache(hostReferenceFactory, hostReferenceFactory.getRow(), Base64Order.enhancedCoder); + + // we iterate over all structure entries. + // one structure entry has information that a specific host links to a list of other hosts + incomingReferencesEnrich(idx, new StructureIterator(false), 3000); + incomingReferencesEnrich(idx, new StructureIterator(true), 3000); + + // fill the cache again and set fill time + hostReferenceIndexCache = idx; + hostReferenceIndexCacheTime = System.currentTimeMillis(); + //incomingReferencesTest(hostReferenceIndexCache); + return hostReferenceIndexCache; + } + + private void incomingReferencesEnrich( + ReferenceContainerCache idx, + Iterator structureIterator, + long time) { + // we iterate over all structure entries. + // one structure entry has information that a specific host links to a list of other hosts + long timeout = System.currentTimeMillis() + time; + byte[] term; + HostReference hr; + WebStructureGraph.StructureEntry sentry; + structureLoop: while (structureIterator.hasNext()) { + sentry = structureIterator.next(); + // then we loop over all the hosts that are linked from sentry.hosthash + refloop: for (Map.Entry refhosthashandcounter: sentry.references.entrySet()) { + term = UTF8.getBytes(refhosthashandcounter.getKey()); + try { + hr = new HostReference(UTF8.getBytes(sentry.hosthash), GenericFormatter.SHORT_DAY_FORMATTER.parse(sentry.date).getTime(), refhosthashandcounter.getValue().intValue()); + } catch (ParseException e) { + continue refloop; + } + // each term refers to an index entry. 
look if we already have such an entry + ReferenceContainer r = idx.get(term, null); + try { + if (r == null) { + r = new ReferenceContainer(hostReferenceFacory, term); + r.add(hr); + idx.add(r); + } else { + r.put(hr); + } + } catch (RowSpaceExceededException e) { + continue refloop; + } + } + if (System.currentTimeMillis() > timeout) break structureLoop; } - i = structureEntryIterator(true); - while (i.hasNext()) { - sentry = i.next(); - if (sentry.references.containsKey(domhash)) domains.put(sentry.domain, sentry.references.get(domhash)); + } + + /* + private void incomingReferencesTest(ReferenceContainerCache idx) { + for (ReferenceContainer references: idx) { + log.logInfo("Term-Host: " + hostHash2hostName(UTF8.String(references.getTermHash()))); + Iterator referenceIterator = references.entries(); + StringBuilder s = new StringBuilder(); + HostReference reference; + while (referenceIterator.hasNext()) { + reference = referenceIterator.next(); + s.append(reference.toPropertyForm()); + log.logInfo(" ... 
referenced by " + hostHash2hostName(UTF8.String(reference.metadataHash())) + ", " + reference.count() + " references"); + } } - return domains; } + */ - public int referencesCount(final String domhash) { - // returns the number of domains that are referenced by this domhash - assert domhash.length() == 6 : "domhash = " + domhash; + public int referencesCount(final String hosthash) { + // returns the number of hosts that are referenced by this hosthash + assert hosthash.length() == 6 : "hosthash = " + hosthash; + if (hosthash == null || hosthash.length() != 6) return 0; SortedMap tailMap; int c = 0; synchronized (structure_old) { - tailMap = structure_old.tailMap(domhash); + tailMap = structure_old.tailMap(hosthash); if (!tailMap.isEmpty()) { final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { + if (key.startsWith(hosthash)) { c = refstr2count(tailMap.get(key)); } } } synchronized (structure_new) { - tailMap = structure_new.tailMap(domhash); + tailMap = structure_new.tailMap(hosthash); if (!tailMap.isEmpty()) { final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { + if (key.startsWith(hosthash)) { c += refstr2count(tailMap.get(key)); } } @@ -328,24 +477,24 @@ public class WebStructureGraph { return c; } - public String resolveDomHash2DomString(final String domhash) { - // returns the domain as string, null if unknown - assert domhash.length() == 6; + public String hostHash2hostName(final String hosthash) { + // returns the host as string, null if unknown + assert hosthash.length() == 6; SortedMap tailMap; synchronized(structure_old) { - tailMap = structure_old.tailMap(domhash); + tailMap = structure_old.tailMap(hosthash); if (!tailMap.isEmpty()) { final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { + if (key.startsWith(hosthash)) { return key.substring(7); } } } synchronized(structure_new) { - tailMap = structure_new.tailMap(domhash); + tailMap = structure_new.tailMap(hosthash); if (!tailMap.isEmpty()) { 
final String key = tailMap.firstKey(); - if (key.startsWith(domhash)) { + if (key.startsWith(hosthash)) { return key.substring(7); } } @@ -354,10 +503,10 @@ public class WebStructureGraph { } private void learn(final DigestURI url, final StringBuilder reference /*string of b64(12digits)-hashes*/) { - final String domhash = UTF8.String(url.hash(), 6, 6); + final String hosthash = UTF8.String(url.hash(), 6, 6); // parse the new reference string and join it with the stored references - structureEntry structure = outgoingReferences(domhash); + StructureEntry structure = outgoingReferences(hosthash); final Map refs = (structure == null) ? new HashMap() : structure.references; assert reference.length() % 12 == 0 : "reference.length() = " + reference.length() + ", reference = " + reference.toString(); String dom; @@ -394,7 +543,7 @@ public class WebStructureGraph { // store the map back to the structure synchronized(structure_new) { - structure_new.put(domhash + "," + url.getHost(), map2refstr(refs)); + structure_new.put(hosthash + "," + url.getHost(), map2refstr(refs)); } } @@ -424,7 +573,7 @@ public class WebStructureGraph { } } - public void saveWebStructure() { + private void saveWebStructure() { joinOldNew(); try { synchronized(structure_old) { @@ -436,7 +585,7 @@ public class WebStructureGraph { } public String hostWithMaxReferences() { - // find domain with most references + // find host with most references String maxhost = null; int refsize, maxref = 0; joinOldNew(); @@ -452,20 +601,19 @@ public class WebStructureGraph { return maxhost; } - public Iterator structureEntryIterator(final boolean latest) { - // iterates objects of type structureEntry - return new structureIterator(latest); + public Iterator structureEntryIterator(final boolean latest) { + return new StructureIterator(latest); } - public class structureIterator extends LookAheadIterator implements Iterator { + private class StructureIterator extends LookAheadIterator implements Iterator { private 
final Iterator> i; - public structureIterator(final boolean latest) { + private StructureIterator(final boolean latest) { i = ((latest) ? structure_new : structure_old).entrySet().iterator(); } - public structureEntry next0() { + public StructureEntry next0() { Map.Entry entry = null; String dom = null, ref = ""; while (i.hasNext()) { @@ -478,20 +626,22 @@ public class WebStructureGraph { } if (entry == null || dom == null) return null; assert (ref.length() - 8) % 10 == 0 : "refs = " + ref + ", length = " + ref.length(); - return new structureEntry(dom.substring(0, 6), dom.substring(7), ref.substring(0, 8), refstr2map(ref)); + return new StructureEntry(dom.substring(0, 6), dom.substring(7), ref.substring(0, 8), refstr2map(ref)); } } - public static class structureEntry { - public String domhash, domain, date; - public Map references; - public structureEntry( - final String domhash, - final String domain, + public static class StructureEntry { + public String hosthash; // the tail of the host hash + public String hostname; // the host name + public String date; // date of latest change + public Map references; // a map from the referenced host hash to the number of referenced to that host + private StructureEntry( + final String hosthash, + final String hostname, final String date, final Map references) { - this.domhash = domhash; - this.domain = domain; + this.hosthash = hosthash; + this.hostname = hostname; this.date = date; this.references = references; } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 0c6f6d2ad..25091a1e0 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -69,6 +69,7 @@ import net.yacy.cora.services.federated.opensearch.SRURSSConnector; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.data.word.WordReferenceFactory; import 
net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; @@ -571,7 +572,7 @@ public final class yacyClient { continue; } whacc += wordhash; - secondarySearchSuperviser.addAbstract(wordhash, ReferenceContainer.decompressIndex(ci, target.hash)); + secondarySearchSuperviser.addAbstract(wordhash, WordReferenceFactory.decompressIndex(ci, target.hash)); ac++; } diff --git a/source/de/anomic/yacy/yacyNetwork.java b/source/de/anomic/yacy/yacyNetwork.java index 063f0ad4f..b4f53d5cc 100644 --- a/source/de/anomic/yacy/yacyNetwork.java +++ b/source/de/anomic/yacy/yacyNetwork.java @@ -42,7 +42,7 @@ import de.anomic.server.serverSwitch; public class yacyNetwork { public static final boolean authentifyRequest(final serverObjects post, final serverSwitch env) { - if ((post == null) || (env == null)) return false; + if (post == null || env == null) return false; // identify network final String unitName = post.get(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT); // the network unit diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java index 70b4fad74..cd1b7ffc0 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java +++ b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java @@ -129,7 +129,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { } public String toPropertyForm() { - return entry.toPropertyForm(true, true, false); + return entry.toPropertyForm('=', true, true, false, false); } public Entry toKelondroEntry() { diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java index 500dbb23f..fc2cdcfe3 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java @@ -204,7 +204,7 @@ public 
final class ImageReferenceRow extends AbstractReference implements /*Imag } public String toPropertyForm() { - return entry.toPropertyForm(true, true, false); + return entry.toPropertyForm('=', true, true, false, false); } public Entry toKelondroEntry() { diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java index dd7ed4ff6..2ff10e30c 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java @@ -111,7 +111,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N } public String toPropertyForm() { - return entry.toPropertyForm(true, true, false); + return entry.toPropertyForm('=', true, true, false, false); } public Entry toKelondroEntry() { diff --git a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java index a01d3ce5a..686b3862f 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceFactory.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceFactory.java @@ -26,9 +26,16 @@ package net.yacy.kelondro.data.word; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +import net.yacy.cora.document.UTF8; import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row.Entry; +import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceFactory; +import net.yacy.kelondro.util.ByteBuffer; public class WordReferenceFactory implements ReferenceFactory { @@ -45,4 +52,96 @@ public class WordReferenceFactory implements ReferenceFactory { return WordReferenceRow.urlEntryRow; } + /** + * create an index abstract for a given WordReference ReferenceContainer + * This extracts all the host hashes from a reference Container and returns a byte buffer + * with a compressed representation of the host references + * @param 
+ * @param inputContainer + * @param excludeContainer + * @param maxtime + * @return + */ + public static final ByteBuffer compressIndex(final ReferenceContainer inputContainer, final ReferenceContainer excludeContainer, final long maxtime) { + // collect references according to domains + final long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; + final TreeMap doms = new TreeMap(); + synchronized (inputContainer) { + final Iterator i = inputContainer.entries(); + WordReference iEntry; + String dom, mod; + StringBuilder paths; + while (i.hasNext()) { + iEntry = i.next(); + if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.metadataHash()) != null)) continue; // do not include urls that are in excludeContainer + dom = UTF8.String(iEntry.metadataHash(), 6, 6); + mod = UTF8.String(iEntry.metadataHash(), 0, 6); + if ((paths = doms.get(dom)) == null) { + doms.put(dom, new StringBuilder(30).append(mod)); + } else { + doms.put(dom, paths.append(mod)); + } + if (System.currentTimeMillis() > timeout) + break; + } + } + // construct a result string + final ByteBuffer bb = new ByteBuffer(inputContainer.size() * 6); + bb.append('{'); + final Iterator> i = doms.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = i.next(); + bb.append(entry.getKey()); + bb.append(':'); + bb.append(entry.getValue().toString()); + if (System.currentTimeMillis() > timeout) + break; + if (i.hasNext()) + bb.append(','); + } + bb.append('}'); + return bb; + } + + /** + * decompress an index abstract that was generated from a word index and transmitted over a network connection + * @param ci + * @param peerhash + * @return + */ + public static final TreeMap decompressIndex(ByteBuffer ci, final String peerhash) { + TreeMap target = new TreeMap(); + // target is a mapping from url-hashes to a string of peer-hashes + if (ci.byteAt(0) != '{' || ci.byteAt(ci.length() - 1) != '}') return target; + 
//System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); + ci = ci.trim(1, ci.length() - 2); + String dom, url; + StringBuilder peers; + StringBuilder urlsb; + while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + dom = ci.toStringBuilder(0, 6, 6).toString(); + ci.trim(7); + while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { + assert ci.length() >= 6 : "ci.length() = " + ci.length(); + urlsb = ci.toStringBuilder(0, 6, 12); + urlsb.append(dom); + url = urlsb.toString(); + ci.trim(6); + + peers = target.get(url); + if (peers == null) { + peers = new StringBuilder(24); + peers.append(peerhash); + target.put(url, peers); + } else { + peers.append(peerhash); + } + //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); + } + if (ci.byteAt(0) == ',') ci.trim(1); + } + return target; + } } diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index 728730c94..19b20ab2e 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -229,7 +229,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef } public String toPropertyForm() { - return entry.toPropertyForm(true, true, false); + return entry.toPropertyForm('=', true, true, false, false); } public Entry toKelondroEntry() { @@ -322,12 +322,6 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return toPropertyForm(); } - public boolean isOlder(final Reference other) { - if (other == null) return false; - if (this.lastModified() < other.lastModified()) return true; - return false; - } - @Override public boolean equals(final Object obj) { if (this == obj) return true; diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index a20031359..10bfd22a2 
100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -196,11 +196,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc public Bitfield flags() { return flags; } -/* - public long freshUntil() { - return freshUntil; - } -*/ + public byte[] getLanguage() { return language; } @@ -213,11 +209,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return hitcount; } - public boolean isOlder(final Reference other) { - assert false; // should not be used - return false; - } - public long lastModified() { return lastModified; } diff --git a/source/net/yacy/kelondro/index/Row.java b/source/net/yacy/kelondro/index/Row.java index 0091a6600..4f35713a7 100644 --- a/source/net/yacy/kelondro/index/Row.java +++ b/source/net/yacy/kelondro/index/Row.java @@ -79,7 +79,7 @@ public final class Row { this.objectOrder = objectOrder; // define row with row syntax // example: - //# Structure=,'=',,,,,,,,,, + //# Structure=,,,,,,,,,, // parse pivot definition: //does not work with 'String idx-26 "id = created + originator",String cat-8,String rec-14,short dis-2 {b64e},String att-462' @@ -553,22 +553,26 @@ public final class Row { System.arraycopy(rowinstance, offset + colstart[column], target, targetOffset, row[column].cellwidth); } - public final String toPropertyForm(final boolean includeBraces, final boolean decimalCardinal, final boolean longname) { + public final String toPropertyForm(final char propertySymbol, final boolean includeBraces, final boolean decimalCardinal, final boolean longname, final boolean quotes) { final ByteBuffer bb = new ByteBuffer(objectsize() * 2); if (includeBraces) bb.append('{'); for (int i = 0; i < row.length; i++) { + if (quotes) bb.append('"'); bb.append((longname) ? 
row[i].description : row[i].nickname); - bb.append('='); + if (quotes) bb.append('"'); + bb.append(propertySymbol); + if (quotes) bb.append('"'); if ((decimalCardinal) && (row[i].celltype == Column.celltype_cardinal)) { bb.append(Long.toString(getColLong(i))); } else if ((decimalCardinal) && (row[i].celltype == Column.celltype_bitfield)) { bb.append((new Bitfield(getColBytes(i, true))).exportB64()); } else if ((decimalCardinal) && (row[i].celltype == Column.celltype_binary)) { - assert row[i].cellwidth == 1; + assert row[i].cellwidth == 1 : toString(); bb.append(Integer.toString((0xff & getColByte(i)))); } else { bb.append(rowinstance, offset + colstart[i], row[i].cellwidth); } + if (quotes) bb.append('"'); if (i < row.length - 1) { bb.append(','); if (longname) bb.append(' '); @@ -581,7 +585,7 @@ public final class Row { @Override public final String toString() { - return toPropertyForm(true, false, false); + return toPropertyForm('=', true, false, false, false); } } diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index aa28f7d4c..5841f18e9 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -86,7 +86,7 @@ public class RowCollection implements Iterable, Cloneable { new ThreadPoolExecutor.CallerRunsPolicy()) : null; - public final Row rowdef; + protected final Row rowdef; protected byte[] chunkcache; protected int chunkcount; protected int sortBound; diff --git a/source/net/yacy/kelondro/rwi/AbstractReference.java b/source/net/yacy/kelondro/rwi/AbstractReference.java index 74b37ffc4..35cde10bb 100644 --- a/source/net/yacy/kelondro/rwi/AbstractReference.java +++ b/source/net/yacy/kelondro/rwi/AbstractReference.java @@ -108,4 +108,10 @@ public abstract class AbstractReference implements Reference { } return d / (positions().size() - 1); } + + public boolean isOlder(final Reference other) { + if (other == null) return false; + if 
(this.lastModified() < other.lastModified()) return true; + return false; + } } diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainer.java b/source/net/yacy/kelondro/rwi/ReferenceContainer.java index c2fe807d3..5f114698e 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainer.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainer.java @@ -30,7 +30,6 @@ import java.lang.reflect.Method; import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Iterator; -import java.util.Map; import java.util.TreeMap; import net.yacy.cora.document.UTF8; @@ -41,7 +40,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.ByteOrder; -import net.yacy.kelondro.util.ByteBuffer; /** @@ -513,82 +511,4 @@ public class ReferenceContainer extends RowSet return (int) Base64Order.enhancedCoder.decodeLong(this.termHash, 0, 4); } - - public static final ByteBuffer compressIndex(final ReferenceContainer inputContainer, final ReferenceContainer excludeContainer, final long maxtime) { - // collect references according to domains - final long timeout = (maxtime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxtime; - final TreeMap doms = new TreeMap(); - synchronized (inputContainer) { - final Iterator i = inputContainer.entries(); - Reference iEntry; - String dom, mod; - StringBuilder paths; - while (i.hasNext()) { - iEntry = i.next(); - if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.metadataHash()) != null)) continue; // do not include urls that are in excludeContainer - dom = UTF8.String(iEntry.metadataHash(), 6, 6); - mod = UTF8.String(iEntry.metadataHash(), 0, 6); - if ((paths = doms.get(dom)) == null) { - doms.put(dom, new StringBuilder(30).append(mod)); - } else { - doms.put(dom, paths.append(mod)); - } - if (System.currentTimeMillis() > timeout) - break; - } - } - // construct a result string - final ByteBuffer bb = new ByteBuffer(inputContainer.size() * 6); - bb.append('{'); - final Iterator> i = doms.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = i.next(); - bb.append(entry.getKey()); - bb.append(':'); - bb.append(entry.getValue().toString()); - if (System.currentTimeMillis() > timeout) - break; - if (i.hasNext()) - bb.append(','); - } - bb.append('}'); - return bb; - } - - public static final TreeMap decompressIndex(ByteBuffer ci, final String peerhash) { - TreeMap target = new TreeMap(); - // target is a mapping from url-hashes to a string of peer-hashes - if (ci.byteAt(0) != '{' || ci.byteAt(ci.length() - 1) != '}') return target; - //System.out.println("DEBUG-DECOMPRESS: input is " + ci.toString()); - ci = ci.trim(1, ci.length() - 2); - String dom, url; - StringBuilder peers; - StringBuilder urlsb; - while ((ci.length() >= 13) && (ci.byteAt(6) == ':')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - dom = ci.toStringBuilder(0, 6, 6).toString(); - ci.trim(7); - while ((ci.length() > 0) && (ci.byteAt(0) != ',')) { - assert ci.length() >= 6 : "ci.length() = " + ci.length(); - urlsb = ci.toStringBuilder(0, 6, 12); - urlsb.append(dom); - url = 
urlsb.toString(); - ci.trim(6); - - peers = target.get(url); - if (peers == null) { - peers = new StringBuilder(24); - peers.append(peerhash); - target.put(url, peers); - } else { - peers.append(peerhash); - } - //System.out.println("DEBUG-DECOMPRESS: " + url + ":" + target.get(url)); - } - if (ci.byteAt(0) == ',') ci.trim(1); - } - return target; - } - }