From 5b579e21a3405e45d5e6ce584f37ec441f923b62 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 13 May 2011 06:21:40 +0000 Subject: [PATCH] code cleanup git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7713 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Bookmarks.java | 3 +- htroot/CrawlResults.java | 2 +- htroot/CrawlStartScanner_p.java | 5 +- htroot/IndexControlRWIs_p.java | 6 +- htroot/IndexControlURLs_p.java | 8 +- htroot/Settings_p.java | 3 +- htroot/Table_API_p.java | 2 +- htroot/ViewFile.java | 2 +- htroot/api/yacydoc.java | 4 +- htroot/yacy/urls.java | 2 +- htroot/yacysearch.java | 2 +- source/de/anomic/crawler/CrawlStacker.java | 2 +- source/de/anomic/crawler/SitemapImporter.java | 2 +- .../de/anomic/data/ymark/YMarkMetadata.java | 2 +- .../de/anomic/search/MetadataRepository.java | 78 +++++++++++++++++-- source/de/anomic/search/RankingProcess.java | 8 +- source/de/anomic/search/Segment.java | 4 +- source/de/anomic/search/Switchboard.java | 4 +- source/de/anomic/yacy/dht/Transmission.java | 2 +- .../kelondro/index/BufferedObjectIndex.java | 13 ++++ source/net/yacy/kelondro/index/Cache.java | 12 ++- source/net/yacy/kelondro/index/Index.java | 3 + source/net/yacy/kelondro/index/RAMIndex.java | 14 +++- .../yacy/kelondro/index/RAMIndexCluster.java | 13 +++- source/net/yacy/kelondro/index/RowSet.java | 14 +++- source/net/yacy/kelondro/table/SQLTable.java | 14 +++- .../net/yacy/kelondro/table/SplitTable.java | 12 +++ source/net/yacy/kelondro/table/Table.java | 12 ++- source/net/yacy/migration.java | 4 +- source/net/yacy/yacy.java | 2 +- 30 files changed, 205 insertions(+), 49 deletions(-) diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index cfcc9aaf6..61c7e2c90 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -38,6 +38,7 @@ import java.util.Iterator; import java.util.Set; import net.yacy.cora.date.ISO8601Formatter; +import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.document.Document; @@ -195,7 +196,7 @@ public class Bookmarks { final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash); if (bookmark == null) { // try to get the bookmark from the LURL database - final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(urlHash.getBytes(), null, 0); + final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(UTF8.getBytes(urlHash)); if (urlentry != null) try { final URIMetadataRow.Components metadata = urlentry.metadata(); final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CrawlProfile.CacheStrategy.IFEXIST, 5000, Long.MAX_VALUE)); diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index fff252d13..f20e7f8e6 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -182,7 +182,7 @@ public class CrawlResults { while (i.hasNext()) { entry = i.next(); try { - urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(entry.getKey().getBytes(), null, 0); + urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(UTF8.getBytes(entry.getKey())); if (urle == null) { Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); urlstr = null; diff --git a/htroot/CrawlStartScanner_p.java b/htroot/CrawlStartScanner_p.java index 7d02afd6a..3ad9fdba5 100644 --- a/htroot/CrawlStartScanner_p.java +++ b/htroot/CrawlStartScanner_p.java @@ -40,6 +40,7 @@ import net.yacy.kelondro.order.Base64Order; import de.anomic.data.WorkTables; import de.anomic.search.SearchEventCache; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -155,7 +156,7 @@ public class CrawlStartScanner_p { if (url != null) { String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99"; path += "&crawlingURL=" + url.toNormalform(true, false); - WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig("adminAccountBase64MD5", ""), path, pk); + WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, pk); } } } @@ -189,7 +190,7 @@ public class CrawlStartScanner_p { if (host.getValue() == Access.granted && Scanner.inIndex(apiCommentCache, urlString) == null) { String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99"; path += "&crawlingURL=" + urlString; - WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig("adminAccountBase64MD5", ""), path, u.hash()); + WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, u.hash()); } i++; } catch (MalformedURLException e) { diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index fe8b831af..804a131d3 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -265,7 +265,7 @@ public class IndexControlRWIs_p { URIMetadataRow lurl; while (urlIter.hasNext()) { iEntry = urlIter.next(); - lurl = segment.urlMetadata().load(iEntry.metadataHash(), null, 0); + lurl = segment.urlMetadata().load(iEntry.metadataHash()); if (lurl == null) { try { unknownURLEntries.put(iEntry.metadataHash()); @@ -341,7 +341,7 @@ public class IndexControlRWIs_p { } catch (RowSpaceExceededException e) { Log.logException(e); } - final URIMetadataRow e = segment.urlMetadata().load(b, null, 0); + final URIMetadataRow e = segment.urlMetadata().load(b); segment.urlMetadata().remove(b); if (e != null) { url = e.metadata().url(); @@ -374,7 +374,7 @@ public class IndexControlRWIs_p { } catch (RowSpaceExceededException e) { Log.logException(e); } - final URIMetadataRow e = segment.urlMetadata().load(b, null, 0); + final URIMetadataRow e = segment.urlMetadata().load(b); segment.urlMetadata().remove(b); if (e != null) { url = e.metadata().url(); diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index 881c3c056..f9d43fad3 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -148,7 +148,7 @@ public class IndexControlURLs_p { } if (post.containsKey("urlhashdelete")) { - final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes(), null, 0); + final URIMetadataRow entry = segment.urlMetadata().load(UTF8.getBytes(urlhash)); if (entry == null) { prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); } else { @@ -182,7 +182,7 @@ public class IndexControlURLs_p { final DigestURI url = new DigestURI(urlstring); urlhash = UTF8.String(url.hash()); prop.put("urlhash", urlhash); - final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes(), null, 0); + final URIMetadataRow entry = segment.urlMetadata().load(UTF8.getBytes(urlhash)); if (entry == null) { prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true)); prop.putHTML("urlstring", urlstring); @@ -200,7 +200,7 @@ public class IndexControlURLs_p { } if (post.containsKey("urlhashsearch")) { - final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes(), null, 0); + final URIMetadataRow entry = segment.urlMetadata().load(UTF8.getBytes(urlhash)); if (entry == null) { prop.putHTML("result", "No Entry for URL hash " + urlhash); } else { @@ -327,7 +327,7 @@ public class IndexControlURLs_p { return prop; } final URIMetadataRow.Components metadata = entry.metadata(); - final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash(), null, 0); + final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash()); if (metadata == null || metadata.url() == null) { prop.put("genUrlProfile", "1"); prop.put("genUrlProfile_urlhash", urlhash); diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index 0e6fd50d5..08881c9a9 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -29,6 +29,7 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import de.anomic.search.Switchboard; +import de.anomic.search.SwitchboardConstants; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; @@ -95,7 +96,7 @@ public final class Settings_p { int pos; // admin password - if (env.getConfig("adminAccountBase64", "").length() == 0) { + if (env.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) { // no password has been specified prop.put("adminuser","admin"); } else { diff --git a/htroot/Table_API_p.java b/htroot/Table_API_p.java index 754c35ad7..13c160895 100644 --- a/htroot/Table_API_p.java +++ b/htroot/Table_API_p.java @@ -146,7 +146,7 @@ public class Table_API_p { } // now call the api URLs and store the result status - final Map l = sb.tables.execAPICalls("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig("adminAccountBase64MD5", ""), pks); + final Map l = sb.tables.execAPICalls("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), pks); // construct result table prop.put("showexec", l.size() > 0 ? 1 : 0); diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 44428c053..b0dcb29f1 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -122,7 +122,7 @@ public class ViewFile { String urlHash = post.get("urlHash", ""); URIMetadataRow urlEntry = null; // get the urlEntry that belongs to the url hash - if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(urlHash.getBytes(), null, 0)) != null) { + if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(UTF8.getBytes(urlHash))) != null) { // get the url that belongs to the entry final URIMetadataRow.Components metadata = urlEntry.metadata(); if ((metadata == null) || (metadata.url() == null)) { diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 1f1203a5a..eab3a0586 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -85,14 +85,14 @@ public class yacydoc { } if (urlhash == null || urlhash.length() == 0) return prop; - final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes(), null, 0); + final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes()); if (entry == null) return prop; final URIMetadataRow.Components metadata = entry.metadata(); if (metadata.url() == null) { return prop; } - final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash(), null, 0); + final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash()); prop.putXML("dc_title", metadata.dc_title()); prop.putXML("dc_creator", metadata.dc_creator()); diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index f4b972377..76f12bcf6 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -113,7 +113,7 @@ public class urls { URIMetadataRow.Components metadata; DigestURI referrer; for (int i = 0; i < count; i++) { - entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(urlhashes.substring(12 * i, 12 * (i + 1)).getBytes(), null, 0); + entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(UTF8.getBytes(urlhashes.substring(12 * i, 12 * (i + 1)))); if (entry == null) continue; // find referrer, if there is one referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash()); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index ca3d3f0c5..095adafc3 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -503,7 +503,7 @@ public class yacysearch { return prop; } final String recommendHash = post.get("recommendref", ""); // urlhash - final URIMetadataRow urlentry = indexSegment.urlMetadata().load(recommendHash.getBytes(), null, 0); + final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash)); if (urlentry != null) { final URIMetadataRow.Components metadata = urlentry.metadata(); Document[] documents = null; diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 85952dc9c..0766508b3 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -482,7 +482,7 @@ public final class CrawlStacker { // check if the url is double registered final String dbocc = nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists - URIMetadataRow oldEntry = indexSegment.urlMetadata().load(url.hash(), null, 0); + URIMetadataRow oldEntry = indexSegment.urlMetadata().load(url.hash()); if (oldEntry == null) { if (dbocc != null) { // do double-check diff --git a/source/de/anomic/crawler/SitemapImporter.java b/source/de/anomic/crawler/SitemapImporter.java index 0158e677c..384ab53e3 100644 --- a/source/de/anomic/crawler/SitemapImporter.java +++ b/source/de/anomic/crawler/SitemapImporter.java @@ -79,7 +79,7 @@ public class SitemapImporter extends Thread { final String dbocc = this.sb.urlExists(Segments.Process.LOCALCRAWLING, nexturlhash); if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) { // the url was already loaded. we need to check the date - final URIMetadataRow oldEntry = this.sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(nexturlhash, null, 0); + final URIMetadataRow oldEntry = this.sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(nexturlhash); if (oldEntry != null) { final Date modDate = oldEntry.moddate(); // check if modDate is null diff --git a/source/de/anomic/data/ymark/YMarkMetadata.java b/source/de/anomic/data/ymark/YMarkMetadata.java index 74b7fcac3..fc0e64061 100644 --- a/source/de/anomic/data/ymark/YMarkMetadata.java +++ b/source/de/anomic/data/ymark/YMarkMetadata.java @@ -103,7 +103,7 @@ public class YMarkMetadata { public EnumMap getMetadata() { final EnumMap metadata = new EnumMap(METADATA.class); - final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash(), null, 0); + final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash()); if (urlEntry != null) { metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size())); metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate())); diff --git a/source/de/anomic/search/MetadataRepository.java b/source/de/anomic/search/MetadataRepository.java index 7acbdf68d..5c8334c33 100644 --- a/source/de/anomic/search/MetadataRepository.java +++ b/source/de/anomic/search/MetadataRepository.java @@ -35,8 +35,11 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.BlockingQueue; import de.anomic.crawler.CrawlStacker; @@ -45,6 +48,7 @@ import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.storage.ConcurrentScoreMap; import net.yacy.cora.storage.ScoreMap; +import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -55,6 +59,7 @@ import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.table.SplitTable; import net.yacy.repository.Blacklist; @@ -116,21 +121,78 @@ public final class MetadataRepository implements Iterable { if (urlIndexFile instanceof Cache) return ((Cache) urlIndexFile).writeBufferSize(); return 0; } - - public URIMetadataRow load(final byte[] urlHash, final WordReferenceVars searchedWord, final long ranking) { - // generates an plasmaLURLEntry using the url hash - // if the url cannot be found, this returns null + + /** + * generates an plasmaLURLEntry using the url hash + * if the url cannot be found, this returns null + * @param obrwi + * @return + */ + public URIMetadataRow load(final WeakPriorityBlockingQueue.Element obrwi) { + if (urlIndexFile == null) return null; + if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element + byte[] urlHash = obrwi.getElement().metadataHash(); if (urlHash == null) return null; - assert urlIndexFile != null : "urlHash = " + UTF8.String(urlHash); + try { + final Row.Entry entry = urlIndexFile.get(urlHash); + if (entry == null) return null; + return new URIMetadataRow(entry, obrwi.getElement(), obrwi.getWeight()); + } catch (final IOException e) { + return null; + } + } + + public URIMetadataRow load(final byte[] urlHash) { if (urlIndexFile == null) return null; + if (urlHash == null) return null; try { final Row.Entry entry = urlIndexFile.get(urlHash); if (entry == null) return null; - return new URIMetadataRow(entry, searchedWord, ranking); + return new URIMetadataRow(entry, null, 0); } catch (final IOException e) { return null; } } + + public void load(final WeakPriorityBlockingQueue obrwis, int maxcount, long maxtime, final BlockingQueue rows) { + if (urlIndexFile == null) return; + if (obrwis == null) return; + final Map> collector = new TreeMap>(Base64Order.enhancedCoder); + final List collectOrder = new ArrayList(); + int count = 0; + long timelimit = System.currentTimeMillis() + maxtime; + WeakPriorityBlockingQueue.Element obrwi; + byte[] urlHash; + while (System.currentTimeMillis() < timelimit && count < maxcount) { + try { + obrwi = obrwis.take(); + } catch (InterruptedException e) { + break; + } + if (obrwi != null) { + urlHash = obrwi.getElement().metadataHash(); + if (urlHash != null) { + collector.put(urlHash, obrwi); + collectOrder.add(urlHash); + count++; + } + } + } + + try { + Map resultmap = urlIndexFile.get(collector.keySet()); + } catch (final IOException e) { + return; + } catch (InterruptedException e) { + return; + } + + for (byte[] hash: collectOrder) { + WeakPriorityBlockingQueue.Element element = collector.get(hash); + if (element == null) continue; + + } + } public void store(final URIMetadataRow entry) throws IOException { // Check if there is a more recent Entry already in the DB @@ -585,7 +647,7 @@ public final class MetadataRepository implements Iterable { TreeSet set = new TreeSet(); for (hashStat hs: map.values()) { if (hs == null) continue; - urlref = this.load(UTF8.getBytes(hs.urlhash), null, 0); + urlref = this.load(UTF8.getBytes(hs.urlhash)); if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue; set.add(urlref.metadata().url().getHost()); count--; @@ -619,7 +681,7 @@ public final class MetadataRepository implements Iterable { while (j.hasNext()) { urlhash = j.next(); if (urlhash == null) continue; - urlref = this.load(UTF8.getBytes(urlhash), null, 0); + urlref = this.load(UTF8.getBytes(urlhash)); if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue; if (statsDump == null) return new ArrayList().iterator(); // some other operation has destroyed the object comps = urlref.metadata(); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 50efe3a00..4fec5a637 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -396,14 +396,12 @@ public final class RankingProcess extends Thread { // returns from the current RWI list the best URL entry and removes this entry from the list final long timeout = System.currentTimeMillis() + Math.max(10, waitingtime); int p = -1; - byte[] urlhash; long timeleft; while ((timeleft = timeout - System.currentTimeMillis()) > 0) { //System.out.println("timeleft = " + timeleft); final WeakPriorityBlockingQueue.Element obrwi = takeRWI(skipDoubleDom, timeleft); if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element - urlhash = obrwi.getElement().metadataHash(); - final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.getElement(), obrwi.getWeight()); + final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi); if (page == null) { misses.add(obrwi.getElement().metadataHash()); continue; @@ -605,8 +603,8 @@ public final class RankingProcess extends Thread { if (this.hostResolver != null) while (domhashs.hasNext() && result.sizeSmaller(30)) { domhash = domhashs.next(); if (domhash == null) continue; - urlhash = this.hostResolver.get(domhash); - row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(UTF8.getBytes(urlhash), null, 0); + urlhash = this.hostResolver.get(domhash); + row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(UTF8.getBytes(urlhash)); hostname = row == null ? null : row.metadata().url().getHost(); if (hostname != null) { result.set(hostname, this.hostNavigator.get(domhash)); diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java index b5bbd7a1e..977b6064a 100644 --- a/source/de/anomic/search/Segment.java +++ b/source/de/anomic/search/Segment.java @@ -410,7 +410,7 @@ public class Segment { if (urlhash == null) return 0; // determine the url string - final URIMetadataRow entry = urlMetadata().load(urlhash, null, 0); + final URIMetadataRow entry = urlMetadata().load(urlhash); if (entry == null) return 0; final URIMetadataRow.Components metadata = entry.metadata(); if (metadata == null || metadata.url() == null) return 0; @@ -481,7 +481,7 @@ public class Segment { entry = new WordReferenceVars(containerIterator.next()); // System.out.println("Wordhash: "+wordHash+" UrlHash: // "+entry.getUrlHash()); - final URIMetadataRow ue = urlMetadata.load(entry.metadataHash(), entry, 0); + final URIMetadataRow ue = urlMetadata.load(entry.metadataHash()); if (ue == null) { urlHashs.put(entry.metadataHash()); } else { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index ffd444c6c..f0713b5a8 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1131,7 +1131,7 @@ public final class Switchboard extends serverSwitch { public DigestURI getURL(final Segments.Process process, final byte[] urlhash) { if (urlhash == null) return null; if (urlhash.length == 0) return null; - final URIMetadataRow le = indexSegments.urlMetadata(process).load(urlhash, null, 0); + final URIMetadataRow le = indexSegments.urlMetadata(process).load(urlhash); if (le != null) { Components metadata = le.metadata(); if (metadata == null) return null; @@ -1608,7 +1608,7 @@ public final class Switchboard extends serverSwitch { Log.logException(e); continue; } - final Map callResult = this.tables.execAPICalls("localhost", (int) this.getConfigLong("port", 8090), this.getConfig("adminAccountBase64MD5", ""), pks); + final Map callResult = this.tables.execAPICalls("localhost", (int) this.getConfigLong("port", 8090), this.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), pks); for (final Map.Entry call: callResult.entrySet()) { log.logInfo("Scheduler executed api call, response " + call.getValue() + ": " + call.getKey()); } diff --git a/source/de/anomic/yacy/dht/Transmission.java b/source/de/anomic/yacy/dht/Transmission.java index c77bf2c2d..388896747 100644 --- a/source/de/anomic/yacy/dht/Transmission.java +++ b/source/de/anomic/yacy/dht/Transmission.java @@ -133,7 +133,7 @@ public class Transmission { notFoundx.add(e.metadataHash()); continue; } - URIMetadataRow r = segment.urlMetadata().load(e.metadataHash(), null, 0); + URIMetadataRow r = segment.urlMetadata().load(e.metadataHash()); if (r == null) { notFoundx.add(e.metadataHash()); badReferences.put(e.metadataHash()); diff --git a/source/net/yacy/kelondro/index/BufferedObjectIndex.java b/source/net/yacy/kelondro/index/BufferedObjectIndex.java index 0088c1f4e..5c2ea04d7 100644 --- a/source/net/yacy/kelondro/index/BufferedObjectIndex.java +++ b/source/net/yacy/kelondro/index/BufferedObjectIndex.java @@ -26,8 +26,11 @@ package net.yacy.kelondro.index; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.TreeMap; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.logging.Log; @@ -137,6 +140,16 @@ public class BufferedObjectIndex implements Index, Iterable { } } + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + public boolean has(byte[] key) { synchronized (this.backend) { return this.buffer.has(key) || this.backend.has(key); diff --git a/source/net/yacy/kelondro/index/Cache.java b/source/net/yacy/kelondro/index/Cache.java index 44f15a957..88e95efb5 100644 --- a/source/net/yacy/kelondro/index/Cache.java +++ b/source/net/yacy/kelondro/index/Cache.java @@ -25,13 +25,13 @@ package net.yacy.kelondro.index; import java.io.IOException; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; - import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.util.MemoryControl; @@ -288,6 +288,16 @@ public final class Cache implements Index, Iterable { return entry; } + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + public final synchronized boolean put(final Row.Entry row) throws IOException, RowSpaceExceededException { assert (row != null); assert (row.columns() == row().columns()); diff --git a/source/net/yacy/kelondro/index/Index.java b/source/net/yacy/kelondro/index/Index.java index 85fcf6283..6604a5d18 100644 --- a/source/net/yacy/kelondro/index/Index.java +++ b/source/net/yacy/kelondro/index/Index.java @@ -25,8 +25,10 @@ package net.yacy.kelondro.index; import java.io.IOException; +import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; import net.yacy.kelondro.order.CloneableIterator; @@ -41,6 +43,7 @@ public interface Index extends Iterable { public byte[] smallestKey(); public byte[] largestKey(); public boolean has(byte[] key); // use this only if there is no get in case that has returns true + public Map get(final Collection keys) throws IOException, InterruptedException; public Row.Entry get(byte[] key) throws IOException; public Row.Entry replace(Row.Entry row) throws RowSpaceExceededException, IOException; diff --git a/source/net/yacy/kelondro/index/RAMIndex.java b/source/net/yacy/kelondro/index/RAMIndex.java index 855f11efc..635445782 100644 --- a/source/net/yacy/kelondro/index/RAMIndex.java +++ b/source/net/yacy/kelondro/index/RAMIndex.java @@ -26,11 +26,11 @@ package net.yacy.kelondro.index; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; - import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.MergeIterator; @@ -137,7 +137,17 @@ public final class RAMIndex implements Index, Iterable { return index1.get(key); } - public final synchronized boolean has(final byte[] key) { + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + + public final synchronized boolean has(final byte[] key) { assert (key != null); finishInitialization(); assert index0.isSorted(); diff --git a/source/net/yacy/kelondro/index/RAMIndexCluster.java b/source/net/yacy/kelondro/index/RAMIndexCluster.java index 76c50f115..2bc997c21 100644 --- a/source/net/yacy/kelondro/index/RAMIndexCluster.java +++ b/source/net/yacy/kelondro/index/RAMIndexCluster.java @@ -29,7 +29,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; - +import java.util.Map; +import java.util.TreeMap; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.CloneableIterator; @@ -154,6 +155,16 @@ public final class RAMIndexCluster implements Index, Iterable, Clonea return r.get(key); } + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + public final boolean has(final byte[] key) { final int i = indexFor(key); if (i < 0) return false; diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java index 42ee64a2d..227aa826c 100644 --- a/source/net/yacy/kelondro/index/RowSet.java +++ b/source/net/yacy/kelondro/index/RowSet.java @@ -25,9 +25,11 @@ package net.yacy.kelondro.index; import java.io.IOException; +import java.util.Collection; import java.util.Iterator; +import java.util.Map; import java.util.Random; - +import java.util.TreeMap; import net.yacy.cora.document.UTF8; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; @@ -130,6 +132,16 @@ public class RowSet extends RowCollection implements Index, Iterable if (index < 0) return null; return get(index, true); } + + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } /** * Adds the row to the index. The row is identified by the primary key of the row. diff --git a/source/net/yacy/kelondro/table/SQLTable.java b/source/net/yacy/kelondro/table/SQLTable.java index fc9257324..dc1ef405e 100644 --- a/source/net/yacy/kelondro/table/SQLTable.java +++ b/source/net/yacy/kelondro/table/SQLTable.java @@ -33,10 +33,12 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.Iterator; import java.util.List; - +import java.util.Map; +import java.util.TreeMap; import net.yacy.cora.document.UTF8; import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Row; @@ -195,6 +197,16 @@ public class SQLTable implements Index, Iterable { } } + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + public Row.Entry replace(final Row.Entry row) throws IOException { try { final Row.Entry oldEntry = remove(row.getPrimaryKeyBytes()); diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java index da6e2524f..d80152467 100644 --- a/source/net/yacy/kelondro/table/SplitTable.java +++ b/source/net/yacy/kelondro/table/SplitTable.java @@ -30,12 +30,14 @@ import java.io.File; import java.io.IOException; import java.text.ParseException; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.TreeMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -305,6 +307,16 @@ public class SplitTable implements Index, Iterable { if (keeper == null) return null; return keeper.get(key); } + + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } private Index newTable() { this.current = newFilename(); diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index 966ab5c7b..15dcac492 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -29,6 +29,7 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Date; import java.util.HashMap; @@ -37,7 +38,6 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; - import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.HandleMap; import net.yacy.kelondro.index.Index; @@ -478,6 +478,16 @@ public class Table implements Index, Iterable { return rowdef.newEntry(b); } + public Map get(Collection keys) throws IOException, InterruptedException { + final Map map = new TreeMap(this.row().objectOrder); + Row.Entry entry; + for (byte[] key: keys) { + entry = get(key); + if (entry != null) map.put(key, entry); + } + return map; + } + public boolean has(final byte[] key) { if (index == null) return false; return index.has(key); diff --git a/source/net/yacy/migration.java b/source/net/yacy/migration.java index 301b20d32..b966e03aa 100644 --- a/source/net/yacy/migration.java +++ b/source/net/yacy/migration.java @@ -215,9 +215,9 @@ public class migration { sb.setConfig("proxyAccountBase64MD5", Digest.encodeMD5Hex(acc)); sb.setConfig("proxyAccountBase64", ""); } - if ((acc = sb.getConfig("adminAccountBase64", "")).length() > 0) { + if ((acc = sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "")).length() > 0) { sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(acc)); - sb.setConfig("adminAccountBase64", ""); + sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""); } if ((acc = sb.getConfig("uploadAccountBase64", "")).length() > 0) { sb.setConfig("uploadAccountBase64MD5", Digest.encodeMD5Hex(acc)); diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index c630d61a7..b7315cad8 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -670,7 +670,7 @@ public final class yacy { iEntry = wordIdxEntries.next(); final byte[] urlHash = iEntry.metadataHash(); if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try { - final URIMetadataRow urlEntry = currentUrlDB.load(urlHash, null, 0); + final URIMetadataRow urlEntry = currentUrlDB.load(urlHash); urlCounter++; minimizedUrlDB.store(urlEntry); if (urlCounter % 500 == 0) {