diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 43ed0ec1a..b1b962571 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -50,7 +50,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.peers.NewsPool; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.data.BookmarkHelper; import de.anomic.data.BookmarksDB; import de.anomic.data.BookmarksDB.Bookmark; @@ -195,7 +194,7 @@ public class Bookmarks { final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash); if (bookmark == null) { // try to get the bookmark from the LURL database - final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash)); + final URIMetadataRow urlentry = sb.index.urlMetadata().load(ASCII.getBytes(urlHash)); if (urlentry != null) try { final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE)); prop.put("mode_edit", "0"); // create mode diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 39c901113..de66ff41e 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -39,7 +39,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs.EventOrigin; import de.anomic.crawler.ResultURLs.InitExecEntry; @@ -117,7 +116,7 @@ public class CrawlResults { final String hash = post.get("hash", null); if (hash != null) { // delete from database - sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).remove(hash.getBytes()); + sb.index.urlMetadata().remove(hash.getBytes()); } } @@ -127,7 +126,7 @@ public class CrawlResults { if (hashpart != null) { // delete all urls for this domain from database try { - sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).deleteDomain(hashpart); + sb.index.urlMetadata().deleteDomain(hashpart); ResultURLs.deleteDomain(tabletype, domain, hashpart); } catch (final IOException e) { Log.logException(e); @@ -187,7 +186,7 @@ public class CrawlResults { while (i.hasNext()) { entry = i.next(); try { - urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(UTF8.getBytes(entry.getKey())); + urle = sb.index.urlMetadata().load(UTF8.getBytes(entry.getKey())); if (urle == null) { Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); urlstr = null; diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index ceacd2286..6b43fd250 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -53,7 +53,6 @@ import net.yacy.peers.NewsPool; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.SitemapImporter; import de.anomic.crawler.ZURL.FailCategory; @@ -95,16 +94,7 @@ public class Crawler_p { prop.put("forwardToCrawlStart", "0"); // get segment - Segment indexSegment = null; - if (post != null && post.containsKey("segment")) { - final String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + Segment indexSegment = sb.index; prop.put("info", "0"); diff --git a/htroot/IndexCleaner_p.java b/htroot/IndexCleaner_p.java index a7e0e990a..c8f7db167 100644 --- a/htroot/IndexCleaner_p.java +++ b/htroot/IndexCleaner_p.java @@ -29,7 +29,6 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; import net.yacy.search.index.MetadataRepository; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -41,19 +40,10 @@ public class IndexCleaner_p { final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; prop.put("title", "DbCleanup_p"); - + // get segment - Segment indexSegment = null; - if (post != null && post.containsKey("segment")) { - String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } - + Segment indexSegment = sb.index; + if (post!=null) { if (post.get("action").equals("ustart")) { if (urldbCleanerThread==null || !urldbCleanerThread.isAlive()) { diff --git a/htroot/IndexControlRWIs_p.html b/htroot/IndexControlRWIs_p.html index 1dc3760e3..448b3d2ee 100644 --- a/htroot/IndexControlRWIs_p.html +++ b/htroot/IndexControlRWIs_p.html @@ -11,16 +11,7 @@

The local index currently contains #[wcount]# reverse word indexes

RWI Retrieval (= search for a single word) -
-
Select Segment:
-
- -
- +
Retrieve by Word:
diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index aa1997815..54259b1e9 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -61,9 +61,7 @@ import net.yacy.peers.Seed; import net.yacy.peers.dht.PeerSelection; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; -import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import net.yacy.search.query.QueryParams; import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEventCache; @@ -92,19 +90,9 @@ public class IndexControlRWIs_p prop.put("keyhash", ""); prop.put("result", ""); prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0); - prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null + prop.put("cleanup_solr", sb.index.getRemoteSolr() == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1); - String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); - int i = 0; - for ( final String s : sb.indexSegments.segmentNames() ) { - prop.put("segments_" + i + "_name", s); - prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0); - i++; - } - Segment segment = sb.indexSegments.segment(segmentName); - prop.put("segments", i); - // switch off all optional forms/lists prop.put("searchresult", 0); prop.put("keyhashsimilar", 0); @@ -113,18 +101,9 @@ public class IndexControlRWIs_p // clean up all search events SearchEventCache.cleanupEvents(true); - if ( post != null ) { - // default values - segmentName = post.get("segment", segmentName).trim(); - i = 0; - for ( final String s : sb.indexSegments.segmentNames() ) { - prop.put("segments_" + i + "_name", s); - prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0); - i++; - } - prop.put("segments", i); - segment = sb.indexSegments.segment(segmentName); + Segment segment = sb.index; + if ( post != null ) { final String keystring = post.get("keystring", "").trim(); byte[] keyhash = post.get("keyhash", "").trim().getBytes(); if (keystring.length() > 0) { @@ -180,7 +159,7 @@ public class IndexControlRWIs_p if ( post.get("deleteSolr", "").equals("on") && sb.getConfigBool("federated.service.solr.indexing.enabled", false) ) { try { - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().clear(); + sb.index.getRemoteSolr().clear(); } catch ( final Exception e ) { Log.logException(e); } @@ -390,8 +369,8 @@ public class IndexControlRWIs_p final Iterator> containerIt = segment.termIndex().referenceContainer(keyhash, true, false, 256, false).iterator(); ReferenceContainer container; - i = 0; - int rows = 0, cols = 0; + + int i = 0, rows = 0, cols = 0; prop.put("keyhashsimilar", "1"); while ( containerIt.hasNext() && i < 256 ) { container = containerIt.next(); diff --git a/htroot/IndexControlURLs_p.html b/htroot/IndexControlURLs_p.html index 8f9a6f291..18b5a2113 100644 --- a/htroot/IndexControlURLs_p.html +++ b/htroot/IndexControlURLs_p.html @@ -67,14 +67,6 @@ function updatepage(str) {
URL Retrieval
-
Select Segment:
-
- -
Retrieve by URL:
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java index eb419cb45..5b4c4fc03 100644 --- a/htroot/IndexControlURLs_p.java +++ b/htroot/IndexControlURLs_p.java @@ -41,7 +41,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.RotateIterator; import net.yacy.search.Switchboard; -import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.MetadataRepository; import net.yacy.search.index.Segment; import de.anomic.server.serverObjects; @@ -55,19 +54,12 @@ public class IndexControlURLs_p { final serverObjects prop = new serverObjects(); + Segment segment = sb.index; + // set default values prop.put("urlstring", ""); prop.put("urlhash", ""); prop.put("result", ""); - String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); - int i = 0; - for (final String s: sb.indexSegments.segmentNames()) { - prop.put("segments_" + i + "_name", s); - prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0); - i++; - } - Segment segment = sb.indexSegments.segment(segmentName); - prop.put("segments", i); prop.putNum("ucount", segment.urlMetadata().size()); prop.put("otherHosts", ""); prop.put("genUrlProfile", 0); @@ -76,20 +68,6 @@ public class IndexControlURLs_p { prop.put("statisticslines", 0); prop.put("reload", 0); - // do segment selection - if (post != null && post.containsKey("segment")) { - // default values - segmentName = post.get("segment", segmentName).trim(); - i= 0; - for (final String s: sb.indexSegments.segmentNames()) { - prop.put("segments_" + i + "_name", s); - prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0); - i++; - } - prop.put("segments", i); - segment = sb.indexSegments.segment(segmentName); - } - // show export messages final MetadataRepository.Export export = segment.urlMetadata().export(); if ((export != null) && (export.isAlive())) { @@ -147,7 +125,7 @@ public class IndexControlURLs_p { prop.put("result", " "); if (post.containsKey("urlhashdeleteall")) { - i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST); + int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST); prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes."); prop.put("lurlexport", 0); prop.put("reload", 0); @@ -224,8 +202,7 @@ public class IndexControlURLs_p { final Iterator entryIt = new RotateIterator(segment.urlMetadata().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax()); final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:
"); URIMetadataRow entry; - i = 0; - int rows = 0, cols = 0; + int i = 0, rows = 0, cols = 0; prop.put("urlhashsimilar", "1"); while (entryIt.hasNext() && i < 256) { entry = entryIt.next(); diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index 514f2073a..b64a5390a 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -37,7 +37,6 @@ import net.yacy.cora.services.federated.solr.SolrConnector; import net.yacy.cora.storage.ConfigurationSet; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import net.yacy.search.index.SolrField; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -86,8 +85,8 @@ public class IndexFederated_p { if (solrWasOn) { // switch off - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().close(); - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null); + sb.index.getRemoteSolr().close(); + sb.index.connectRemoteSolr(null); } if (solrIsOnAfterwards) { @@ -97,13 +96,13 @@ public class IndexFederated_p { if (usesolr) { SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true); solr.setCommitWithinMs(commitWithinMs); - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr); + sb.index.connectRemoteSolr(solr); } else { - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null); + sb.index.connectRemoteSolr(null); } } catch (final IOException e) { Log.logException(e); - sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null); + sb.index.connectRemoteSolr(null); } } @@ -138,11 +137,11 @@ public class IndexFederated_p { } // show solr host table - if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null) { + if (sb.index.getRemoteSolr() == null) { prop.put("table", 0); } else { prop.put("table", 1); - final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr(); + final SolrConnector solr = sb.index.getRemoteSolr(); final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()}; final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()}; boolean dark = false; diff --git a/htroot/IndexShare_p.java b/htroot/IndexShare_p.java index bcbb94774..d5865a81c 100644 --- a/htroot/IndexShare_p.java +++ b/htroot/IndexShare_p.java @@ -1,4 +1,4 @@ -// IndexShare_p.java +// IndexShare_p.java // ----------------------- // part of the AnomicHTTPD caching proxy // (C) by Michael Peter Christen; mc@yacy.net @@ -32,7 +32,6 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -44,17 +43,8 @@ public class IndexShare_p { final serverObjects prop = new serverObjects(); // get segment - Segment indexSegment = null; - if (post != null && post.containsKey("segment")) { - String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } - + Segment indexSegment = sb.index; + if (post == null) { prop.put("linkfreq", sb.getConfigLong("defaultLinkReceiveFrequency",30)); prop.put("wordfreq", sb.getConfigLong("defaultWordReceiveFrequency",10)); @@ -64,7 +54,7 @@ public class IndexShare_p { prop.putNum("ucount", indexSegment.urlMetadata().size()); return prop; // be save } - + if (post.containsKey("indexsharesetting")) { sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW, post.containsKey("distribute")); sb.setConfig("allowReceiveIndex", post.containsKey("receive")); @@ -75,7 +65,7 @@ public class IndexShare_p { // insert constants prop.putNum("wcount", indexSegment.termIndex().sizesMax()); prop.putNum("ucount", indexSegment.urlMetadata().size()); - + // return rewrite properties return prop; } diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java index 3209f8058..0137cbaf7 100644 --- a/htroot/Load_RSS_p.java +++ b/htroot/Load_RSS_p.java @@ -42,7 +42,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.RSSLoader; import de.anomic.crawler.retrieval.Response; import de.anomic.data.WorkTables; @@ -191,7 +190,7 @@ public class Load_RSS_p { messageurl = row.get("url", ""); if (messageurl.length() == 0) continue; // get referrer - final DigestURI referrer = sb.getURL(Segments.Process.LOCALCRAWLING, row.get("referrer", "").getBytes()); + final DigestURI referrer = sb.getURL(row.get("referrer", "").getBytes()); // check if feed is registered in scheduler final byte[] api_pk = row.get("api_pk"); final Row r = api_pk == null ? null : sb.tables.select("api", api_pk); @@ -271,7 +270,7 @@ public class Load_RSS_p { final RSSMessage message = feed.getMessage(entry.getValue().substring(5)); final DigestURI messageurl = new DigestURI(message.getLink()); if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop; - if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop; + if (sb.urlExists(messageurl.hash()) != null) continue loop; sb.addToIndex(messageurl, null, null); RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date()); } catch (final IOException e) { @@ -316,7 +315,7 @@ public class Load_RSS_p { author = item.getAuthor(); if (author == null) author = item.getCopyright(); pubDate = item.getPubDate(); - prop.put("showitems_item_" + i + "_state", sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0); + prop.put("showitems_item_" + i + "_state", sb.urlExists(messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0); prop.put("showitems_item_" + i + "_state_count", i); prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid()); prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author); diff --git a/htroot/PerformanceGraph.java b/htroot/PerformanceGraph.java index 9b8ae6a27..ffcf57903 100644 --- a/htroot/PerformanceGraph.java +++ b/htroot/PerformanceGraph.java @@ -42,7 +42,7 @@ public class PerformanceGraph { final int height = post.getInt("height", 240); final boolean showMemory = !post.containsKey("nomem"); - return ProfilingGraph.performanceGraph(width, height, sb.indexSegments.URLCount() + " URLS / " + sb.indexSegments.RWICount() + " WORDS IN INDEX / " + sb.indexSegments.RWIBufferCount() + " WORDS IN CACHE", showMemory); + return ProfilingGraph.performanceGraph(width, height, sb.index.URLCount() + " URLS / " + sb.index.RWICount() + " WORDS IN INDEX / " + sb.index.RWIBufferCount() + " WORDS IN CACHE", showMemory); } } \ No newline at end of file diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index ce90ff105..de414a3a4 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -40,7 +40,6 @@ import net.yacy.kelondro.workflow.WorkflowThread; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -63,16 +62,7 @@ public class PerformanceQueues_p { File defaultSettingsFile = new File(sb.getAppPath(), "defaults/yacy.init"); // get segment - Segment indexSegment = null; - if (post != null && post.containsKey("segment")) { - String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + Segment indexSegment = sb.index; if(post != null) { if(post.containsKey("defaultFile")){ diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index e1105dcda..bdbfb53af 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -41,7 +41,6 @@ import net.yacy.cora.util.NumberTools; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.retrieval.Request; import de.anomic.server.serverObjects; @@ -63,16 +62,7 @@ public class QuickCrawlLink_p { final Switchboard sb = (Switchboard) env; // get segment - Segment indexSegment = null; - if (post != null && post.containsKey("segment")) { - final String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + Segment indexSegment = sb.index; if (post == null) { // send back usage example diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index c86da1110..571d01fa4 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -54,7 +54,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import com.hp.hpl.jena.rdf.model.Model; @@ -93,13 +92,8 @@ public class ViewFile { } // get segment - Segment indexSegment = null; + Segment indexSegment = sb.index; final boolean authorized = sb.verifyAuthentication(header); - if (post != null && post.containsKey("segment") && authorized) { - indexSegment = sb.indexSegments.segment(post.get("segment")); - } else { - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } if (post.containsKey("words")) prop.putHTML("error_words", post.get("words")); diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 68064fb1b..cbcafa02a 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -38,7 +38,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -70,8 +69,7 @@ public class Vocabulary_p { boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted"); boolean discoverFromAuthor = post.get("discovermethod", "").equals("author"); if (discoveruri != null) { - String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); - Segment segment = sb.indexSegments.segment(segmentName); + Segment segment = sb.index; Iterator ui = segment.urlSelector(discoveruri); String t; while (ui.hasNext()) { diff --git a/htroot/YBRFetch_p.java b/htroot/YBRFetch_p.java index 3dc7625b3..2b85c004a 100644 --- a/htroot/YBRFetch_p.java +++ b/htroot/YBRFetch_p.java @@ -9,7 +9,6 @@ import net.yacy.kelondro.rwi.ReferenceContainerCache; import net.yacy.kelondro.util.MemoryControl; import net.yacy.peers.graphics.WebStructureGraph.HostReference; import net.yacy.search.Switchboard; -import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.MetadataRepository; import net.yacy.search.index.MetadataRepository.HostStat; import net.yacy.search.index.Segment; @@ -42,8 +41,7 @@ public class YBRFetch_p } // use an index segment to find hosts for given host hashes - final String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); - final Segment segment = sb.indexSegments.segment(segmentName); + final Segment segment = sb.index; final MetadataRepository metadata = segment.urlMetadata(); Map hostHashResolver; try { diff --git a/htroot/api/status_p.java b/htroot/api/status_p.java index 83ba95e41..1cbd6d1c7 100644 --- a/htroot/api/status_p.java +++ b/htroot/api/status_p.java @@ -32,7 +32,6 @@ import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -45,13 +44,9 @@ public class status_p { // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); - Segment segment = null; final boolean html = post != null && post.containsKey("html"); prop.setLocalized(html); - if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) { - segment = sb.indexSegments.segment(post.get("segment")); - } - if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC); + Segment segment = sb.index; prop.put("rejected", "0"); sb.updateMySeed(); diff --git a/htroot/api/termlist_p.java b/htroot/api/termlist_p.java index 4c3259535..4d7eeeca4 100644 --- a/htroot/api/termlist_p.java +++ b/htroot/api/termlist_p.java @@ -32,7 +32,6 @@ import net.yacy.kelondro.index.Row; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -43,13 +42,9 @@ public class termlist_p { final Log log = new Log("TERMLIST"); final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; - Segment segment = null; + Segment segment = sb.index; final boolean delete = post != null && post.containsKey("delete"); final long mincount = post == null ? 10000 : post.getLong("mincount", 10000); - if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) { - segment = sb.indexSegments.segment(post.get("segment")); - } - if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC); final Iterator> i = segment.termIndex().referenceCountIterator(null, false, false); Rating e; int c = 0, termnumber = 0; diff --git a/htroot/api/timeline.java b/htroot/api/timeline.java index bb269dcbb..2559e6019 100644 --- a/htroot/api/timeline.java +++ b/htroot/api/timeline.java @@ -41,7 +41,6 @@ import net.yacy.kelondro.util.ISO639; import net.yacy.peers.Network; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import net.yacy.search.query.QueryParams; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -56,12 +55,7 @@ public final class timeline { if ((post == null) || (env == null)) return prop; final boolean authenticated = sb.adminAuthenticated(header) >= 2; - Segment segment = null; - if (post.containsKey("segment") && authenticated) { - segment = sb.indexSegments.segment(post.get("segment")); - } else { - segment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + Segment segment = sb.index; final String querystring = post.get("query", ""); // a string of word hashes that shall be searched and combined final int count = Math.min((authenticated) ? 1000 : 10, post.getInt("maximumRecords", 1000)); // SRU syntax diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java index 644a2b013..de835ea95 100644 --- a/htroot/api/webstructure.java +++ b/htroot/api/webstructure.java @@ -41,7 +41,6 @@ import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -65,7 +64,7 @@ public class webstructure { } else if (about.length() == 12 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) { urlhash = ASCII.getBytes(about); hosthash = about.substring(6); - url = authenticated ? sb.getURL(Segments.Process.PUBLIC, urlhash) : null; + url = authenticated ? sb.getURL(urlhash) : null; } else if (authenticated && about.length() > 0) { // consider "about" as url or hostname try { @@ -138,7 +137,7 @@ public class webstructure { // citations prop.put("citations", 1); - IndexCell citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation(); + IndexCell citationReferences = sb.index.urlCitation(); ReferenceContainer citations = null; // citationReferences.count(urlhash) would give to the number of references good for ranking try { @@ -158,7 +157,7 @@ public class webstructure { while (i.hasNext()) { CitationReference cr = i.next(); byte[] refhash = cr.urlhash(); - DigestURI refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null; + DigestURI refurl = authenticated ? sb.getURL(refhash) : null; prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1); if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false)); prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash); diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index 2ddfcdfd5..3aa549fe9 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -40,7 +40,6 @@ import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -55,15 +54,9 @@ public class yacydoc { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); - final Segment segment; + final Segment segment = sb.index; final boolean html = post != null && post.containsKey("html"); prop.setLocalized(html); - final boolean authorized = sb.verifyAuthentication(header); - if (post != null && post.containsKey("segment") && authorized) { - segment = sb.indexSegments.segment(post.get("segment")); - } else { - segment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } prop.put("dc_title", ""); prop.put("dc_creator", ""); @@ -131,7 +124,7 @@ public class yacydoc { prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true)); prop.put("yacy_size", entry.size()); prop.put("yacy_words", entry.wordCount()); - prop.put("yacy_citations", sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation().count(entry.hash())); + prop.put("yacy_citations", sb.index.urlCitation().count(entry.hash())); prop.put("yacy_inbound", entry.llocal()); prop.put("yacy_outbound", entry.lother()); @@ -140,18 +133,18 @@ public class yacydoc { String rdf = JenaTripleStore.getRDFByModel(model); prop.putXML("triples", rdf); prop.put("rdf", header.fileType() == FileType.XML ? rdf : ""); - - + + String references = ""; Iterator t = JenaTripleStore.getObjects("http://yacy.net/url#"+urlhash, "http://purl.org/dc/terms/references"); - + while (t.hasNext()) { RDFNode r = t.next(); references += r.toString()+","; } - + Log.logInfo ("TRIPLESTORE", references); - + prop.put("taglinks", references); // return rewrite properties diff --git a/htroot/api/ymarks/add_ymark.java b/htroot/api/ymarks/add_ymark.java index 31cf093d3..a83fc67c8 100644 --- a/htroot/api/ymarks/add_ymark.java +++ b/htroot/api/ymarks/add_ymark.java @@ -6,7 +6,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.data.UserDB; import de.anomic.data.ymark.YMarkEntry; import de.anomic.data.ymark.YMarkTables; @@ -35,7 +34,7 @@ public class add_ymark { if(post.containsKey("urlHash")) { final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING); - final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).url(); + final DigestURI url = sb.index.urlMetadata().load(urlHash.getBytes()).url(); final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt()); final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING); try { @@ -66,7 +65,7 @@ public class add_ymark { } final YMarkEntry bmk = new YMarkEntry(); - + bmk.put(YMarkEntry.BOOKMARK.URL.key(), url); bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), post.get(YMarkEntry.BOOKMARK.TITLE.key(),YMarkEntry.BOOKMARK.TITLE.deflt())); bmk.put(YMarkEntry.BOOKMARK.DESC.key(), post.get(YMarkEntry.BOOKMARK.DESC.key(),YMarkEntry.BOOKMARK.DESC.deflt())); diff --git a/htroot/api/ymarks/get_metadata.java b/htroot/api/ymarks/get_metadata.java index f8641e480..a827a3efb 100644 --- a/htroot/api/ymarks/get_metadata.java +++ b/htroot/api/ymarks/get_metadata.java @@ -47,7 +47,7 @@ public class get_metadata { } try { - final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.indexSegments); + final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.index); final Document document = meta.loadDocument(sb.loader); final EnumMap metadata = meta.loadMetadata(); diff --git a/htroot/api/ymarks/get_treeview.java b/htroot/api/ymarks/get_treeview.java index f7dd8e134..9295c9314 100644 --- a/htroot/api/ymarks/get_treeview.java +++ b/htroot/api/ymarks/get_treeview.java @@ -215,7 +215,7 @@ public class get_treeview { } } else if (isAutoTagger || isMetadata || isURLdb || isCrawlStart) { try { - final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.indexSegments); + final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.index); final Document document = meta.loadDocument(sb.loader); final TreeMap tags = sb.tables.bookmarks.getTags(bmk_user); if(isAutoTagger) { diff --git a/htroot/suggest.java b/htroot/suggest.java index cb685796e..84c1af6aa 100644 --- a/htroot/suggest.java +++ b/htroot/suggest.java @@ -30,7 +30,6 @@ import net.yacy.cora.protocol.ResponseHeader; import net.yacy.kelondro.data.word.Word; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import de.anomic.data.DidYouMean; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -69,19 +68,7 @@ public class suggest { final int count = (post == null) ? 20 : post.getInt("count", 20); // get segment - final Segment indexSegment; - if (post != null && post.containsKey("segment")) { - final String segmentName = post.get("segment"); - if (sb.indexSegments.segmentExist(segmentName)) { - indexSegment = sb.indexSegments.segment(segmentName); - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + final Segment indexSegment = sb.index; int c = 0; if (more || diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index 58dbea76a..285f044bc 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -37,7 +37,6 @@ import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs.EventOrigin; import de.anomic.crawler.ZURL.FailCategory; @@ -150,7 +149,7 @@ public final class crawlReceipt { if ("fill".equals(result)) try { // put new entry into database - sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry); + sb.index.urlMetadata().store(entry); ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true)); diff --git a/htroot/yacy/query.java b/htroot/yacy/query.java index 6fbc23a8b..0ba75c4f4 100644 --- a/htroot/yacy/query.java +++ b/htroot/yacy/query.java @@ -33,10 +33,9 @@ import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.logging.Log; -import net.yacy.peers.Protocol; import net.yacy.peers.Network; +import net.yacy.peers.Protocol; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -91,7 +90,7 @@ public final class query { if (obj.equals("rwiurlcount")) try { // the total number of different urls in the rwi is returned // shall contain a word hash, the number of assigned lurls to this hash is returned - prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).get(env.getBytes(), null).size()); + prop.put("response", sb.index.termIndex().get(env.getBytes(), null).size()); return prop; } catch (final IOException e) { Log.logException(e); @@ -99,13 +98,13 @@ public final class query { if (obj.equals("rwicount")) { // return the total number of available word indexes - prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).sizesMax()); + prop.put("response", sb.index.termIndex().sizesMax()); return prop; } if (obj.equals("lurlcount")) { // return the number of all available l-url's - prop.put("response", sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).size()); + prop.put("response", sb.index.urlMetadata().size()); return prop; } diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 57648731a..2697e1239 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -66,7 +66,6 @@ import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; @@ -223,7 +222,7 @@ public final class search { ArrayList> accu = null; if (query.length() == 0 && abstractSet != null) { // this is _not_ a normal search, only a request for index abstracts - final Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); + final Segment indexSegment = sb.index; theQuery = new QueryParams( null, abstractSet, @@ -315,7 +314,7 @@ public final class search { DigestURI.TLD_any_zone_filter, client, false, - sb.indexSegments.segment(Segments.Process.PUBLIC), + sb.index, rankingProfile, header.get(RequestHeader.USER_AGENT, ""), false, 0.0d, 0.0d, 0.0d diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index b29b732b2..a51c20b85 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -49,7 +49,6 @@ import net.yacy.peers.dht.FlatWordPartitionScheme; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.index.Segments; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -116,9 +115,9 @@ public final class transferRWI { sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode"); result = "not_granted"; pause = 60000; - } else if (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() > cachelimit) { + } else if (sb.index.termIndex().getBufferSize() > cachelimit) { // we are too busy to receive indexes - sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() + ")."); + sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.index.termIndex().getBufferSize() + ")."); granted = false; // don't accept more words if there are too many words to flush result = "busy"; pause = 60000; @@ -152,7 +151,7 @@ public final class transferRWI { int received = 0; int blocked = 0; int receivedURL = 0; - final IndexCell cell = sb.indexSegments.termIndex(Segments.Process.DHTIN); + final IndexCell cell = sb.index.termIndex(); int count = 0; while (it.hasNext()) { serverCore.checkInterruption(); @@ -197,7 +196,7 @@ public final class transferRWI { // check if we need to ask for the corresponding URL if (!(knownURL.has(urlHash) || unknownURL.has(urlHash))) try { - if (sb.indexSegments.urlMetadata(Segments.Process.DHTIN).exists(urlHash)) { + if (sb.index.urlMetadata().exists(urlHash)) { knownURL.put(urlHash); } else { unknownURL.put(urlHash); @@ -230,7 +229,7 @@ public final class transferRWI { } result = "ok"; - pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time + pause = (int) (sb.index.termIndex().getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time } prop.put("unknownURL", unknownURLs.toString()); diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index a9b43c7a2..182d5925d 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -40,7 +40,6 @@ import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs.EventOrigin; import de.anomic.server.serverCore; @@ -84,7 +83,7 @@ public final class transferURL { } else { int received = 0; int blocked = 0; - final int sizeBefore = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size(); + final int sizeBefore = sb.index.urlMetadata().size(); // read the urls from the other properties and store String urls; URIMetadataRow lEntry; @@ -141,7 +140,7 @@ public final class transferURL { // write entry to database if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false)); try { - sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry); + sb.index.urlMetadata().store(lEntry); ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER); if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); received++; @@ -153,7 +152,7 @@ public final class transferURL { sb.peers.mySeed().incRU(received); // return rewrite properties - final int more = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size() - sizeBefore; + final int more = sb.index.urlMetadata().size() - sizeBefore; doublevalues = Integer.toString(received - more); Network.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs"); EventChannel.channels(EventChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + ", blocked " + blocked + " URLs from peer " + otherPeerName, "", otherPeer.hash)); diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index 1cecfb9e5..cf6c79d38 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.peers.Protocol; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.NoticedURL; import de.anomic.crawler.ZURL.FailCategory; import de.anomic.crawler.retrieval.Request; @@ -78,7 +77,7 @@ public class urls { if (entry == null) break; // find referrer, if there is one - referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerhash()); + referrer = sb.getURL(entry.referrerhash()); // place url to notice-url db sb.crawlQueues.delegatedURL.push( @@ -114,10 +113,10 @@ public class urls { URIMetadataRow entry; DigestURI referrer; for (int i = 0; i < count; i++) { - entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1)))); + entry = sb.index.urlMetadata().load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1)))); if (entry == null) continue; // find referrer, if there is one - referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash()); + referrer = sb.getURL(entry.referrerHash()); // create RSS entry prop.put("item_" + c + "_title", entry.dc_title()); prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false)); diff --git a/htroot/yacyinteractive.java b/htroot/yacyinteractive.java index 1ca22f96d..ee9ba0fc4 100644 --- a/htroot/yacyinteractive.java +++ b/htroot/yacyinteractive.java @@ -57,7 +57,7 @@ public class yacyinteractive { prop.putHTML("querys", query.replaceAll(" ", "+")); prop.put("serverlist", query.isEmpty() ? 1 : 0); prop.put("focus", focus ? 1 : 0); - prop.put("allowrealtime", sb.indexSegments.URLCount() < 100000 ? 1 : 0); + prop.put("allowrealtime", sb.index.URLCount() < 100000 ? 1 : 0); return prop; } } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 43f5ddc9c..11db85f8b 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -73,7 +73,6 @@ import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; @@ -133,16 +132,7 @@ public class yacysearch { prop.put("sidebarVocabulary", j); // get segment - Segment indexSegment = null; - if ( post != null && post.containsKey("segment") ) { - final String segmentName = post.get("segment"); - if ( sb.indexSegments.segmentExist(segmentName) ) { - indexSegment = sb.indexSegments.segment(segmentName); - } - } else { - // take default segment - indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC); - } + Segment indexSegment = sb.index; final String EXT = header.get("EXT", ""); final boolean rss = EXT.equals("rss"); diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index 319854f13..c4a1d94c7 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -52,7 +52,6 @@ import net.yacy.peers.dht.PeerSelection; import net.yacy.search.Switchboard; import net.yacy.search.Switchboard.indexingQueueEntry; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.index.Segments; import de.anomic.crawler.NoticedURL.StackType; import de.anomic.crawler.ZURL.FailCategory; import de.anomic.crawler.retrieval.Request; @@ -62,7 +61,6 @@ public class CrawlQueues { private static final String ERROR_DB_FILENAME = "urlError4.db"; private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db"; - private static final Segments.Process PROCESS = Segments.Process.LOCALCRAWLING; protected Switchboard sb; protected Log log; @@ -82,8 +80,8 @@ public class CrawlQueues { this.log.logConfig("Starting Crawling Management"); this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727); FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); - this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); + this.errorURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); + this.delegatedURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); } public void relocate(final File newQueuePath) { @@ -94,8 +92,8 @@ public class CrawlQueues { this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727); FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); - this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); + this.errorURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); + this.delegatedURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); } public synchronized void close() { @@ -276,7 +274,7 @@ public class CrawlQueues { return true; } try { - this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(PROCESS, new Response(urlEntry, profile), null, null)); + this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(new Response(urlEntry, profile), null, null)); Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false)); } catch (final InterruptedException e) { Log.logException(e); diff --git a/source/de/anomic/crawler/RSSLoader.java b/source/de/anomic/crawler/RSSLoader.java index 473d1b4da..7e03f948a 100644 --- a/source/de/anomic/crawler/RSSLoader.java +++ b/source/de/anomic/crawler/RSSLoader.java @@ -42,7 +42,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.retrieval.Response; import de.anomic.data.WorkTables; import de.anomic.server.serverObjects; @@ -59,6 +58,7 @@ public class RSSLoader extends Thread { this.urlf = urlf; } + @Override public void run() { RSSReader rss = null; try { @@ -89,7 +89,7 @@ public class RSSLoader extends Thread { try { final DigestURI messageurl = new DigestURI(message.getLink()); if (indexTriggered.containsKey(messageurl.hash())) continue loop; - if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop; + if (sb.urlExists(messageurl.hash()) != null) continue loop; sb.addToIndex(messageurl, null, null); indexTriggered.insertIfAbsent(messageurl.hash(), new Date()); loadCount++; diff --git a/source/de/anomic/crawler/SitemapImporter.java b/source/de/anomic/crawler/SitemapImporter.java index 9a0a01ec3..c792d09e0 100644 --- a/source/de/anomic/crawler/SitemapImporter.java +++ b/source/de/anomic/crawler/SitemapImporter.java @@ -1,4 +1,4 @@ -//SitemapImporter.java +//SitemapImporter.java //------------------------ //part of YaCy //(C) by Michael Peter Christen; mc@yacy.net @@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.retrieval.Request; public class SitemapImporter extends Thread { @@ -43,7 +42,7 @@ public class SitemapImporter extends Thread { private static final Log logger = new Log("SITEMAP"); private DigestURI siteMapURL = null; private final Switchboard sb; - + public SitemapImporter(final Switchboard sb, final DigestURI sitemapURL, final CrawlProfile profileEntry) { assert sitemapURL != null; this.sb = sb; @@ -52,6 +51,7 @@ public class SitemapImporter extends Thread { this.crawlingProfile = profileEntry; } + @Override public void run() { try { logger.logInfo("Start parsing sitemap file " + this.siteMapURL); @@ -76,10 +76,10 @@ public class SitemapImporter extends Thread { // check if the url is known and needs to be recrawled Date lastMod = entry.lastmod(null); if (lastMod != null) { - final String dbocc = this.sb.urlExists(Segments.Process.LOCALCRAWLING, nexturlhash); + final String dbocc = this.sb.urlExists(nexturlhash); if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) { // the url was already loaded. we need to check the date - final URIMetadataRow oldEntry = this.sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(nexturlhash); + final URIMetadataRow oldEntry = this.sb.index.urlMetadata().load(nexturlhash); if (oldEntry != null) { final Date modDate = oldEntry.moddate(); // check if modDate is null diff --git a/source/de/anomic/crawler/retrieval/FTPLoader.java b/source/de/anomic/crawler/retrieval/FTPLoader.java index aca20dfad..4979a94e0 100644 --- a/source/de/anomic/crawler/retrieval/FTPLoader.java +++ b/source/de/anomic/crawler/retrieval/FTPLoader.java @@ -41,7 +41,6 @@ import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.Latency; import de.anomic.crawler.ZURL.FailCategory; @@ -115,7 +114,7 @@ public class FTPLoader { // directory -> get list of files final RequestHeader requestHeader = new RequestHeader(); if (request.referrerhash() != null) { - final DigestURI u = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + final DigestURI u = this.sb.getURL(request.referrerhash()); if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false)); } @@ -222,7 +221,7 @@ public class FTPLoader { // create response header final RequestHeader requestHeader = new RequestHeader(); if (request.referrerhash() != null) { - final DigestURI refurl = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + final DigestURI refurl = this.sb.getURL(request.referrerhash()); if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false)); } final ResponseHeader responseHeader = new ResponseHeader(200); diff --git a/source/de/anomic/crawler/retrieval/FileLoader.java b/source/de/anomic/crawler/retrieval/FileLoader.java index a989d11a0..608aff2f8 100644 --- a/source/de/anomic/crawler/retrieval/FileLoader.java +++ b/source/de/anomic/crawler/retrieval/FileLoader.java @@ -40,7 +40,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; public class FileLoader { @@ -61,7 +60,7 @@ public class FileLoader { RequestHeader requestHeader = new RequestHeader(); if (request.referrerhash() != null) { - DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + DigestURI ur = this.sb.getURL(request.referrerhash()); if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false)); } diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index 0d07ed5f0..9668ddef4 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -39,7 +39,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.Latency; import de.anomic.crawler.ZURL.FailCategory; @@ -118,7 +117,7 @@ public final class HTTPLoader { final RequestHeader requestHeader = new RequestHeader(); requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); DigestURI refererURL = null; - if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash()); if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true)); requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT)); requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE)); @@ -168,7 +167,7 @@ public final class HTTPLoader { } // check if the url was already indexed - final String dbname = this.sb.urlExists(Segments.Process.LOCALCRAWLING, redirectionUrl.hash()); + final String dbname = this.sb.urlExists(redirectionUrl.hash()); if (dbname != null) { // customer request this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode); throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname); diff --git a/source/de/anomic/crawler/retrieval/SMBLoader.java b/source/de/anomic/crawler/retrieval/SMBLoader.java index 0726aabfe..ac9c6be73 100644 --- a/source/de/anomic/crawler/retrieval/SMBLoader.java +++ b/source/de/anomic/crawler/retrieval/SMBLoader.java @@ -49,7 +49,6 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.CrawlProfile; public class SMBLoader { @@ -73,7 +72,7 @@ public class SMBLoader { RequestHeader requestHeader = new RequestHeader(); if (request.referrerhash() != null) { - DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + DigestURI ur = this.sb.getURL(request.referrerhash()); if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false)); } diff --git a/source/de/anomic/data/ymark/YMarkMetadata.java b/source/de/anomic/data/ymark/YMarkMetadata.java index cd05270a6..9688d5baf 100644 --- a/source/de/anomic/data/ymark/YMarkMetadata.java +++ b/source/de/anomic/data/ymark/YMarkMetadata.java @@ -38,13 +38,13 @@ import net.yacy.document.Parser.Failure; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.repository.LoaderDispatcher; -import net.yacy.search.index.Segments; +import net.yacy.search.index.Segment; import de.anomic.crawler.retrieval.Response; public class YMarkMetadata { private DigestURI uri; Document document; - Segments indexSegment; + Segment indexSegment; public enum METADATA { TITLE, @@ -72,16 +72,16 @@ public class YMarkMetadata { this.indexSegment = null; } - public YMarkMetadata(final DigestURI uri, final Segments indexSegment) { + public YMarkMetadata(final DigestURI uri, final Segment indexSegment) { this.uri = uri; this.document = null; this.indexSegment = indexSegment; } - - public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) { + + public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) { this.document = null; this.indexSegment = indexSegment; - this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).url(); + this.uri = this.indexSegment.urlMetadata().load(urlHash).url(); } public YMarkMetadata(final Document document) { @@ -101,11 +101,11 @@ public class YMarkMetadata { this.document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse()); } return this.document; - } + } public EnumMap getMetadata() { final EnumMap metadata = new EnumMap(METADATA.class); - final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash()); + final URIMetadataRow urlEntry = this.indexSegment.urlMetadata().load(this.uri.hash()); if (urlEntry != null) { metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size())); metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate())); diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index e782c61e7..2b9a13d0a 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -54,7 +54,6 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; -import net.yacy.search.index.Segments; import de.anomic.crawler.Cache; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.ZURL.FailCategory; @@ -211,7 +210,7 @@ public final class LoaderDispatcher { final RequestHeader requestHeader = new RequestHeader(); requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); DigestURI refererURL = null; - if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); + if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash()); if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true)); final Response response = new Response( request, diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index edcb7a958..0167a2e38 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -144,7 +144,6 @@ import net.yacy.repository.Blacklist; import net.yacy.repository.FilterEngine; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.index.Segment; -import net.yacy.search.index.Segments; import net.yacy.search.index.SolrConfiguration; import net.yacy.search.query.AccessTracker; import net.yacy.search.query.QueryParams; @@ -218,7 +217,7 @@ public final class Switchboard extends serverSwitch public File queuesRoot; public File surrogatesInPath; public File surrogatesOutPath; - public Segments indexSegments; + public Segment index; public LoaderDispatcher loader; public CrawlSwitchboard crawler; public CrawlQueues crawlQueues; @@ -379,16 +378,14 @@ public final class Switchboard extends serverSwitch // initialize index ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0); final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS"); - this.indexSegments = - new Segments( + this.index = + new Segment( this.log, - segmentsPath, + new File(segmentsPath, "default"), wordCacheMaxCount, fileSizeMax, this.useTailCache, this.exceed134217727); - // set the default segment names - setDefaultSegments(); // prepare a solr index profile switch list final File solrBackupProfile = new File("defaults/solr.keys.list"); @@ -418,7 +415,7 @@ public final class Switchboard extends serverSwitch ShardSelection.Method.MODULO_HOST_MD5, 10000, true); solr.setCommitWithinMs(commitWithinMs); - this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr); + this.index.connectRemoteSolr(solr); } catch ( final IOException e ) { Log.logException(e); } @@ -466,7 +463,7 @@ public final class Switchboard extends serverSwitch // init a DHT transmission dispatcher this.dhtDispatcher = (this.peers.sizeConnected() == 0) ? null : new Dispatcher( - this.indexSegments.segment(Segments.Process.LOCALCRAWLING), + this.index, this.peers, true, 10000); @@ -767,7 +764,7 @@ public final class Switchboard extends serverSwitch new CrawlStacker( this.crawlQueues, this.crawler, - this.indexSegments.segment(Segments.Process.LOCALCRAWLING), + this.index, this.peers, isIntranetMode(), isGlobalMode(), @@ -994,33 +991,6 @@ public final class Switchboard extends serverSwitch sb = this; } - private void setDefaultSegments() { - this.indexSegments.setSegment( - Segments.Process.RECEIPTS, - getConfig(SwitchboardConstants.SEGMENT_RECEIPTS, "default")); - this.indexSegments.setSegment( - Segments.Process.QUERIES, - getConfig(SwitchboardConstants.SEGMENT_QUERIES, "default")); - this.indexSegments.setSegment( - Segments.Process.DHTIN, - getConfig(SwitchboardConstants.SEGMENT_DHTIN, "default")); - this.indexSegments.setSegment( - Segments.Process.DHTOUT, - getConfig(SwitchboardConstants.SEGMENT_DHTOUT, "default")); - this.indexSegments.setSegment( - Segments.Process.PROXY, - getConfig(SwitchboardConstants.SEGMENT_PROXY, "default")); - this.indexSegments.setSegment( - Segments.Process.LOCALCRAWLING, - getConfig(SwitchboardConstants.SEGMENT_LOCALCRAWLING, "default")); - this.indexSegments.setSegment( - Segments.Process.REMOTECRAWLING, - getConfig(SwitchboardConstants.SEGMENT_REMOTECRAWLING, "default")); - this.indexSegments.setSegment( - Segments.Process.PUBLIC, - getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default")); - } - public int getIndexingProcessorsQueueSize() { return this.indexingDocumentProcessor.queueSize() + this.indexingCondensementProcessor.queueSize() @@ -1170,8 +1140,8 @@ public final class Switchboard extends serverSwitch if ( this.dhtDispatcher != null ) { this.dhtDispatcher.close(); } - synchronized ( this.indexSegments ) { - this.indexSegments.close(); + synchronized ( this.index ) { + this.index.close(); } this.crawlStacker.announceClose(); this.crawlStacker.close(); @@ -1211,16 +1181,14 @@ public final class Switchboard extends serverSwitch partitionExponent, this.useTailCache, this.exceed134217727); - this.indexSegments = - new Segments( + this.index = + new Segment( this.log, - new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), + new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"), wordCacheMaxCount, fileSizeMax, this.useTailCache, this.exceed134217727); - // set the default segment names - setDefaultSegments(); this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object // create a crawler @@ -1229,7 +1197,7 @@ public final class Switchboard extends serverSwitch // init a DHT transmission dispatcher this.dhtDispatcher = (this.peers.sizeConnected() == 0) ? null : new Dispatcher( - this.indexSegments.segment(Segments.Process.LOCALCRAWLING), + this.index, this.peers, true, 10000); @@ -1257,7 +1225,7 @@ public final class Switchboard extends serverSwitch new CrawlStacker( this.crawlQueues, this.crawler, - this.indexSegments.segment(Segments.Process.LOCALCRAWLING), + this.index, this.peers, "local.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0, "global.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0, @@ -1449,11 +1417,11 @@ public final class Switchboard extends serverSwitch } } - public String urlExists(final Segments.Process process, final byte[] hash) { + public String urlExists(final byte[] hash) { // tests if hash occurrs in any database // if it exists, the name of the database is returned, // if it not exists, null is returned - if ( this.indexSegments.urlMetadata(process).exists(hash) ) { + if ( this.index.urlMetadata().exists(hash) ) { return "loaded"; } return this.crawlQueues.urlExists(hash); @@ -1465,14 +1433,14 @@ public final class Switchboard extends serverSwitch this.crawlQueues.urlRemove(hash); } - public DigestURI getURL(final Segments.Process process, final byte[] urlhash) { + public DigestURI getURL(final byte[] urlhash) { if ( urlhash == null ) { return null; } if ( urlhash.length == 0 ) { return null; } - final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash); + final URIMetadataRow le = this.index.urlMetadata().load(urlhash); if ( le != null ) { return le.url(); } @@ -1606,7 +1574,7 @@ public final class Switchboard extends serverSwitch this.crawler.close(); this.log .logConfig("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager (stand by...)"); - this.indexSegments.close(); + this.index.close(); this.peers.close(); Cache.close(); this.tables.close(); @@ -1696,7 +1664,6 @@ public final class Switchboard extends serverSwitch } try { this.indexingDocumentProcessor.enQueue(new indexingQueueEntry( - Segments.Process.LOCALCRAWLING, response, null, null)); @@ -1810,9 +1777,7 @@ public final class Switchboard extends serverSwitch 0); response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false); final indexingQueueEntry queueEntry = - new indexingQueueEntry(Segments.Process.SURROGATES, response, new Document[] { - document - }, null); + new indexingQueueEntry(response, new Document[] {document}, null); // place the queue entry into the concurrent process of the condenser (document analysis) try { @@ -1887,18 +1852,15 @@ public final class Switchboard extends serverSwitch public static class indexingQueueEntry extends WorkflowJob { - public Segments.Process process; public Response queueEntry; public Document[] documents; public Condenser[] condenser; public indexingQueueEntry( - final Segments.Process process, final Response queueEntry, final Document[] documents, final Condenser[] condenser) { super(); - this.process = process; this.queueEntry = queueEntry; this.documents = documents; this.condenser = condenser; @@ -1929,9 +1891,7 @@ public final class Switchboard extends serverSwitch // clear caches if necessary if ( !MemoryControl.request(8000000L, false) ) { - for ( final Segment indexSegment : this.indexSegments ) { - indexSegment.urlMetadata().clearCache(); - } + sb.index.urlMetadata().clearCache(); SearchEventCache.cleanupEvents(false); this.trail.clear(); } @@ -2301,7 +2261,7 @@ public final class Switchboard extends serverSwitch if ( documents == null ) { return null; } - return new indexingQueueEntry(in.process, in.queueEntry, documents, null); + return new indexingQueueEntry(in.queueEntry, documents, null); } private Document[] parseDocument(final Response response) throws InterruptedException { @@ -2446,11 +2406,11 @@ public final class Switchboard extends serverSwitch + in.queueEntry.url().toNormalform(false, true) + "': indexing not wanted by crawl profile"); } - return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); + return new indexingQueueEntry(in.queueEntry, in.documents, null); } - boolean localSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr"); - boolean remoteSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false); + boolean localSolr = this.index.getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr"); + boolean remoteSolr = this.index.getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false); if (localSolr || remoteSolr) { // send the documents to solr for ( final Document doc : in.documents ) { @@ -2470,8 +2430,8 @@ public final class Switchboard extends serverSwitch } try { SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc); - if (localSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr().add(solrDoc); - if (remoteSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().add(solrDoc); + if (localSolr) this.index.getLocalSolr().add(solrDoc); + if (remoteSolr) this.index.getRemoteSolr().add(solrDoc); } catch ( final IOException e ) { Log.logWarning( "SOLR", @@ -2494,7 +2454,7 @@ public final class Switchboard extends serverSwitch + in.queueEntry.url().toNormalform(false, true) + "': indexing not wanted by federated rule for YaCy"); } - return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); + return new indexingQueueEntry(in.queueEntry, in.documents, null); } final List doclist = new ArrayList(); @@ -2519,7 +2479,7 @@ public final class Switchboard extends serverSwitch } if ( doclist.isEmpty() ) { - return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null); + return new indexingQueueEntry(in.queueEntry, in.documents, null); } in.documents = doclist.toArray(new Document[doclist.size()]); final Condenser[] condenser = new Condenser[in.documents.length]; @@ -2540,7 +2500,7 @@ public final class Switchboard extends serverSwitch ? true : !profile.remoteIndexing()); } - return new indexingQueueEntry(in.process, in.queueEntry, in.documents, condenser); + return new indexingQueueEntry(in.queueEntry, in.documents, condenser); } public indexingQueueEntry webStructureAnalysis(final indexingQueueEntry in) { @@ -2565,7 +2525,6 @@ public final class Switchboard extends serverSwitch if ( in.condenser != null ) { for ( int i = 0; i < in.documents.length; i++ ) { storeDocumentIndex( - in.process, in.queueEntry, in.documents[i], in.condenser[i], @@ -2577,7 +2536,6 @@ public final class Switchboard extends serverSwitch } private void storeDocumentIndex( - final Segments.Process process, final Response queueEntry, final Document document, final Condenser condenser, @@ -2591,9 +2549,6 @@ public final class Switchboard extends serverSwitch final DigestURI url = new DigestURI(document.dc_source()); final DigestURI referrerURL = queueEntry.referrerURL(); EventOrigin processCase = queueEntry.processCase(this.peers.mySeed().hash); - if ( process == Segments.Process.SURROGATES ) { - processCase = EventOrigin.SURROGATES; - } if ( condenser == null || document.indexingDenied() ) { //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase); @@ -2629,7 +2584,7 @@ public final class Switchboard extends serverSwitch URIMetadataRow newEntry = null; try { newEntry = - this.indexSegments.segment(process).storeDocument( + this.index.storeDocument( url, referrerURL, queueEntry.lastModified(), @@ -2763,11 +2718,10 @@ public final class Switchboard extends serverSwitch public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName) throws IOException, Parser.Failure { - final Segments.Process process = Segments.Process.LOCALCRAWLING; if ( searchEvent != null ) { searchEvent.addHeuristic(url.hash(), heuristicName, true); } - if ( this.indexSegments.segment(process).exists(url.hash()) ) { + if ( this.index.exists(url.hash()) ) { return; // don't do double-work } final Request request = this.loader.request(url, true, true); @@ -2806,7 +2760,6 @@ public final class Switchboard extends serverSwitch ResultImages.registerImages(url, document, true); Switchboard.this.webStructure.generateCitationReference(url, document, condenser); storeDocumentIndex( - process, response, document, condenser, @@ -3023,7 +2976,7 @@ public final class Switchboard extends serverSwitch if ( getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false") ) { return "no DHT distribution: not enabled (per setting)"; } - final Segment indexSegment = this.indexSegments.segment(segment); + final Segment indexSegment = this.index; if ( indexSegment.urlMetadata().size() < 10 ) { return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size(); } @@ -3299,12 +3252,12 @@ public final class Switchboard extends serverSwitch this.peers.mySeed().put(Seed.ISPEED, Integer.toString(currentPPM())); this.peers.mySeed().put(Seed.RSPEED, Float.toString(averageQPM())); this.peers.mySeed().put(Seed.UPTIME, Long.toString(uptime / 60)); // the number of minutes that the peer is up in minutes/day (moving average MA30) - this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.indexSegments.URLCount())); // the number of links that the peer has stored (LURL's) + this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.index.URLCount())); // the number of links that the peer has stored (LURL's) this.peers.mySeed().put(Seed.NCOUNT, Integer.toString(this.crawlQueues.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's) this.peers.mySeed().put( Seed.RCOUNT, Integer.toString(this.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.GLOBAL))); // the number of links that the peer provides for remote crawling (ZURL's) - this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.indexSegments.RWICount())); // the minimum number of words that the peer has indexed (as it says) + this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.index.RWICount())); // the minimum number of words that the peer has indexed (as it says) this.peers.mySeed().put(Seed.SCOUNT, Integer.toString(this.peers.sizeConnected())); // the number of seeds that the peer has stored this.peers.mySeed().put( Seed.CCOUNT, diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index cbf35264d..6fe2bc9a8 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -137,24 +137,23 @@ public class Segment { maxFileSize, writeBufferSize); - /* - this.authorNavIndex = new IndexCell( - new File(new File(segmentPath, "nav_author"), "idx"), - navigationReferenceFactory, - wordOrder, - NavigationReferenceRow.navEntryRow, - entityCacheMaxSize, - targetFileSize, - maxFileSize, - this.merger, - writeBufferSize); - */ - // create LURL-db this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727); //this.connectLocalSolr(); } + public long URLCount() { + return this.urlMetadata.size(); + } + + public long RWICount() { + return this.termIndex.sizesMax(); + } + + public int RWIBufferCount() { + return this.termIndex.getBufferSize(); + } + public void connectRemoteSolr(final SolrConnector solr) { this.urlMetadata.connectRemoteSolr(solr); } diff --git a/source/net/yacy/search/index/Segments.java b/source/net/yacy/search/index/Segments.java deleted file mode 100644 index 2d546f990..000000000 --- a/source/net/yacy/search/index/Segments.java +++ /dev/null @@ -1,196 +0,0 @@ -// Segments.java -// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 30.07.2009 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.search.index; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import net.yacy.kelondro.data.word.WordReference; -import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.rwi.IndexCell; - - -public class Segments implements Iterable { - - /** - * process enumeration type - * defines constants that can be used to assign process-related segment names - */ - public enum Process { - - RECEIPTS, - QUERIES, - DHTIN, - DHTOUT, // the only segment that is used for reading-only - PROXY, - LOCALCRAWLING, - REMOTECRAWLING, - PUBLIC, - SURROGATES; // includes the index that can be retrieved by the yacy p2p api - - public String toString() { - throw new UnsupportedOperationException("toString not allowed"); - } - } - - private final Log log; - private final File segmentsPath; - private final int entityCacheMaxSize; - private final long maxFileSize; - private Map segments; - private final HashMap process_assignment; - private final boolean useTailCache; - private final boolean exceed134217727; - - public Segments( - final Log log, - final File segmentsPath, - final int entityCacheMaxSize, - final long maxFileSize, - final boolean useTailCache, - final boolean exceed134217727) { - this.log = log; - this.segmentsPath = segmentsPath; - this.entityCacheMaxSize = entityCacheMaxSize; - this.maxFileSize = maxFileSize; - this.useTailCache = useTailCache; - this.exceed134217727 = exceed134217727; - this.segments = new HashMap(); - this.process_assignment = new HashMap(); - - // assign default segment names for the processes - this.process_assignment.put(Process.RECEIPTS, "default"); - this.process_assignment.put(Process.QUERIES, "default"); - this.process_assignment.put(Process.DHTIN, "default"); - this.process_assignment.put(Process.DHTOUT, "default"); - this.process_assignment.put(Process.PROXY, "default"); - this.process_assignment.put(Process.LOCALCRAWLING, "default"); - this.process_assignment.put(Process.REMOTECRAWLING, "default"); - this.process_assignment.put(Process.PUBLIC, "default"); - this.process_assignment.put(Process.SURROGATES, "default"); - } - - public void setSegment(final Process process, final String segmentName) { - this.process_assignment.put(process, segmentName); - } - - public String[] segmentNames() { - return this.segments.keySet().toArray(new String[this.segments.size()]); - } - - public boolean segmentExist(final String segmentName) { - return this.segments.containsKey(segmentName); - } - - public Segment segment(final Process process) { - return segment(this.process_assignment.get(process)); - } - - public Segment segment(final String segmentName) { - if (this.segments == null) return null; - Segment segment = this.segments.get(segmentName); - if (segment == null) { - // generate the segment - try { - segment = new Segment( - this.log, - new File(this.segmentsPath, segmentName), - this.entityCacheMaxSize, - this.maxFileSize, - this.useTailCache, - this.exceed134217727); - } catch (final IOException e) { - Log.logException(e); - return null; - } - this.segments.put(segmentName, segment); - } - return segment; - } - - public long URLCount() { - if (this.segments == null) return 0; - long c = 0; - for (final Segment s: this.segments.values()) c += s.urlMetadata().size(); - return c; - } - - public long RWICount() { - if (this.segments == null) return 0; - long c = 0; - for (final Segment s: this.segments.values()) c += s.termIndex().sizesMax(); - return c; - } - - public int RWIBufferCount() { - if (this.segments == null) return 0; - int c = 0; - for (final Segment s: this.segments.values()) c += s.termIndex().getBufferSize(); - return c; - } - - public MetadataRepository urlMetadata(final Process process) { - return segment(this.process_assignment.get(process)).urlMetadata(); - } - - public IndexCell termIndex(final Process process) { - return segment(this.process_assignment.get(process)).termIndex(); - } - - public void clear(final Process process) { - segment(this.process_assignment.get(process)).clear(); - } - - public File getLocation(final Process process) { - return segment(this.process_assignment.get(process)).getLocation(); - } - - public void close(final Process process) { - segment(this.process_assignment.get(process)).close(); - } - - public synchronized void close() { - if (this.segments != null) for (final Segment s: this.segments.values()) s.close(); - this.segments = null; - } - - public void finalize() { - this.close(); - } - - public synchronized Segment.ReferenceCleaner getReferenceCleaner(final String segmentName, final byte[] startHash) { - return segment(segmentName).getReferenceCleaner(startHash); - } - - public Iterator iterator() { - return this.segments.values().iterator(); - } -} -