removed segments-concept and the Segments class:

the segments had been there to create a tenant-infrastructure but were
never used since that was all much too complex. There will be a
replacement using Solr navigation based on a segment field in the search
index.
pull/1/head
Michael Peter Christen 13 years ago
parent 508a81b86c
commit 03280fb161

@ -50,7 +50,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.NewsPool;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.data.BookmarkHelper;
import de.anomic.data.BookmarksDB;
import de.anomic.data.BookmarksDB.Bookmark;
@ -195,7 +194,7 @@ public class Bookmarks {
final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
if (bookmark == null) {
// try to get the bookmark from the LURL database
final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
final URIMetadataRow urlentry = sb.index.urlMetadata().load(ASCII.getBytes(urlHash));
if (urlentry != null) try {
final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
prop.put("mode_edit", "0"); // create mode

@ -39,7 +39,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.ResultURLs.EventOrigin;
import de.anomic.crawler.ResultURLs.InitExecEntry;
@ -117,7 +116,7 @@ public class CrawlResults {
final String hash = post.get("hash", null);
if (hash != null) {
// delete from database
sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).remove(hash.getBytes());
sb.index.urlMetadata().remove(hash.getBytes());
}
}
@ -127,7 +126,7 @@ public class CrawlResults {
if (hashpart != null) {
// delete all urls for this domain from database
try {
sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).deleteDomain(hashpart);
sb.index.urlMetadata().deleteDomain(hashpart);
ResultURLs.deleteDomain(tabletype, domain, hashpart);
} catch (final IOException e) {
Log.logException(e);
@ -187,7 +186,7 @@ public class CrawlResults {
while (i.hasNext()) {
entry = i.next();
try {
urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(UTF8.getBytes(entry.getKey()));
urle = sb.index.urlMetadata().load(UTF8.getBytes(entry.getKey()));
if (urle == null) {
Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null;

@ -53,7 +53,6 @@ import net.yacy.peers.NewsPool;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.SitemapImporter;
import de.anomic.crawler.ZURL.FailCategory;
@ -95,16 +94,7 @@ public class Crawler_p {
prop.put("forwardToCrawlStart", "0");
// get segment
Segment indexSegment = null;
if (post != null && post.containsKey("segment")) {
final String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
prop.put("info", "0");

@ -29,7 +29,6 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.index.MetadataRepository;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -41,19 +40,10 @@ public class IndexCleaner_p {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
prop.put("title", "DbCleanup_p");
// get segment
Segment indexSegment = null;
if (post != null && post.containsKey("segment")) {
String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
if (post!=null) {
if (post.get("action").equals("ustart")) {
if (urldbCleanerThread==null || !urldbCleanerThread.isAlive()) {

@ -11,16 +11,7 @@
<p>The local index currently contains #[wcount]# reverse word indexes</p>
<form action="IndexControlRWIs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>RWI Retrieval (= search for a single word)</legend>
<dl>
<dt class="TableCellDark">Select Segment:</dt>
<dd>
<select name="selectSegment" size="1">
#{segments}#
<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
#{/segments}#
</select>
</dd>
<dl>
<dt class="TableCellDark">Retrieve by Word:</dt>
<dd><input type="text" name="keystring" value="#[keystring]#" size="40" maxlength="80" />
<input type="submit" name="keystringsearch" value="Show URL Entries for Word" />

@ -61,9 +61,7 @@ import net.yacy.peers.Seed;
import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEventCache;
@ -92,19 +90,9 @@ public class IndexControlRWIs_p
prop.put("keyhash", "");
prop.put("result", "");
prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null
prop.put("cleanup_solr", sb.index.getRemoteSolr() == null
|| !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
int i = 0;
for ( final String s : sb.indexSegments.segmentNames() ) {
prop.put("segments_" + i + "_name", s);
prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
i++;
}
Segment segment = sb.indexSegments.segment(segmentName);
prop.put("segments", i);
// switch off all optional forms/lists
prop.put("searchresult", 0);
prop.put("keyhashsimilar", 0);
@ -113,18 +101,9 @@ public class IndexControlRWIs_p
// clean up all search events
SearchEventCache.cleanupEvents(true);
if ( post != null ) {
// default values
segmentName = post.get("segment", segmentName).trim();
i = 0;
for ( final String s : sb.indexSegments.segmentNames() ) {
prop.put("segments_" + i + "_name", s);
prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
i++;
}
prop.put("segments", i);
segment = sb.indexSegments.segment(segmentName);
Segment segment = sb.index;
if ( post != null ) {
final String keystring = post.get("keystring", "").trim();
byte[] keyhash = post.get("keyhash", "").trim().getBytes();
if (keystring.length() > 0) {
@ -180,7 +159,7 @@ public class IndexControlRWIs_p
if ( post.get("deleteSolr", "").equals("on")
&& sb.getConfigBool("federated.service.solr.indexing.enabled", false) ) {
try {
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().clear();
sb.index.getRemoteSolr().clear();
} catch ( final Exception e ) {
Log.logException(e);
}
@ -390,8 +369,8 @@ public class IndexControlRWIs_p
final Iterator<ReferenceContainer<WordReference>> containerIt =
segment.termIndex().referenceContainer(keyhash, true, false, 256, false).iterator();
ReferenceContainer<WordReference> container;
i = 0;
int rows = 0, cols = 0;
int i = 0, rows = 0, cols = 0;
prop.put("keyhashsimilar", "1");
while ( containerIt.hasNext() && i < 256 ) {
container = containerIt.next();

@ -67,14 +67,6 @@ function updatepage(str) {
<form action="IndexControlURLs_p.html" id="searchform" method="post" enctype="multipart/form-data" accept-charset="UTF-8" onkeyup="xmlhttpPost(); return false;">
<fieldset><legend>URL Retrieval</legend>
<dl>
<dt class="TableCellDark">Select Segment:</dt>
<dd>
<select name="selectSegment" size="1">
#{segments}#
<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
#{/segments}#
</select>
</dd>
<dt class="TableCellDark">Retrieve by URL:</dt>
<dd><input type="text" name="urlstring" value="#[urlstring]#" size="40" maxlength="250" />

@ -41,7 +41,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.RotateIterator;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.MetadataRepository;
import net.yacy.search.index.Segment;
import de.anomic.server.serverObjects;
@ -55,19 +54,12 @@ public class IndexControlURLs_p {
final serverObjects prop = new serverObjects();
Segment segment = sb.index;
// set default values
prop.put("urlstring", "");
prop.put("urlhash", "");
prop.put("result", "");
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
int i = 0;
for (final String s: sb.indexSegments.segmentNames()) {
prop.put("segments_" + i + "_name", s);
prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
i++;
}
Segment segment = sb.indexSegments.segment(segmentName);
prop.put("segments", i);
prop.putNum("ucount", segment.urlMetadata().size());
prop.put("otherHosts", "");
prop.put("genUrlProfile", 0);
@ -76,20 +68,6 @@ public class IndexControlURLs_p {
prop.put("statisticslines", 0);
prop.put("reload", 0);
// do segment selection
if (post != null && post.containsKey("segment")) {
// default values
segmentName = post.get("segment", segmentName).trim();
i= 0;
for (final String s: sb.indexSegments.segmentNames()) {
prop.put("segments_" + i + "_name", s);
prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
i++;
}
prop.put("segments", i);
segment = sb.indexSegments.segment(segmentName);
}
// show export messages
final MetadataRepository.Export export = segment.urlMetadata().export();
if ((export != null) && (export.isAlive())) {
@ -147,7 +125,7 @@ public class IndexControlURLs_p {
prop.put("result", " ");
if (post.containsKey("urlhashdeleteall")) {
i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST);
int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST);
prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
prop.put("lurlexport", 0);
prop.put("reload", 0);
@ -224,8 +202,7 @@ public class IndexControlURLs_p {
final Iterator<URIMetadataRow> entryIt = new RotateIterator<URIMetadataRow>(segment.urlMetadata().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax());
final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:<br />");
URIMetadataRow entry;
i = 0;
int rows = 0, cols = 0;
int i = 0, rows = 0, cols = 0;
prop.put("urlhashsimilar", "1");
while (entryIt.hasNext() && i < 256) {
entry = entryIt.next();

@ -37,7 +37,6 @@ import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.storage.ConfigurationSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import net.yacy.search.index.SolrField;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -86,8 +85,8 @@ public class IndexFederated_p {
if (solrWasOn) {
// switch off
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().close();
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
sb.index.getRemoteSolr().close();
sb.index.connectRemoteSolr(null);
}
if (solrIsOnAfterwards) {
@ -97,13 +96,13 @@ public class IndexFederated_p {
if (usesolr) {
SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true);
solr.setCommitWithinMs(commitWithinMs);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr);
sb.index.connectRemoteSolr(solr);
} else {
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
sb.index.connectRemoteSolr(null);
}
} catch (final IOException e) {
Log.logException(e);
sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
sb.index.connectRemoteSolr(null);
}
}
@ -138,11 +137,11 @@ public class IndexFederated_p {
}
// show solr host table
if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null) {
if (sb.index.getRemoteSolr() == null) {
prop.put("table", 0);
} else {
prop.put("table", 1);
final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr();
final SolrConnector solr = sb.index.getRemoteSolr();
final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()};
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()};
boolean dark = false;

@ -1,4 +1,4 @@
// IndexShare_p.java
// IndexShare_p.java
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
@ -32,7 +32,6 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -44,17 +43,8 @@ public class IndexShare_p {
final serverObjects prop = new serverObjects();
// get segment
Segment indexSegment = null;
if (post != null && post.containsKey("segment")) {
String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
if (post == null) {
prop.put("linkfreq", sb.getConfigLong("defaultLinkReceiveFrequency",30));
prop.put("wordfreq", sb.getConfigLong("defaultWordReceiveFrequency",10));
@ -64,7 +54,7 @@ public class IndexShare_p {
prop.putNum("ucount", indexSegment.urlMetadata().size());
return prop; // be save
}
if (post.containsKey("indexsharesetting")) {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW, post.containsKey("distribute"));
sb.setConfig("allowReceiveIndex", post.containsKey("receive"));
@ -75,7 +65,7 @@ public class IndexShare_p {
// insert constants
prop.putNum("wcount", indexSegment.termIndex().sizesMax());
prop.putNum("ucount", indexSegment.urlMetadata().size());
// return rewrite properties
return prop;
}

@ -42,7 +42,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.RSSLoader;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.WorkTables;
@ -191,7 +190,7 @@ public class Load_RSS_p {
messageurl = row.get("url", "");
if (messageurl.length() == 0) continue;
// get referrer
final DigestURI referrer = sb.getURL(Segments.Process.LOCALCRAWLING, row.get("referrer", "").getBytes());
final DigestURI referrer = sb.getURL(row.get("referrer", "").getBytes());
// check if feed is registered in scheduler
final byte[] api_pk = row.get("api_pk");
final Row r = api_pk == null ? null : sb.tables.select("api", api_pk);
@ -271,7 +270,7 @@ public class Load_RSS_p {
final RSSMessage message = feed.getMessage(entry.getValue().substring(5));
final DigestURI messageurl = new DigestURI(message.getLink());
if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop;
if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
if (sb.urlExists(messageurl.hash()) != null) continue loop;
sb.addToIndex(messageurl, null, null);
RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
} catch (final IOException e) {
@ -316,7 +315,7 @@ public class Load_RSS_p {
author = item.getAuthor();
if (author == null) author = item.getCopyright();
pubDate = item.getPubDate();
prop.put("showitems_item_" + i + "_state", sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
prop.put("showitems_item_" + i + "_state", sb.urlExists(messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
prop.put("showitems_item_" + i + "_state_count", i);
prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);

@ -42,7 +42,7 @@ public class PerformanceGraph {
final int height = post.getInt("height", 240);
final boolean showMemory = !post.containsKey("nomem");
return ProfilingGraph.performanceGraph(width, height, sb.indexSegments.URLCount() + " URLS / " + sb.indexSegments.RWICount() + " WORDS IN INDEX / " + sb.indexSegments.RWIBufferCount() + " WORDS IN CACHE", showMemory);
return ProfilingGraph.performanceGraph(width, height, sb.index.URLCount() + " URLS / " + sb.index.RWICount() + " WORDS IN INDEX / " + sb.index.RWIBufferCount() + " WORDS IN CACHE", showMemory);
}
}

@ -40,7 +40,6 @@ import net.yacy.kelondro.workflow.WorkflowThread;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -63,16 +62,7 @@ public class PerformanceQueues_p {
File defaultSettingsFile = new File(sb.getAppPath(), "defaults/yacy.init");
// get segment
Segment indexSegment = null;
if (post != null && post.containsKey("segment")) {
String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
if(post != null) {
if(post.containsKey("defaultFile")){

@ -41,7 +41,6 @@ import net.yacy.cora.util.NumberTools;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
@ -63,16 +62,7 @@ public class QuickCrawlLink_p {
final Switchboard sb = (Switchboard) env;
// get segment
Segment indexSegment = null;
if (post != null && post.containsKey("segment")) {
final String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
if (post == null) {
// send back usage example

@ -54,7 +54,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import com.hp.hpl.jena.rdf.model.Model;
@ -93,13 +92,8 @@ public class ViewFile {
}
// get segment
Segment indexSegment = null;
Segment indexSegment = sb.index;
final boolean authorized = sb.verifyAuthentication(header);
if (post != null && post.containsKey("segment") && authorized) {
indexSegment = sb.indexSegments.segment(post.get("segment"));
} else {
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
if (post.containsKey("words"))
prop.putHTML("error_words", post.get("words"));

@ -38,7 +38,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -70,8 +69,7 @@ public class Vocabulary_p {
boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted");
boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
if (discoveruri != null) {
String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
Segment segment = sb.indexSegments.segment(segmentName);
Segment segment = sb.index;
Iterator<DigestURI> ui = segment.urlSelector(discoveruri);
String t;
while (ui.hasNext()) {

@ -9,7 +9,6 @@ import net.yacy.kelondro.rwi.ReferenceContainerCache;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.MetadataRepository;
import net.yacy.search.index.MetadataRepository.HostStat;
import net.yacy.search.index.Segment;
@ -42,8 +41,7 @@ public class YBRFetch_p
}
// use an index segment to find hosts for given host hashes
final String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
final Segment segment = sb.indexSegments.segment(segmentName);
final Segment segment = sb.index;
final MetadataRepository metadata = segment.urlMetadata();
Map<String, HostStat> hostHashResolver;
try {

@ -32,7 +32,6 @@ import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -45,13 +44,9 @@ public class status_p {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
Segment segment = null;
final boolean html = post != null && post.containsKey("html");
prop.setLocalized(html);
if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
segment = sb.indexSegments.segment(post.get("segment"));
}
if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
Segment segment = sb.index;
prop.put("rejected", "0");
sb.updateMySeed();

@ -32,7 +32,6 @@ import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -43,13 +42,9 @@ public class termlist_p {
final Log log = new Log("TERMLIST");
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
Segment segment = null;
Segment segment = sb.index;
final boolean delete = post != null && post.containsKey("delete");
final long mincount = post == null ? 10000 : post.getLong("mincount", 10000);
if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
segment = sb.indexSegments.segment(post.get("segment"));
}
if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
final Iterator<Rating<byte[]>> i = segment.termIndex().referenceCountIterator(null, false, false);
Rating<byte[]> e;
int c = 0, termnumber = 0;

@ -41,7 +41,6 @@ import net.yacy.kelondro.util.ISO639;
import net.yacy.peers.Network;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -56,12 +55,7 @@ public final class timeline {
if ((post == null) || (env == null)) return prop;
final boolean authenticated = sb.adminAuthenticated(header) >= 2;
Segment segment = null;
if (post.containsKey("segment") && authenticated) {
segment = sb.indexSegments.segment(post.get("segment"));
} else {
segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment segment = sb.index;
final String querystring = post.get("query", ""); // a string of word hashes that shall be searched and combined
final int count = Math.min((authenticated) ? 1000 : 10, post.getInt("maximumRecords", 1000)); // SRU syntax

@ -41,7 +41,6 @@ import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -65,7 +64,7 @@ public class webstructure {
} else if (about.length() == 12 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) {
urlhash = ASCII.getBytes(about);
hosthash = about.substring(6);
url = authenticated ? sb.getURL(Segments.Process.PUBLIC, urlhash) : null;
url = authenticated ? sb.getURL(urlhash) : null;
} else if (authenticated && about.length() > 0) {
// consider "about" as url or hostname
try {
@ -138,7 +137,7 @@ public class webstructure {
// citations
prop.put("citations", 1);
IndexCell<CitationReference> citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation();
IndexCell<CitationReference> citationReferences = sb.index.urlCitation();
ReferenceContainer<CitationReference> citations = null;
// citationReferences.count(urlhash) would give to the number of references good for ranking
try {
@ -158,7 +157,7 @@ public class webstructure {
while (i.hasNext()) {
CitationReference cr = i.next();
byte[] refhash = cr.urlhash();
DigestURI refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null;
DigestURI refurl = authenticated ? sb.getURL(refhash) : null;
prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);

@ -40,7 +40,6 @@ import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
@ -55,15 +54,9 @@ public class yacydoc {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
final Segment segment;
final Segment segment = sb.index;
final boolean html = post != null && post.containsKey("html");
prop.setLocalized(html);
final boolean authorized = sb.verifyAuthentication(header);
if (post != null && post.containsKey("segment") && authorized) {
segment = sb.indexSegments.segment(post.get("segment"));
} else {
segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
prop.put("dc_title", "");
prop.put("dc_creator", "");
@ -131,7 +124,7 @@ public class yacydoc {
prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true));
prop.put("yacy_size", entry.size());
prop.put("yacy_words", entry.wordCount());
prop.put("yacy_citations", sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation().count(entry.hash()));
prop.put("yacy_citations", sb.index.urlCitation().count(entry.hash()));
prop.put("yacy_inbound", entry.llocal());
prop.put("yacy_outbound", entry.lother());
@ -140,18 +133,18 @@ public class yacydoc {
String rdf = JenaTripleStore.getRDFByModel(model);
prop.putXML("triples", rdf);
prop.put("rdf", header.fileType() == FileType.XML ? rdf : "");
String references = "";
Iterator<RDFNode> t = JenaTripleStore.getObjects("http://yacy.net/url#"+urlhash, "http://purl.org/dc/terms/references");
while (t.hasNext()) {
RDFNode r = t.next();
references += r.toString()+",";
}
Log.logInfo ("TRIPLESTORE", references);
prop.put("taglinks", references);
// return rewrite properties

@ -6,7 +6,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkTables;
@ -35,7 +34,7 @@ public class add_ymark {
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).url();
final DigestURI url = sb.index.urlMetadata().load(urlHash.getBytes()).url();
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {
@ -66,7 +65,7 @@ public class add_ymark {
}
final YMarkEntry bmk = new YMarkEntry();
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), post.get(YMarkEntry.BOOKMARK.TITLE.key(),YMarkEntry.BOOKMARK.TITLE.deflt()));
bmk.put(YMarkEntry.BOOKMARK.DESC.key(), post.get(YMarkEntry.BOOKMARK.DESC.key(),YMarkEntry.BOOKMARK.DESC.deflt()));

@ -47,7 +47,7 @@ public class get_metadata {
}
try {
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.indexSegments);
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.index);
final Document document = meta.loadDocument(sb.loader);
final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();

@ -215,7 +215,7 @@ public class get_treeview {
}
} else if (isAutoTagger || isMetadata || isURLdb || isCrawlStart) {
try {
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.indexSegments);
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.index);
final Document document = meta.loadDocument(sb.loader);
final TreeMap<String, YMarkTag> tags = sb.tables.bookmarks.getTags(bmk_user);
if(isAutoTagger) {

@ -30,7 +30,6 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.kelondro.data.word.Word;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.data.DidYouMean;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -69,19 +68,7 @@ public class suggest {
final int count = (post == null) ? 20 : post.getInt("count", 20);
// get segment
final Segment indexSegment;
if (post != null && post.containsKey("segment")) {
final String segmentName = post.get("segment");
if (sb.indexSegments.segmentExist(segmentName)) {
indexSegment = sb.indexSegments.segment(segmentName);
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
final Segment indexSegment = sb.index;
int c = 0;
if (more ||

@ -37,7 +37,6 @@ import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.ResultURLs.EventOrigin;
import de.anomic.crawler.ZURL.FailCategory;
@ -150,7 +149,7 @@ public final class crawlReceipt {
if ("fill".equals(result)) try {
// put new entry into database
sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
sb.index.urlMetadata().store(entry);
ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));

@ -33,10 +33,9 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Protocol;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -91,7 +90,7 @@ public final class query {
if (obj.equals("rwiurlcount")) try {
// the total number of different urls in the rwi is returned
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).get(env.getBytes(), null).size());
prop.put("response", sb.index.termIndex().get(env.getBytes(), null).size());
return prop;
} catch (final IOException e) {
Log.logException(e);
@ -99,13 +98,13 @@ public final class query {
if (obj.equals("rwicount")) {
// return the total number of available word indexes
prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).sizesMax());
prop.put("response", sb.index.termIndex().sizesMax());
return prop;
}
if (obj.equals("lurlcount")) {
// return the number of all available l-url's
prop.put("response", sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).size());
prop.put("response", sb.index.urlMetadata().size());
return prop;
}

@ -66,7 +66,6 @@ import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
@ -223,7 +222,7 @@ public final class search {
ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
if (query.length() == 0 && abstractSet != null) {
// this is _not_ a normal search, only a request for index abstracts
final Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
final Segment indexSegment = sb.index;
theQuery = new QueryParams(
null,
abstractSet,
@ -315,7 +314,7 @@ public final class search {
DigestURI.TLD_any_zone_filter,
client,
false,
sb.indexSegments.segment(Segments.Process.PUBLIC),
sb.index,
rankingProfile,
header.get(RequestHeader.USER_AGENT, ""),
false, 0.0d, 0.0d, 0.0d

@ -49,7 +49,6 @@ import net.yacy.peers.dht.FlatWordPartitionScheme;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -116,9 +115,9 @@ public final class transferRWI {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode");
result = "not_granted";
pause = 60000;
} else if (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() > cachelimit) {
} else if (sb.index.termIndex().getBufferSize() > cachelimit) {
// we are too busy to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() + ").");
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.index.termIndex().getBufferSize() + ").");
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 60000;
@ -152,7 +151,7 @@ public final class transferRWI {
int received = 0;
int blocked = 0;
int receivedURL = 0;
final IndexCell<WordReference> cell = sb.indexSegments.termIndex(Segments.Process.DHTIN);
final IndexCell<WordReference> cell = sb.index.termIndex();
int count = 0;
while (it.hasNext()) {
serverCore.checkInterruption();
@ -197,7 +196,7 @@ public final class transferRWI {
// check if we need to ask for the corresponding URL
if (!(knownURL.has(urlHash) || unknownURL.has(urlHash))) try {
if (sb.indexSegments.urlMetadata(Segments.Process.DHTIN).exists(urlHash)) {
if (sb.index.urlMetadata().exists(urlHash)) {
knownURL.put(urlHash);
} else {
unknownURL.put(urlHash);
@ -230,7 +229,7 @@ public final class transferRWI {
}
result = "ok";
pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
pause = (int) (sb.index.termIndex().getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
}
prop.put("unknownURL", unknownURLs.toString());

@ -40,7 +40,6 @@ import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.ResultURLs.EventOrigin;
import de.anomic.server.serverCore;
@ -84,7 +83,7 @@ public final class transferURL {
} else {
int received = 0;
int blocked = 0;
final int sizeBefore = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size();
final int sizeBefore = sb.index.urlMetadata().size();
// read the urls from the other properties and store
String urls;
URIMetadataRow lEntry;
@ -141,7 +140,7 @@ public final class transferURL {
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
try {
sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry);
sb.index.urlMetadata().store(lEntry);
ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
received++;
@ -153,7 +152,7 @@ public final class transferURL {
sb.peers.mySeed().incRU(received);
// return rewrite properties
final int more = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size() - sizeBefore;
final int more = sb.index.urlMetadata().size() - sizeBefore;
doublevalues = Integer.toString(received - more);
Network.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs");
EventChannel.channels(EventChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + ", blocked " + blocked + " URLs from peer " + otherPeerName, "", otherPeer.hash));

@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.peers.Protocol;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request;
@ -78,7 +77,7 @@ public class urls {
if (entry == null) break;
// find referrer, if there is one
referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerhash());
referrer = sb.getURL(entry.referrerhash());
// place url to notice-url db
sb.crawlQueues.delegatedURL.push(
@ -114,10 +113,10 @@ public class urls {
URIMetadataRow entry;
DigestURI referrer;
for (int i = 0; i < count; i++) {
entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
entry = sb.index.urlMetadata().load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
if (entry == null) continue;
// find referrer, if there is one
referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash());
referrer = sb.getURL(entry.referrerHash());
// create RSS entry
prop.put("item_" + c + "_title", entry.dc_title());
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));

@ -57,7 +57,7 @@ public class yacyinteractive {
prop.putHTML("querys", query.replaceAll(" ", "+"));
prop.put("serverlist", query.isEmpty() ? 1 : 0);
prop.put("focus", focus ? 1 : 0);
prop.put("allowrealtime", sb.indexSegments.URLCount() < 100000 ? 1 : 0);
prop.put("allowrealtime", sb.index.URLCount() < 100000 ? 1 : 0);
return prop;
}
}

@ -73,7 +73,6 @@ import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
@ -133,16 +132,7 @@ public class yacysearch {
prop.put("sidebarVocabulary", j);
// get segment
Segment indexSegment = null;
if ( post != null && post.containsKey("segment") ) {
final String segmentName = post.get("segment");
if ( sb.indexSegments.segmentExist(segmentName) ) {
indexSegment = sb.indexSegments.segment(segmentName);
}
} else {
// take default segment
indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
}
Segment indexSegment = sb.index;
final String EXT = header.get("EXT", "");
final boolean rss = EXT.equals("rss");

@ -52,7 +52,6 @@ import net.yacy.peers.dht.PeerSelection;
import net.yacy.search.Switchboard;
import net.yacy.search.Switchboard.indexingQueueEntry;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
import de.anomic.crawler.NoticedURL.StackType;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request;
@ -62,7 +61,6 @@ public class CrawlQueues {
private static final String ERROR_DB_FILENAME = "urlError4.db";
private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
private static final Segments.Process PROCESS = Segments.Process.LOCALCRAWLING;
protected Switchboard sb;
protected Log log;
@ -82,8 +80,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@ -94,8 +92,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
this.errorURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public synchronized void close() {
@ -276,7 +274,7 @@ public class CrawlQueues {
return true;
}
try {
this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(PROCESS, new Response(urlEntry, profile), null, null));
this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(new Response(urlEntry, profile), null, null));
Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
} catch (final InterruptedException e) {
Log.logException(e);

@ -42,7 +42,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects;
@ -59,6 +58,7 @@ public class RSSLoader extends Thread {
this.urlf = urlf;
}
@Override
public void run() {
RSSReader rss = null;
try {
@ -89,7 +89,7 @@ public class RSSLoader extends Thread {
try {
final DigestURI messageurl = new DigestURI(message.getLink());
if (indexTriggered.containsKey(messageurl.hash())) continue loop;
if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
if (sb.urlExists(messageurl.hash()) != null) continue loop;
sb.addToIndex(messageurl, null, null);
indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
loadCount++;

@ -1,4 +1,4 @@
//SitemapImporter.java
//SitemapImporter.java
//------------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@yacy.net
@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.retrieval.Request;
public class SitemapImporter extends Thread {
@ -43,7 +42,7 @@ public class SitemapImporter extends Thread {
private static final Log logger = new Log("SITEMAP");
private DigestURI siteMapURL = null;
private final Switchboard sb;
public SitemapImporter(final Switchboard sb, final DigestURI sitemapURL, final CrawlProfile profileEntry) {
assert sitemapURL != null;
this.sb = sb;
@ -52,6 +51,7 @@ public class SitemapImporter extends Thread {
this.crawlingProfile = profileEntry;
}
@Override
public void run() {
try {
logger.logInfo("Start parsing sitemap file " + this.siteMapURL);
@ -76,10 +76,10 @@ public class SitemapImporter extends Thread {
// check if the url is known and needs to be recrawled
Date lastMod = entry.lastmod(null);
if (lastMod != null) {
final String dbocc = this.sb.urlExists(Segments.Process.LOCALCRAWLING, nexturlhash);
final String dbocc = this.sb.urlExists(nexturlhash);
if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) {
// the url was already loaded. we need to check the date
final URIMetadataRow oldEntry = this.sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(nexturlhash);
final URIMetadataRow oldEntry = this.sb.index.urlMetadata().load(nexturlhash);
if (oldEntry != null) {
final Date modDate = oldEntry.moddate();
// check if modDate is null

@ -41,7 +41,6 @@ import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.Latency;
import de.anomic.crawler.ZURL.FailCategory;
@ -115,7 +114,7 @@ public class FTPLoader {
// directory -> get list of files
final RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
final DigestURI u = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
final DigestURI u = this.sb.getURL(request.referrerhash());
if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false));
}
@ -222,7 +221,7 @@ public class FTPLoader {
// create response header
final RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
final DigestURI refurl = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
final DigestURI refurl = this.sb.getURL(request.referrerhash());
if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
}
final ResponseHeader responseHeader = new ResponseHeader(200);

@ -40,7 +40,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
public class FileLoader {
@ -61,7 +60,7 @@ public class FileLoader {
RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
DigestURI ur = this.sb.getURL(request.referrerhash());
if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
}

@ -39,7 +39,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.Latency;
import de.anomic.crawler.ZURL.FailCategory;
@ -118,7 +117,7 @@ public final class HTTPLoader {
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
@ -168,7 +167,7 @@ public final class HTTPLoader {
}
// check if the url was already indexed
final String dbname = this.sb.urlExists(Segments.Process.LOCALCRAWLING, redirectionUrl.hash());
final String dbname = this.sb.urlExists(redirectionUrl.hash());
if (dbname != null) { // customer request
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname);

@ -49,7 +49,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.CrawlProfile;
public class SMBLoader {
@ -73,7 +72,7 @@ public class SMBLoader {
RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
DigestURI ur = this.sb.getURL(request.referrerhash());
if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
}

@ -38,13 +38,13 @@ import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.index.Segments;
import net.yacy.search.index.Segment;
import de.anomic.crawler.retrieval.Response;
public class YMarkMetadata {
private DigestURI uri;
Document document;
Segments indexSegment;
Segment indexSegment;
public enum METADATA {
TITLE,
@ -72,16 +72,16 @@ public class YMarkMetadata {
this.indexSegment = null;
}
public YMarkMetadata(final DigestURI uri, final Segments indexSegment) {
public YMarkMetadata(final DigestURI uri, final Segment indexSegment) {
this.uri = uri;
this.document = null;
this.indexSegment = indexSegment;
}
public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) {
public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
this.document = null;
this.indexSegment = indexSegment;
this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).url();
this.uri = this.indexSegment.urlMetadata().load(urlHash).url();
}
public YMarkMetadata(final Document document) {
@ -101,11 +101,11 @@ public class YMarkMetadata {
this.document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
}
return this.document;
}
}
public EnumMap<METADATA, String> getMetadata() {
final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash());
final URIMetadataRow urlEntry = this.indexSegment.urlMetadata().load(this.uri.hash());
if (urlEntry != null) {
metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size()));
metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate()));

@ -54,7 +54,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.Cache;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.ZURL.FailCategory;
@ -211,7 +210,7 @@ public final class LoaderDispatcher {
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
final Response response = new Response(
request,

@ -144,7 +144,6 @@ import net.yacy.repository.Blacklist;
import net.yacy.repository.FilterEngine;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.index.SolrConfiguration;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryParams;
@ -218,7 +217,7 @@ public final class Switchboard extends serverSwitch
public File queuesRoot;
public File surrogatesInPath;
public File surrogatesOutPath;
public Segments indexSegments;
public Segment index;
public LoaderDispatcher loader;
public CrawlSwitchboard crawler;
public CrawlQueues crawlQueues;
@ -379,16 +378,14 @@ public final class Switchboard extends serverSwitch
// initialize index
ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
this.indexSegments =
new Segments(
this.index =
new Segment(
this.log,
segmentsPath,
new File(segmentsPath, "default"),
wordCacheMaxCount,
fileSizeMax,
this.useTailCache,
this.exceed134217727);
// set the default segment names
setDefaultSegments();
// prepare a solr index profile switch list
final File solrBackupProfile = new File("defaults/solr.keys.list");
@ -418,7 +415,7 @@ public final class Switchboard extends serverSwitch
ShardSelection.Method.MODULO_HOST_MD5,
10000, true);
solr.setCommitWithinMs(commitWithinMs);
this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr);
this.index.connectRemoteSolr(solr);
} catch ( final IOException e ) {
Log.logException(e);
}
@ -466,7 +463,7 @@ public final class Switchboard extends serverSwitch
// init a DHT transmission dispatcher
this.dhtDispatcher =
(this.peers.sizeConnected() == 0) ? null : new Dispatcher(
this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
this.index,
this.peers,
true,
10000);
@ -767,7 +764,7 @@ public final class Switchboard extends serverSwitch
new CrawlStacker(
this.crawlQueues,
this.crawler,
this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
this.index,
this.peers,
isIntranetMode(),
isGlobalMode(),
@ -994,33 +991,6 @@ public final class Switchboard extends serverSwitch
sb = this;
}
private void setDefaultSegments() {
this.indexSegments.setSegment(
Segments.Process.RECEIPTS,
getConfig(SwitchboardConstants.SEGMENT_RECEIPTS, "default"));
this.indexSegments.setSegment(
Segments.Process.QUERIES,
getConfig(SwitchboardConstants.SEGMENT_QUERIES, "default"));
this.indexSegments.setSegment(
Segments.Process.DHTIN,
getConfig(SwitchboardConstants.SEGMENT_DHTIN, "default"));
this.indexSegments.setSegment(
Segments.Process.DHTOUT,
getConfig(SwitchboardConstants.SEGMENT_DHTOUT, "default"));
this.indexSegments.setSegment(
Segments.Process.PROXY,
getConfig(SwitchboardConstants.SEGMENT_PROXY, "default"));
this.indexSegments.setSegment(
Segments.Process.LOCALCRAWLING,
getConfig(SwitchboardConstants.SEGMENT_LOCALCRAWLING, "default"));
this.indexSegments.setSegment(
Segments.Process.REMOTECRAWLING,
getConfig(SwitchboardConstants.SEGMENT_REMOTECRAWLING, "default"));
this.indexSegments.setSegment(
Segments.Process.PUBLIC,
getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"));
}
public int getIndexingProcessorsQueueSize() {
return this.indexingDocumentProcessor.queueSize()
+ this.indexingCondensementProcessor.queueSize()
@ -1170,8 +1140,8 @@ public final class Switchboard extends serverSwitch
if ( this.dhtDispatcher != null ) {
this.dhtDispatcher.close();
}
synchronized ( this.indexSegments ) {
this.indexSegments.close();
synchronized ( this.index ) {
this.index.close();
}
this.crawlStacker.announceClose();
this.crawlStacker.close();
@ -1211,16 +1181,14 @@ public final class Switchboard extends serverSwitch
partitionExponent,
this.useTailCache,
this.exceed134217727);
this.indexSegments =
new Segments(
this.index =
new Segment(
this.log,
new File(new File(indexPrimaryPath, networkName), "SEGMENTS"),
new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"),
wordCacheMaxCount,
fileSizeMax,
this.useTailCache,
this.exceed134217727);
// set the default segment names
setDefaultSegments();
this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
// create a crawler
@ -1229,7 +1197,7 @@ public final class Switchboard extends serverSwitch
// init a DHT transmission dispatcher
this.dhtDispatcher =
(this.peers.sizeConnected() == 0) ? null : new Dispatcher(
this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
this.index,
this.peers,
true,
10000);
@ -1257,7 +1225,7 @@ public final class Switchboard extends serverSwitch
new CrawlStacker(
this.crawlQueues,
this.crawler,
this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
this.index,
this.peers,
"local.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
"global.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
@ -1449,11 +1417,11 @@ public final class Switchboard extends serverSwitch
}
}
public String urlExists(final Segments.Process process, final byte[] hash) {
public String urlExists(final byte[] hash) {
// tests if hash occurrs in any database
// if it exists, the name of the database is returned,
// if it not exists, null is returned
if ( this.indexSegments.urlMetadata(process).exists(hash) ) {
if ( this.index.urlMetadata().exists(hash) ) {
return "loaded";
}
return this.crawlQueues.urlExists(hash);
@ -1465,14 +1433,14 @@ public final class Switchboard extends serverSwitch
this.crawlQueues.urlRemove(hash);
}
public DigestURI getURL(final Segments.Process process, final byte[] urlhash) {
public DigestURI getURL(final byte[] urlhash) {
if ( urlhash == null ) {
return null;
}
if ( urlhash.length == 0 ) {
return null;
}
final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash);
final URIMetadataRow le = this.index.urlMetadata().load(urlhash);
if ( le != null ) {
return le.url();
}
@ -1606,7 +1574,7 @@ public final class Switchboard extends serverSwitch
this.crawler.close();
this.log
.logConfig("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager (stand by...)");
this.indexSegments.close();
this.index.close();
this.peers.close();
Cache.close();
this.tables.close();
@ -1696,7 +1664,6 @@ public final class Switchboard extends serverSwitch
}
try {
this.indexingDocumentProcessor.enQueue(new indexingQueueEntry(
Segments.Process.LOCALCRAWLING,
response,
null,
null));
@ -1810,9 +1777,7 @@ public final class Switchboard extends serverSwitch
0);
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false);
final indexingQueueEntry queueEntry =
new indexingQueueEntry(Segments.Process.SURROGATES, response, new Document[] {
document
}, null);
new indexingQueueEntry(response, new Document[] {document}, null);
// place the queue entry into the concurrent process of the condenser (document analysis)
try {
@ -1887,18 +1852,15 @@ public final class Switchboard extends serverSwitch
public static class indexingQueueEntry extends WorkflowJob
{
public Segments.Process process;
public Response queueEntry;
public Document[] documents;
public Condenser[] condenser;
public indexingQueueEntry(
final Segments.Process process,
final Response queueEntry,
final Document[] documents,
final Condenser[] condenser) {
super();
this.process = process;
this.queueEntry = queueEntry;
this.documents = documents;
this.condenser = condenser;
@ -1929,9 +1891,7 @@ public final class Switchboard extends serverSwitch
// clear caches if necessary
if ( !MemoryControl.request(8000000L, false) ) {
for ( final Segment indexSegment : this.indexSegments ) {
indexSegment.urlMetadata().clearCache();
}
sb.index.urlMetadata().clearCache();
SearchEventCache.cleanupEvents(false);
this.trail.clear();
}
@ -2301,7 +2261,7 @@ public final class Switchboard extends serverSwitch
if ( documents == null ) {
return null;
}
return new indexingQueueEntry(in.process, in.queueEntry, documents, null);
return new indexingQueueEntry(in.queueEntry, documents, null);
}
private Document[] parseDocument(final Response response) throws InterruptedException {
@ -2446,11 +2406,11 @@ public final class Switchboard extends serverSwitch
+ in.queueEntry.url().toNormalform(false, true)
+ "': indexing not wanted by crawl profile");
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
return new indexingQueueEntry(in.queueEntry, in.documents, null);
}
boolean localSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr");
boolean remoteSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false);
boolean localSolr = this.index.getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr");
boolean remoteSolr = this.index.getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false);
if (localSolr || remoteSolr) {
// send the documents to solr
for ( final Document doc : in.documents ) {
@ -2470,8 +2430,8 @@ public final class Switchboard extends serverSwitch
}
try {
SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
if (localSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr().add(solrDoc);
if (remoteSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().add(solrDoc);
if (localSolr) this.index.getLocalSolr().add(solrDoc);
if (remoteSolr) this.index.getRemoteSolr().add(solrDoc);
} catch ( final IOException e ) {
Log.logWarning(
"SOLR",
@ -2494,7 +2454,7 @@ public final class Switchboard extends serverSwitch
+ in.queueEntry.url().toNormalform(false, true)
+ "': indexing not wanted by federated rule for YaCy");
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
return new indexingQueueEntry(in.queueEntry, in.documents, null);
}
final List<Document> doclist = new ArrayList<Document>();
@ -2519,7 +2479,7 @@ public final class Switchboard extends serverSwitch
}
if ( doclist.isEmpty() ) {
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
return new indexingQueueEntry(in.queueEntry, in.documents, null);
}
in.documents = doclist.toArray(new Document[doclist.size()]);
final Condenser[] condenser = new Condenser[in.documents.length];
@ -2540,7 +2500,7 @@ public final class Switchboard extends serverSwitch
? true
: !profile.remoteIndexing());
}
return new indexingQueueEntry(in.process, in.queueEntry, in.documents, condenser);
return new indexingQueueEntry(in.queueEntry, in.documents, condenser);
}
public indexingQueueEntry webStructureAnalysis(final indexingQueueEntry in) {
@ -2565,7 +2525,6 @@ public final class Switchboard extends serverSwitch
if ( in.condenser != null ) {
for ( int i = 0; i < in.documents.length; i++ ) {
storeDocumentIndex(
in.process,
in.queueEntry,
in.documents[i],
in.condenser[i],
@ -2577,7 +2536,6 @@ public final class Switchboard extends serverSwitch
}
private void storeDocumentIndex(
final Segments.Process process,
final Response queueEntry,
final Document document,
final Condenser condenser,
@ -2591,9 +2549,6 @@ public final class Switchboard extends serverSwitch
final DigestURI url = new DigestURI(document.dc_source());
final DigestURI referrerURL = queueEntry.referrerURL();
EventOrigin processCase = queueEntry.processCase(this.peers.mySeed().hash);
if ( process == Segments.Process.SURROGATES ) {
processCase = EventOrigin.SURROGATES;
}
if ( condenser == null || document.indexingDenied() ) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
@ -2629,7 +2584,7 @@ public final class Switchboard extends serverSwitch
URIMetadataRow newEntry = null;
try {
newEntry =
this.indexSegments.segment(process).storeDocument(
this.index.storeDocument(
url,
referrerURL,
queueEntry.lastModified(),
@ -2763,11 +2718,10 @@ public final class Switchboard extends serverSwitch
public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName)
throws IOException,
Parser.Failure {
final Segments.Process process = Segments.Process.LOCALCRAWLING;
if ( searchEvent != null ) {
searchEvent.addHeuristic(url.hash(), heuristicName, true);
}
if ( this.indexSegments.segment(process).exists(url.hash()) ) {
if ( this.index.exists(url.hash()) ) {
return; // don't do double-work
}
final Request request = this.loader.request(url, true, true);
@ -2806,7 +2760,6 @@ public final class Switchboard extends serverSwitch
ResultImages.registerImages(url, document, true);
Switchboard.this.webStructure.generateCitationReference(url, document, condenser);
storeDocumentIndex(
process,
response,
document,
condenser,
@ -3023,7 +2976,7 @@ public final class Switchboard extends serverSwitch
if ( getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false") ) {
return "no DHT distribution: not enabled (per setting)";
}
final Segment indexSegment = this.indexSegments.segment(segment);
final Segment indexSegment = this.index;
if ( indexSegment.urlMetadata().size() < 10 ) {
return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size();
}
@ -3299,12 +3252,12 @@ public final class Switchboard extends serverSwitch
this.peers.mySeed().put(Seed.ISPEED, Integer.toString(currentPPM()));
this.peers.mySeed().put(Seed.RSPEED, Float.toString(averageQPM()));
this.peers.mySeed().put(Seed.UPTIME, Long.toString(uptime / 60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.indexSegments.URLCount())); // the number of links that the peer has stored (LURL's)
this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.index.URLCount())); // the number of links that the peer has stored (LURL's)
this.peers.mySeed().put(Seed.NCOUNT, Integer.toString(this.crawlQueues.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's)
this.peers.mySeed().put(
Seed.RCOUNT,
Integer.toString(this.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.GLOBAL))); // the number of links that the peer provides for remote crawling (ZURL's)
this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.indexSegments.RWICount())); // the minimum number of words that the peer has indexed (as it says)
this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.index.RWICount())); // the minimum number of words that the peer has indexed (as it says)
this.peers.mySeed().put(Seed.SCOUNT, Integer.toString(this.peers.sizeConnected())); // the number of seeds that the peer has stored
this.peers.mySeed().put(
Seed.CCOUNT,

@ -137,24 +137,23 @@ public class Segment {
maxFileSize,
writeBufferSize);
/*
this.authorNavIndex = new IndexCell<NavigationReference>(
new File(new File(segmentPath, "nav_author"), "idx"),
navigationReferenceFactory,
wordOrder,
NavigationReferenceRow.navEntryRow,
entityCacheMaxSize,
targetFileSize,
maxFileSize,
this.merger,
writeBufferSize);
*/
// create LURL-db
this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727);
//this.connectLocalSolr();
}
/**
 * Number of URL metadata entries (LURLs) stored in this segment,
 * as reported by the urlMetadata repository.
 * @return the size of the urlMetadata repository
 */
public long URLCount() {
return this.urlMetadata.size();
}
/**
 * Count of reverse word index (RWI) entries in this segment, as reported
 * by {@code termIndex.sizesMax()} (an upper-bound style count — confirm
 * exact semantics in IndexCell).
 * @return the term index entry count
 */
public long RWICount() {
return this.termIndex.sizesMax();
}
/**
 * Current size of the term index write buffer, as reported by
 * {@code termIndex.getBufferSize()}.
 * @return the buffer size of the term index
 */
public int RWIBufferCount() {
return this.termIndex.getBufferSize();
}
/**
 * Attach a remote Solr connector to this segment's URL metadata repository;
 * delegates directly to {@code MetadataRepository.connectRemoteSolr}.
 * @param solr the connector to the remote Solr instance
 */
public void connectRemoteSolr(final SolrConnector solr) {
this.urlMetadata.connectRemoteSolr(solr);
}

@ -1,196 +0,0 @@
// Segments.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.07.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
/**
 * Registry of named search index segments. Each {@link Process} type is
 * assigned a segment name (initially "default" for all of them); the
 * corresponding {@link Segment} is opened lazily on first access and cached.
 * After {@link #close()} the registry must not be used any more; accessors
 * that can guard against that state return null, 0 or empty values.
 */
public class Segments implements Iterable<Segment> {

    /**
     * process enumeration type
     * defines constants that can be used to assign process-related segment names
     */
    public enum Process {

        RECEIPTS,
        QUERIES,
        DHTIN,
        DHTOUT,         // the only segment that is used for reading-only
        PROXY,
        LOCALCRAWLING,
        REMOTECRAWLING,
        PUBLIC,
        SURROGATES;     // includes the index that can be retrieved by the yacy p2p api

        // segment lookup must go through the Segments registry; a string
        // rendering of the process constant must never be used as a name
        @Override
        public String toString() {
            throw new UnsupportedOperationException("toString not allowed");
        }
    }

    private final Log log;
    private final File segmentsPath;
    private final int entityCacheMaxSize;
    private final long maxFileSize;
    // lazily filled cache of opened segments; null after close()
    private Map<String, Segment> segments;
    // maps each process type to the name of the segment it operates on
    private final Map<Process, String> process_assignment;
    private final boolean useTailCache;
    private final boolean exceed134217727;

    /**
     * Create an (initially empty) segment registry.
     * @param log logger used by lazily created segments
     * @param segmentsPath parent directory that holds one subdirectory per segment
     * @param entityCacheMaxSize cache size handed to each new segment
     * @param maxFileSize maximum index file size handed to each new segment
     * @param useTailCache tail cache flag handed to each new segment
     * @param exceed134217727 large-file flag handed to each new segment
     */
    public Segments(
            final Log log,
            final File segmentsPath,
            final int entityCacheMaxSize,
            final long maxFileSize,
            final boolean useTailCache,
            final boolean exceed134217727) {
        this.log = log;
        this.segmentsPath = segmentsPath;
        this.entityCacheMaxSize = entityCacheMaxSize;
        this.maxFileSize = maxFileSize;
        this.useTailCache = useTailCache;
        this.exceed134217727 = exceed134217727;
        this.segments = new HashMap<String, Segment>();
        // EnumMap: compact and fast for the small, fixed Process key set
        this.process_assignment = new EnumMap<Process, String>(Process.class);
        // assign the default segment name to every process
        for (final Process process : Process.values()) {
            this.process_assignment.put(process, "default");
        }
    }

    /**
     * Assign a segment name to a process type.
     * @param process the process to re-assign
     * @param segmentName the name of the segment the process shall use
     */
    public void setSegment(final Process process, final String segmentName) {
        this.process_assignment.put(process, segmentName);
    }

    /**
     * @return the names of all segments that have been opened so far;
     *         empty after close
     */
    public String[] segmentNames() {
        if (this.segments == null) return new String[0]; // already closed
        return this.segments.keySet().toArray(new String[this.segments.size()]);
    }

    /**
     * @param segmentName the name to check
     * @return true if a segment with the given name has already been opened
     */
    public boolean segmentExist(final String segmentName) {
        return this.segments != null && this.segments.containsKey(segmentName);
    }

    /**
     * Get the segment assigned to a process type.
     * @param process the process
     * @return the segment the process is assigned to, or null after close
     */
    public Segment segment(final Process process) {
        return segment(this.process_assignment.get(process));
    }

    /**
     * Get (and lazily open) a segment by name.
     * @param segmentName the name of the segment
     * @return the segment, or null if this registry is closed or the
     *         segment could not be created
     */
    public Segment segment(final String segmentName) {
        if (this.segments == null) return null;
        Segment segment = this.segments.get(segmentName);
        if (segment == null) {
            // generate the segment
            try {
                segment = new Segment(
                        this.log,
                        new File(this.segmentsPath, segmentName),
                        this.entityCacheMaxSize,
                        this.maxFileSize,
                        this.useTailCache,
                        this.exceed134217727);
            } catch (final IOException e) {
                Log.logException(e);
                return null;
            }
            this.segments.put(segmentName, segment);
        }
        return segment;
    }

    /**
     * @return the total number of URL metadata entries over all open segments
     */
    public long URLCount() {
        if (this.segments == null) return 0;
        long c = 0;
        for (final Segment s : this.segments.values()) c += s.urlMetadata().size();
        return c;
    }

    /**
     * @return the total number of RWI entries over all open segments,
     *         as reported by each term index's sizesMax()
     */
    public long RWICount() {
        if (this.segments == null) return 0;
        long c = 0;
        for (final Segment s : this.segments.values()) c += s.termIndex().sizesMax();
        return c;
    }

    /**
     * @return the total term index buffer size over all open segments
     */
    public int RWIBufferCount() {
        if (this.segments == null) return 0;
        int c = 0;
        for (final Segment s : this.segments.values()) c += s.termIndex().getBufferSize();
        return c;
    }

    /**
     * @param process the process type
     * @return the URL metadata repository of the assigned segment
     */
    public MetadataRepository urlMetadata(final Process process) {
        return segment(this.process_assignment.get(process)).urlMetadata();
    }

    /**
     * @param process the process type
     * @return the term (RWI) index of the assigned segment
     */
    public IndexCell<WordReference> termIndex(final Process process) {
        return segment(this.process_assignment.get(process)).termIndex();
    }

    /**
     * Clear the segment assigned to the given process.
     * @param process the process type
     */
    public void clear(final Process process) {
        segment(this.process_assignment.get(process)).clear();
    }

    /**
     * @param process the process type
     * @return the storage location of the assigned segment
     */
    public File getLocation(final Process process) {
        return segment(this.process_assignment.get(process)).getLocation();
    }

    /**
     * Close the segment assigned to the given process.
     * @param process the process type
     */
    public void close(final Process process) {
        segment(this.process_assignment.get(process)).close();
    }

    /**
     * Close all open segments and invalidate this registry.
     */
    public synchronized void close() {
        if (this.segments != null) for (final Segment s : this.segments.values()) s.close();
        this.segments = null;
    }

    @Override
    public void finalize() {
        // last-resort cleanup; regular shutdown should call close() explicitly
        this.close();
    }

    /**
     * @param segmentName the segment to clean
     * @param startHash reference hash to start the cleaning at
     * @return a reference cleaner for the named segment
     */
    public synchronized Segment.ReferenceCleaner getReferenceCleaner(final String segmentName, final byte[] startHash) {
        return segment(segmentName).getReferenceCleaner(startHash);
    }

    /**
     * Iterate over all segments that have been opened so far.
     * @return an iterator over the open segments; empty after close
     */
    @Override
    public Iterator<Segment> iterator() {
        if (this.segments == null) return Collections.<Segment>emptyList().iterator();
        return this.segments.values().iterator();
    }
}
Loading…
Cancel
Save