removed the segments concept and the Segments class:

the segments were there to create a tenant infrastructure but were never
used, since all of that was much too complex. There will be a replacement
using a Solr navigation based on a segment field in the search index.
branch: pull/1/head
author: Michael Peter Christen, 13 years ago
parent 508a81b86c
commit 03280fb161
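In short: every call site that used to resolve one of several named segments through the Segments registry (sb.indexSegments, keyed by a Segments.Process role such as PUBLIC, LOCALCRAWLING or DHTIN) now goes through the single Segment exposed as sb.index. A minimal sketch of the call-site migration, with identifiers taken from the hunks below (illustrative only, not a standalone compilable unit):

    // before: resolve a named segment for a given process role
    Segment segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
    URIMetadataRow entry = sb.indexSegments
            .urlMetadata(Segments.Process.LOCALCRAWLING)
            .load(ASCII.getBytes(urlHash));

    // after: one segment for everything, no role indirection
    Segment segment = sb.index;
    URIMetadataRow entry = sb.index.urlMetadata().load(ASCII.getBytes(urlHash));

The planned replacement mentioned in the message would presumably express tenant separation as a filter on a segment field in the Solr index (e.g. fq=segment:public); that part is hypothetical and not contained in this commit.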

@@ -50,7 +50,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.peers.NewsPool;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.data.BookmarkHelper;
 import de.anomic.data.BookmarksDB;
 import de.anomic.data.BookmarksDB.Bookmark;
@@ -195,7 +194,7 @@ public class Bookmarks {
 final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
 if (bookmark == null) {
 // try to get the bookmark from the LURL database
-final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
+final URIMetadataRow urlentry = sb.index.urlMetadata().load(ASCII.getBytes(urlHash));
 if (urlentry != null) try {
 final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
 prop.put("mode_edit", "0"); // create mode

@@ -39,7 +39,6 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.peers.Seed;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.ResultURLs;
 import de.anomic.crawler.ResultURLs.EventOrigin;
 import de.anomic.crawler.ResultURLs.InitExecEntry;
@@ -117,7 +116,7 @@ public class CrawlResults {
 final String hash = post.get("hash", null);
 if (hash != null) {
 // delete from database
-sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).remove(hash.getBytes());
+sb.index.urlMetadata().remove(hash.getBytes());
 }
 }
@@ -127,7 +126,7 @@ public class CrawlResults {
 if (hashpart != null) {
 // delete all urls for this domain from database
 try {
-sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).deleteDomain(hashpart);
+sb.index.urlMetadata().deleteDomain(hashpart);
 ResultURLs.deleteDomain(tabletype, domain, hashpart);
 } catch (final IOException e) {
 Log.logException(e);
@@ -187,7 +186,7 @@ public class CrawlResults {
 while (i.hasNext()) {
 entry = i.next();
 try {
-urle = sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(UTF8.getBytes(entry.getKey()));
+urle = sb.index.urlMetadata().load(UTF8.getBytes(entry.getKey()));
 if (urle == null) {
 Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
 urlstr = null;

@@ -53,7 +53,6 @@ import net.yacy.peers.NewsPool;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.SitemapImporter;
 import de.anomic.crawler.ZURL.FailCategory;
@@ -95,16 +94,7 @@ public class Crawler_p {
 prop.put("forwardToCrawlStart", "0");
 // get segment
-Segment indexSegment = null;
-if (post != null && post.containsKey("segment")) {
-final String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 prop.put("info", "0");

@@ -29,7 +29,6 @@ import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.MetadataRepository;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -43,16 +42,7 @@ public class IndexCleaner_p {
 prop.put("title", "DbCleanup_p");
 // get segment
-Segment indexSegment = null;
-if (post != null && post.containsKey("segment")) {
-String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 if (post!=null) {
 if (post.get("action").equals("ustart")) {

@@ -12,15 +12,6 @@
 <form action="IndexControlRWIs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
 <fieldset><legend>RWI Retrieval (= search for a single word)</legend>
 <dl>
-<dt class="TableCellDark">Select Segment:</dt>
-<dd>
-<select name="selectSegment" size="1">
-#{segments}#
-<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
-#{/segments}#
-</select>
-</dd>
 <dt class="TableCellDark">Retrieve by Word:</dt>
 <dd><input type="text" name="keystring" value="#[keystring]#" size="40" maxlength="80" />
 <input type="submit" name="keystringsearch" value="Show URL Entries for Word" />

@@ -61,9 +61,7 @@ import net.yacy.peers.Seed;
 import net.yacy.peers.dht.PeerSelection;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
-import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.query.RWIProcess;
 import net.yacy.search.query.SearchEventCache;
@@ -92,19 +90,9 @@ public class IndexControlRWIs_p
 prop.put("keyhash", "");
 prop.put("result", "");
 prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
-prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null
+prop.put("cleanup_solr", sb.index.getRemoteSolr() == null
 || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
-String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
-int i = 0;
-for ( final String s : sb.indexSegments.segmentNames() ) {
-prop.put("segments_" + i + "_name", s);
-prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
-i++;
-}
-Segment segment = sb.indexSegments.segment(segmentName);
-prop.put("segments", i);
 // switch off all optional forms/lists
 prop.put("searchresult", 0);
 prop.put("keyhashsimilar", 0);
@@ -113,18 +101,9 @@ public class IndexControlRWIs_p
 // clean up all search events
 SearchEventCache.cleanupEvents(true);
-if ( post != null ) {
-// default values
-segmentName = post.get("segment", segmentName).trim();
-i = 0;
-for ( final String s : sb.indexSegments.segmentNames() ) {
-prop.put("segments_" + i + "_name", s);
-prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
-i++;
-}
-prop.put("segments", i);
-segment = sb.indexSegments.segment(segmentName);
+Segment segment = sb.index;
+if ( post != null ) {
 final String keystring = post.get("keystring", "").trim();
 byte[] keyhash = post.get("keyhash", "").trim().getBytes();
 if (keystring.length() > 0) {
@@ -180,7 +159,7 @@ public class IndexControlRWIs_p
 if ( post.get("deleteSolr", "").equals("on")
 && sb.getConfigBool("federated.service.solr.indexing.enabled", false) ) {
 try {
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().clear();
+sb.index.getRemoteSolr().clear();
 } catch ( final Exception e ) {
 Log.logException(e);
 }
@@ -390,8 +369,8 @@ public class IndexControlRWIs_p
 final Iterator<ReferenceContainer<WordReference>> containerIt =
 segment.termIndex().referenceContainer(keyhash, true, false, 256, false).iterator();
 ReferenceContainer<WordReference> container;
-i = 0;
-int rows = 0, cols = 0;
+int i = 0, rows = 0, cols = 0;
 prop.put("keyhashsimilar", "1");
 while ( containerIt.hasNext() && i < 256 ) {
 container = containerIt.next();

@@ -67,14 +67,6 @@ function updatepage(str) {
 <form action="IndexControlURLs_p.html" id="searchform" method="post" enctype="multipart/form-data" accept-charset="UTF-8" onkeyup="xmlhttpPost(); return false;">
 <fieldset><legend>URL Retrieval</legend>
 <dl>
-<dt class="TableCellDark">Select Segment:</dt>
-<dd>
-<select name="selectSegment" size="1">
-#{segments}#
-<option value="#[name]#" #(selected)#::selected="selected"#(/selected)#>#[name]#</option>
-#{/segments}#
-</select>
-</dd>
 <dt class="TableCellDark">Retrieve by URL:</dt>
 <dd><input type="text" name="urlstring" value="#[urlstring]#" size="40" maxlength="250" />

@@ -41,7 +41,6 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Base64Order;
 import net.yacy.kelondro.order.RotateIterator;
 import net.yacy.search.Switchboard;
-import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.MetadataRepository;
 import net.yacy.search.index.Segment;
 import de.anomic.server.serverObjects;
@@ -55,19 +54,12 @@ public class IndexControlURLs_p {
 final serverObjects prop = new serverObjects();
+Segment segment = sb.index;
 // set default values
 prop.put("urlstring", "");
 prop.put("urlhash", "");
 prop.put("result", "");
-String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
-int i = 0;
-for (final String s: sb.indexSegments.segmentNames()) {
-prop.put("segments_" + i + "_name", s);
-prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
-i++;
-}
-Segment segment = sb.indexSegments.segment(segmentName);
-prop.put("segments", i);
 prop.putNum("ucount", segment.urlMetadata().size());
 prop.put("otherHosts", "");
 prop.put("genUrlProfile", 0);
@@ -76,20 +68,6 @@ public class IndexControlURLs_p {
 prop.put("statisticslines", 0);
 prop.put("reload", 0);
-// do segment selection
-if (post != null && post.containsKey("segment")) {
-// default values
-segmentName = post.get("segment", segmentName).trim();
-i= 0;
-for (final String s: sb.indexSegments.segmentNames()) {
-prop.put("segments_" + i + "_name", s);
-prop.put("segments_" + i + "_selected", (segmentName.equals(s)) ? 1 : 0);
-i++;
-}
-prop.put("segments", i);
-segment = sb.indexSegments.segment(segmentName);
-}
 // show export messages
 final MetadataRepository.Export export = segment.urlMetadata().export();
 if ((export != null) && (export.isAlive())) {
@@ -147,7 +125,7 @@ public class IndexControlURLs_p {
 prop.put("result", " ");
 if (post.containsKey("urlhashdeleteall")) {
-i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST);
+int i = segment.removeAllUrlReferences(urlhash.getBytes(), sb.loader, CacheStrategy.IFEXIST);
 prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
 prop.put("lurlexport", 0);
 prop.put("reload", 0);
@@ -224,8 +202,7 @@ public class IndexControlURLs_p {
 final Iterator<URIMetadataRow> entryIt = new RotateIterator<URIMetadataRow>(segment.urlMetadata().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax());
 final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:<br />");
 URIMetadataRow entry;
-i = 0;
-int rows = 0, cols = 0;
+int i = 0, rows = 0, cols = 0;
 prop.put("urlhashsimilar", "1");
 while (entryIt.hasNext() && i < 256) {
 entry = entryIt.next();

@@ -37,7 +37,6 @@ import net.yacy.cora.services.federated.solr.SolrConnector;
 import net.yacy.cora.storage.ConfigurationSet;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import net.yacy.search.index.SolrField;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -86,8 +85,8 @@ public class IndexFederated_p {
 if (solrWasOn) {
 // switch off
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().close();
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
+sb.index.getRemoteSolr().close();
+sb.index.connectRemoteSolr(null);
 }
 if (solrIsOnAfterwards) {
@@ -97,13 +96,13 @@ public class IndexFederated_p {
 if (usesolr) {
 SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true);
 solr.setCommitWithinMs(commitWithinMs);
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr);
+sb.index.connectRemoteSolr(solr);
 } else {
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
+sb.index.connectRemoteSolr(null);
 }
 } catch (final IOException e) {
 Log.logException(e);
-sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(null);
+sb.index.connectRemoteSolr(null);
 }
 }
@@ -138,11 +137,11 @@ public class IndexFederated_p {
 }
 // show solr host table
-if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() == null) {
+if (sb.index.getRemoteSolr() == null) {
 prop.put("table", 0);
 } else {
 prop.put("table", 1);
-final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr();
+final SolrConnector solr = sb.index.getRemoteSolr();
 final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()};
 final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()};
 boolean dark = false;

@@ -32,7 +32,6 @@ import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -44,16 +43,7 @@ public class IndexShare_p {
 final serverObjects prop = new serverObjects();
 // get segment
-Segment indexSegment = null;
-if (post != null && post.containsKey("segment")) {
-String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 if (post == null) {
 prop.put("linkfreq", sb.getConfigLong("defaultLinkReceiveFrequency",30));

@@ -42,7 +42,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.index.RowSpaceExceededException;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.RSSLoader;
 import de.anomic.crawler.retrieval.Response;
 import de.anomic.data.WorkTables;
@@ -191,7 +190,7 @@ public class Load_RSS_p {
 messageurl = row.get("url", "");
 if (messageurl.length() == 0) continue;
 // get referrer
-final DigestURI referrer = sb.getURL(Segments.Process.LOCALCRAWLING, row.get("referrer", "").getBytes());
+final DigestURI referrer = sb.getURL(row.get("referrer", "").getBytes());
 // check if feed is registered in scheduler
 final byte[] api_pk = row.get("api_pk");
 final Row r = api_pk == null ? null : sb.tables.select("api", api_pk);
@@ -271,7 +270,7 @@ public class Load_RSS_p {
 final RSSMessage message = feed.getMessage(entry.getValue().substring(5));
 final DigestURI messageurl = new DigestURI(message.getLink());
 if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop;
-if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
+if (sb.urlExists(messageurl.hash()) != null) continue loop;
 sb.addToIndex(messageurl, null, null);
 RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
 } catch (final IOException e) {
@@ -316,7 +315,7 @@ public class Load_RSS_p {
 author = item.getAuthor();
 if (author == null) author = item.getCopyright();
 pubDate = item.getPubDate();
-prop.put("showitems_item_" + i + "_state", sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
+prop.put("showitems_item_" + i + "_state", sb.urlExists(messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
 prop.put("showitems_item_" + i + "_state_count", i);
 prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
 prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);

@@ -42,7 +42,7 @@ public class PerformanceGraph {
 final int height = post.getInt("height", 240);
 final boolean showMemory = !post.containsKey("nomem");
-return ProfilingGraph.performanceGraph(width, height, sb.indexSegments.URLCount() + " URLS / " + sb.indexSegments.RWICount() + " WORDS IN INDEX / " + sb.indexSegments.RWIBufferCount() + " WORDS IN CACHE", showMemory);
+return ProfilingGraph.performanceGraph(width, height, sb.index.URLCount() + " URLS / " + sb.index.RWICount() + " WORDS IN INDEX / " + sb.index.RWIBufferCount() + " WORDS IN CACHE", showMemory);
 }
 }

@@ -40,7 +40,6 @@ import net.yacy.kelondro.workflow.WorkflowThread;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -63,16 +62,7 @@ public class PerformanceQueues_p {
 File defaultSettingsFile = new File(sb.getAppPath(), "defaults/yacy.init");
 // get segment
-Segment indexSegment = null;
-if (post != null && post.containsKey("segment")) {
-String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 if(post != null) {
 if(post.containsKey("defaultFile")){

@@ -41,7 +41,6 @@ import net.yacy.cora.util.NumberTools;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.retrieval.Request;
 import de.anomic.server.serverObjects;
@@ -63,16 +62,7 @@ public class QuickCrawlLink_p {
 final Switchboard sb = (Switchboard) env;
 // get segment
-Segment indexSegment = null;
-if (post != null && post.containsKey("segment")) {
-final String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 if (post == null) {
 // send back usage example

@@ -54,7 +54,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import com.hp.hpl.jena.rdf.model.Model;
@@ -93,13 +92,8 @@ public class ViewFile {
 }
 // get segment
-Segment indexSegment = null;
+Segment indexSegment = sb.index;
 final boolean authorized = sb.verifyAuthentication(header);
-if (post != null && post.containsKey("segment") && authorized) {
-indexSegment = sb.indexSegments.segment(post.get("segment"));
-} else {
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
 if (post.containsKey("words"))
 prop.putHTML("error_words", post.get("words"));

@@ -38,7 +38,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -70,8 +69,7 @@ public class Vocabulary_p {
 boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted");
 boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
 if (discoveruri != null) {
-String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
-Segment segment = sb.indexSegments.segment(segmentName);
+Segment segment = sb.index;
 Iterator<DigestURI> ui = segment.urlSelector(discoveruri);
 String t;
 while (ui.hasNext()) {

@@ -9,7 +9,6 @@ import net.yacy.kelondro.rwi.ReferenceContainerCache;
 import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.peers.graphics.WebStructureGraph.HostReference;
 import net.yacy.search.Switchboard;
-import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.MetadataRepository;
 import net.yacy.search.index.MetadataRepository.HostStat;
 import net.yacy.search.index.Segment;
@@ -42,8 +41,7 @@ public class YBRFetch_p
 }
 // use an index segment to find hosts for given host hashes
-final String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
-final Segment segment = sb.indexSegments.segment(segmentName);
+final Segment segment = sb.index;
 final MetadataRepository metadata = segment.urlMetadata();
 Map<String, HostStat> hostHashResolver;
 try {

@@ -32,7 +32,6 @@ import net.yacy.kelondro.workflow.WorkflowProcessor;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -45,13 +44,9 @@ public class status_p {
 // return variable that accumulates replacements
 final Switchboard sb = (Switchboard) env;
 final serverObjects prop = new serverObjects();
-Segment segment = null;
 final boolean html = post != null && post.containsKey("html");
 prop.setLocalized(html);
-if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
-segment = sb.indexSegments.segment(post.get("segment"));
-}
-if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
+Segment segment = sb.index;
 prop.put("rejected", "0");
 sb.updateMySeed();

@@ -32,7 +32,6 @@ import net.yacy.kelondro.index.Row;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -43,13 +42,9 @@ public class termlist_p {
 final Log log = new Log("TERMLIST");
 final serverObjects prop = new serverObjects();
 final Switchboard sb = (Switchboard) env;
-Segment segment = null;
+Segment segment = sb.index;
 final boolean delete = post != null && post.containsKey("delete");
 final long mincount = post == null ? 10000 : post.getLong("mincount", 10000);
-if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
-segment = sb.indexSegments.segment(post.get("segment"));
-}
-if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
 final Iterator<Rating<byte[]>> i = segment.termIndex().referenceCountIterator(null, false, false);
 Rating<byte[]> e;
 int c = 0, termnumber = 0;

@@ -41,7 +41,6 @@ import net.yacy.kelondro.util.ISO639;
 import net.yacy.peers.Network;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import net.yacy.search.query.QueryParams;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -56,12 +55,7 @@ public final class timeline {
 if ((post == null) || (env == null)) return prop;
 final boolean authenticated = sb.adminAuthenticated(header) >= 2;
-Segment segment = null;
-if (post.containsKey("segment") && authenticated) {
-segment = sb.indexSegments.segment(post.get("segment"));
-} else {
-segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment segment = sb.index;
 final String querystring = post.get("query", ""); // a string of word hashes that shall be searched and combined
 final int count = Math.min((authenticated) ? 1000 : 10, post.getInt("maximumRecords", 1000)); // SRU syntax

@@ -41,7 +41,6 @@ import net.yacy.kelondro.rwi.IndexCell;
 import net.yacy.kelondro.rwi.ReferenceContainer;
 import net.yacy.peers.graphics.WebStructureGraph;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -65,7 +64,7 @@ public class webstructure {
 } else if (about.length() == 12 && Base64Order.enhancedCoder.wellformed(ASCII.getBytes(about))) {
 urlhash = ASCII.getBytes(about);
 hosthash = about.substring(6);
-url = authenticated ? sb.getURL(Segments.Process.PUBLIC, urlhash) : null;
+url = authenticated ? sb.getURL(urlhash) : null;
 } else if (authenticated && about.length() > 0) {
 // consider "about" as url or hostname
 try {
@@ -138,7 +137,7 @@ public class webstructure {
 // citations
 prop.put("citations", 1);
-IndexCell<CitationReference> citationReferences = sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation();
+IndexCell<CitationReference> citationReferences = sb.index.urlCitation();
 ReferenceContainer<CitationReference> citations = null;
 // citationReferences.count(urlhash) would give to the number of references good for ranking
 try {
@@ -158,7 +157,7 @@ public class webstructure {
 while (i.hasNext()) {
 CitationReference cr = i.next();
 byte[] refhash = cr.urlhash();
-DigestURI refurl = authenticated ? sb.getURL(Segments.Process.PUBLIC, refhash) : null;
+DigestURI refurl = authenticated ? sb.getURL(refhash) : null;
 prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
 if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
 prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);

@@ -40,7 +40,6 @@ import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.RDFNode;
@@ -55,15 +54,9 @@ public class yacydoc {
 final Switchboard sb = (Switchboard) env;
 final serverObjects prop = new serverObjects();
-final Segment segment;
+final Segment segment = sb.index;
 final boolean html = post != null && post.containsKey("html");
 prop.setLocalized(html);
-final boolean authorized = sb.verifyAuthentication(header);
-if (post != null && post.containsKey("segment") && authorized) {
-segment = sb.indexSegments.segment(post.get("segment"));
-} else {
-segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
 prop.put("dc_title", "");
 prop.put("dc_creator", "");
@@ -131,7 +124,7 @@ public class yacydoc {
 prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true));
 prop.put("yacy_size", entry.size());
 prop.put("yacy_words", entry.wordCount());
-prop.put("yacy_citations", sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation().count(entry.hash()));
+prop.put("yacy_citations", sb.index.urlCitation().count(entry.hash()));
 prop.put("yacy_inbound", entry.llocal());
 prop.put("yacy_outbound", entry.lother());

@@ -6,7 +6,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.index.RowSpaceExceededException;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.data.UserDB;
 import de.anomic.data.ymark.YMarkEntry;
 import de.anomic.data.ymark.YMarkTables;
@@ -35,7 +34,7 @@ public class add_ymark {
 if(post.containsKey("urlHash")) {
 final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
-final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).url();
+final DigestURI url = sb.index.urlMetadata().load(urlHash.getBytes()).url();
 final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
 final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
 try {

@@ -47,7 +47,7 @@ public class get_metadata {
 }
 try {
-final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.indexSegments);
+final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.index);
 final Document document = meta.loadDocument(sb.loader);
 final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();

@@ -215,7 +215,7 @@ public class get_treeview {
 }
 } else if (isAutoTagger || isMetadata || isURLdb || isCrawlStart) {
 try {
-final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.indexSegments);
+final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.index);
 final Document document = meta.loadDocument(sb.loader);
 final TreeMap<String, YMarkTag> tags = sb.tables.bookmarks.getTags(bmk_user);
 if(isAutoTagger) {

@@ -30,7 +30,6 @@ import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import de.anomic.data.DidYouMean;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -69,19 +68,7 @@ public class suggest {
 final int count = (post == null) ? 20 : post.getInt("count", 20);
 // get segment
-final Segment indexSegment;
-if (post != null && post.containsKey("segment")) {
-final String segmentName = post.get("segment");
-if (sb.indexSegments.segmentExist(segmentName)) {
-indexSegment = sb.indexSegments.segment(segmentName);
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+final Segment indexSegment = sb.index;
 int c = 0;
 if (more ||

@@ -37,7 +37,6 @@ import net.yacy.peers.Protocol;
 import net.yacy.peers.Seed;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.ResultURLs;
 import de.anomic.crawler.ResultURLs.EventOrigin;
 import de.anomic.crawler.ZURL.FailCategory;
@@ -150,7 +149,7 @@ public final class crawlReceipt {
 if ("fill".equals(result)) try {
 // put new entry into database
-sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
+sb.index.urlMetadata().store(entry);
 ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
 sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
 if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));

@@ -33,10 +33,9 @@ import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.kelondro.logging.Log;
-import net.yacy.peers.Protocol;
 import net.yacy.peers.Network;
+import net.yacy.peers.Protocol;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -91,7 +90,7 @@ public final class query {
 if (obj.equals("rwiurlcount")) try {
 // the total number of different urls in the rwi is returned
 // <env> shall contain a word hash, the number of assigned lurls to this hash is returned
-prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).get(env.getBytes(), null).size());
+prop.put("response", sb.index.termIndex().get(env.getBytes(), null).size());
 return prop;
 } catch (final IOException e) {
 Log.logException(e);
@@ -99,13 +98,13 @@ public final class query {
 if (obj.equals("rwicount")) {
 // return the total number of available word indexes
-prop.put("response", sb.indexSegments.termIndex(Segments.Process.PUBLIC).sizesMax());
+prop.put("response", sb.index.termIndex().sizesMax());
 return prop;
 }
 if (obj.equals("lurlcount")) {
 // return the number of all available l-url's
-prop.put("response", sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).size());
+prop.put("response", sb.index.urlMetadata().size());
 return prop;
 }

@@ -66,7 +66,6 @@ import net.yacy.search.EventTracker;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import net.yacy.search.query.AccessTracker;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.query.SearchEvent;
@@ -223,7 +222,7 @@ public final class search {
 ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
 if (query.length() == 0 && abstractSet != null) {
 // this is _not_ a normal search, only a request for index abstracts
-final Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
+final Segment indexSegment = sb.index;
 theQuery = new QueryParams(
 null,
 abstractSet,
@@ -315,7 +314,7 @@ public final class search {
 DigestURI.TLD_any_zone_filter,
 client,
 false,
-sb.indexSegments.segment(Segments.Process.PUBLIC),
+sb.index,
 rankingProfile,
 header.get(RequestHeader.USER_AGENT, ""),
 false, 0.0d, 0.0d, 0.0d

@@ -49,7 +49,6 @@ import net.yacy.peers.dht.FlatWordPartitionScheme;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
-import net.yacy.search.index.Segments;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -116,9 +115,9 @@ public final class transferRWI {
 sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode");
 result = "not_granted";
 pause = 60000;
-} else if (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() > cachelimit) {
+} else if (sb.index.termIndex().getBufferSize() > cachelimit) {
 // we are too busy to receive indexes
-sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() + ").");
+sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.index.termIndex().getBufferSize() + ").");
 granted = false; // don't accept more words if there are too many words to flush
 result = "busy";
 pause = 60000;
@@ -152,7 +151,7 @@ public final class transferRWI {
 int received = 0;
 int blocked = 0;
 int receivedURL = 0;
-final IndexCell<WordReference> cell = sb.indexSegments.termIndex(Segments.Process.DHTIN);
+final IndexCell<WordReference> cell = sb.index.termIndex();
 int count = 0;
 while (it.hasNext()) {
 serverCore.checkInterruption();
@@ -197,7 +196,7 @@ public final class transferRWI {
 // check if we need to ask for the corresponding URL
 if (!(knownURL.has(urlHash) || unknownURL.has(urlHash))) try {
-if (sb.indexSegments.urlMetadata(Segments.Process.DHTIN).exists(urlHash)) {
+if (sb.index.urlMetadata().exists(urlHash)) {
 knownURL.put(urlHash);
 } else {
 unknownURL.put(urlHash);
@@ -230,7 +229,7 @@ public final class transferRWI {
 }
 result = "ok";
-pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
+pause = (int) (sb.index.termIndex().getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
 }
 prop.put("unknownURL", unknownURLs.toString());

@@ -40,7 +40,6 @@ import net.yacy.peers.Protocol;
 import net.yacy.peers.Seed;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.ResultURLs;
 import de.anomic.crawler.ResultURLs.EventOrigin;
 import de.anomic.server.serverCore;
@@ -84,7 +83,7 @@ public final class transferURL {
 } else {
 int received = 0;
 int blocked = 0;
-final int sizeBefore = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size();
+final int sizeBefore = sb.index.urlMetadata().size();
 // read the urls from the other properties and store
 String urls;
 URIMetadataRow lEntry;
@@ -141,7 +140,7 @@ public final class transferURL {
 // write entry to database
 if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
 try {
-sb.indexSegments.urlMetadata(Segments.Process.DHTIN).store(lEntry);
+sb.index.urlMetadata().store(lEntry);
 ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
 if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
 received++;
@@ -153,7 +152,7 @@ public final class transferURL {
 sb.peers.mySeed().incRU(received);
 // return rewrite properties
-final int more = sb.indexSegments.urlMetadata(Segments.Process.DHTIN).size() - sizeBefore;
+final int more = sb.index.urlMetadata().size() - sizeBefore;
 doublevalues = Integer.toString(received - more);
 Network.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs");
 EventChannel.channels(EventChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + ", blocked " + blocked + " URLs from peer " + otherPeerName, "", otherPeer.hash));

@@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.peers.Protocol;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.NoticedURL;
 import de.anomic.crawler.ZURL.FailCategory;
 import de.anomic.crawler.retrieval.Request;
@@ -78,7 +77,7 @@ public class urls {
 if (entry == null) break;
 // find referrer, if there is one
-referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerhash());
+referrer = sb.getURL(entry.referrerhash());
 // place url to notice-url db
 sb.crawlQueues.delegatedURL.push(
@@ -114,10 +113,10 @@ public class urls {
 URIMetadataRow entry;
 DigestURI referrer;
 for (int i = 0; i < count; i++) {
-entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
+entry = sb.index.urlMetadata().load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
 if (entry == null) continue;
 // find referrer, if there is one
-referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash());
+referrer = sb.getURL(entry.referrerHash());
 // create RSS entry
 prop.put("item_" + c + "_title", entry.dc_title());
 prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));

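The peer-to-peer urls servlet now resolves referrers through the one-argument sb.getURL(hash) introduced in the Switchboard hunk further down. A null-safe sketch of the lookup chain (hypothetical helper, not project code):

import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.search.Switchboard;

final class ReferrerSketch {
    // resolve a metadata entry's referrer; null if the entry or its referrer hash is unknown
    static DigestURI referrerOf(final Switchboard sb, final URIMetadataRow entry) {
        return entry == null ? null : sb.getURL(entry.referrerHash());
    }
}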
@@ -57,7 +57,7 @@ public class yacyinteractive {
 prop.putHTML("querys", query.replaceAll(" ", "+"));
 prop.put("serverlist", query.isEmpty() ? 1 : 0);
 prop.put("focus", focus ? 1 : 0);
-prop.put("allowrealtime", sb.indexSegments.URLCount() < 100000 ? 1 : 0);
+prop.put("allowrealtime", sb.index.URLCount() < 100000 ? 1 : 0);
 return prop;
 }
 }
@@ -73,7 +73,6 @@ import net.yacy.search.EventTracker;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import net.yacy.search.query.AccessTracker;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.query.SearchEvent;
@@ -133,16 +132,7 @@ public class yacysearch {
 prop.put("sidebarVocabulary", j);
 // get segment
-Segment indexSegment = null;
-if ( post != null && post.containsKey("segment") ) {
-final String segmentName = post.get("segment");
-if ( sb.indexSegments.segmentExist(segmentName) ) {
-indexSegment = sb.indexSegments.segment(segmentName);
-}
-} else {
-// take default segment
-indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
-}
+Segment indexSegment = sb.index;
 final String EXT = header.get("EXT", "");
 final boolean rss = EXT.equals("rss");
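Note the behavioral change in yacysearch: the deleted block honored an optional "segment" request parameter, which is now silently ignored, and every query runs against the single index. The nine-line selection reduces to one assignment, sketched here for emphasis:

import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;

final class SegmentSelectionSketch {
    // the whole former if/else collapses to this
    static Segment select(final Switchboard sb) {
        return sb.index;
    }
}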
@@ -52,7 +52,6 @@ import net.yacy.peers.dht.PeerSelection;
 import net.yacy.search.Switchboard;
 import net.yacy.search.Switchboard.indexingQueueEntry;
 import net.yacy.search.SwitchboardConstants;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.NoticedURL.StackType;
 import de.anomic.crawler.ZURL.FailCategory;
 import de.anomic.crawler.retrieval.Request;
@@ -62,7 +61,6 @@ public class CrawlQueues {
 private static final String ERROR_DB_FILENAME = "urlError4.db";
 private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
-private static final Segments.Process PROCESS = Segments.Process.LOCALCRAWLING;
 protected Switchboard sb;
 protected Log log;
@@ -82,8 +80,8 @@ public class CrawlQueues {
 this.log.logConfig("Starting Crawling Management");
 this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
 FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+this.errorURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+this.delegatedURL = new ZURL(sb.index.getRemoteSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
 }
 public void relocate(final File newQueuePath) {
@@ -94,8 +92,8 @@ public class CrawlQueues {
 this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
 FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
+this.errorURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
+this.delegatedURL = new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
 }
 public synchronized void close() {
@@ -276,7 +274,7 @@ public class CrawlQueues {
 return true;
 }
 try {
-this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(PROCESS, new Response(urlEntry, profile), null, null));
+this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(new Response(urlEntry, profile), null, null));
 Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
 } catch (final InterruptedException e) {
 Log.logException(e);
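With the PROCESS constant gone, CrawlQueues takes the Solr connector straight from sb.index. Since the two ZURL stores differ only in file name and the delegated flag, their construction could be factored as below (a hypothetical helper; the committed code keeps both calls inline):

import java.io.File;

import de.anomic.crawler.ZURL;

// inside CrawlQueues: build an error or delegated URL store against the single index
private ZURL openZURL(final File queuePath, final String filename, final boolean delegated) {
    return new ZURL(this.sb.index.getRemoteSolr(), this.sb.solrScheme, queuePath,
            filename, delegated, this.sb.useTailCache, this.sb.exceed134217727);
}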
@@ -42,7 +42,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Base64Order;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.retrieval.Response;
 import de.anomic.data.WorkTables;
 import de.anomic.server.serverObjects;
@@ -59,6 +58,7 @@ public class RSSLoader extends Thread {
 this.urlf = urlf;
 }
+@Override
 public void run() {
 RSSReader rss = null;
 try {
@@ -89,7 +89,7 @@ public class RSSLoader extends Thread {
 try {
 final DigestURI messageurl = new DigestURI(message.getLink());
 if (indexTriggered.containsKey(messageurl.hash())) continue loop;
-if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
+if (sb.urlExists(messageurl.hash()) != null) continue loop;
 sb.addToIndex(messageurl, null, null);
 indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
 loadCount++;
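RSSLoader's duplicate check moves to the one-argument urlExists. As defined in the Switchboard hunk below, it returns the name of the database holding the hash ("loaded" for the metadata index, otherwise a crawl-queue name) and null when the URL is unknown. A sketch:

import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;

final class DuplicateCheckSketch {
    // true if the URL is already in the metadata index or in one of the crawl queues
    static boolean alreadyKnown(final Switchboard sb, final DigestURI url) {
        return sb.urlExists(url.hash()) != null;
    }
}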
@@ -34,7 +34,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.retrieval.Request;
 public class SitemapImporter extends Thread {
@@ -52,6 +51,7 @@ public class SitemapImporter extends Thread {
 this.crawlingProfile = profileEntry;
 }
+@Override
 public void run() {
 try {
 logger.logInfo("Start parsing sitemap file " + this.siteMapURL);
@@ -76,10 +76,10 @@ public class SitemapImporter extends Thread {
 // check if the url is known and needs to be recrawled
 Date lastMod = entry.lastmod(null);
 if (lastMod != null) {
-final String dbocc = this.sb.urlExists(Segments.Process.LOCALCRAWLING, nexturlhash);
+final String dbocc = this.sb.urlExists(nexturlhash);
 if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) {
 // the url was already loaded. we need to check the date
-final URIMetadataRow oldEntry = this.sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).load(nexturlhash);
+final URIMetadataRow oldEntry = this.sb.index.urlMetadata().load(nexturlhash);
 if (oldEntry != null) {
 final Date modDate = oldEntry.moddate();
 // check if modDate is null
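The sitemap importer keeps its recrawl logic; only the lookups lose their process argument. A sketch of the apparent decision (hypothetical helper; the actual date comparison continues past the hunk boundary, so the final branch is an assumption):

import java.util.Date;

import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.search.Switchboard;

final class RecrawlSketch {
    static boolean needsRecrawl(final Switchboard sb, final byte[] urlhash, final Date lastMod) {
        final String dbocc = sb.urlExists(urlhash);
        if (dbocc == null || !dbocc.equalsIgnoreCase("loaded")) return true; // never loaded
        final URIMetadataRow oldEntry = sb.index.urlMetadata().load(urlhash);
        if (oldEntry == null || oldEntry.moddate() == null) return true;     // no stored date
        return oldEntry.moddate().before(lastMod);                           // assumption: a newer lastmod triggers a recrawl
    }
}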
@@ -41,7 +41,6 @@ import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.Latency;
 import de.anomic.crawler.ZURL.FailCategory;
@@ -115,7 +114,7 @@ public class FTPLoader {
 // directory -> get list of files
 final RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
-final DigestURI u = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+final DigestURI u = this.sb.getURL(request.referrerhash());
 if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false));
 }
@@ -222,7 +221,7 @@ public class FTPLoader {
 // create response header
 final RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
-final DigestURI refurl = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+final DigestURI refurl = this.sb.getURL(request.referrerhash());
 if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
 }
 final ResponseHeader responseHeader = new ResponseHeader(200);
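The same two-line referrer resolution recurs in FileLoader, HTTPLoader, SMBLoader and LoaderDispatcher below; in every case only the Segments.Process argument disappears. A sketch of the shared pattern as a hypothetical helper (the loaders keep it inlined; import paths for RequestHeader and Request assumed from the surrounding code):

import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;

import de.anomic.crawler.retrieval.Request;

final class RefererSketch {
    // set the Referer header when the request's referrer hash resolves to a known URL
    static void putReferer(final Switchboard sb, final Request request, final RequestHeader header) {
        if (request.referrerhash() == null) return;
        final DigestURI referrer = sb.getURL(request.referrerhash());
        if (referrer != null) header.put(RequestHeader.REFERER, referrer.toNormalform(true, false));
    }
}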
@@ -40,7 +40,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 public class FileLoader {
@@ -61,7 +60,7 @@ public class FileLoader {
 RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
-DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+DigestURI ur = this.sb.getURL(request.referrerhash());
 if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
 }
@@ -39,7 +39,6 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.Latency;
 import de.anomic.crawler.ZURL.FailCategory;
@@ -118,7 +117,7 @@ public final class HTTPLoader {
 final RequestHeader requestHeader = new RequestHeader();
 requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
 DigestURI refererURL = null;
-if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
 if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
 requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
 requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
@@ -168,7 +167,7 @@ public final class HTTPLoader {
 }
 // check if the url was already indexed
-final String dbname = this.sb.urlExists(Segments.Process.LOCALCRAWLING, redirectionUrl.hash());
+final String dbname = this.sb.urlExists(redirectionUrl.hash());
 if (dbname != null) { // customer request
 this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
 throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname);
@@ -49,7 +49,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.CrawlProfile;
 public class SMBLoader {
@@ -73,7 +72,7 @@ public class SMBLoader {
 RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
-DigestURI ur = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+DigestURI ur = this.sb.getURL(request.referrerhash());
 if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
 }
@@ -38,13 +38,13 @@ import net.yacy.document.Parser.Failure;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.repository.LoaderDispatcher;
-import net.yacy.search.index.Segments;
+import net.yacy.search.index.Segment;
 import de.anomic.crawler.retrieval.Response;
 public class YMarkMetadata {
 private DigestURI uri;
 Document document;
-Segments indexSegment;
+Segment indexSegment;
 public enum METADATA {
 TITLE,
@@ -72,16 +72,16 @@ public class YMarkMetadata {
 this.indexSegment = null;
 }
-public YMarkMetadata(final DigestURI uri, final Segments indexSegment) {
+public YMarkMetadata(final DigestURI uri, final Segment indexSegment) {
 this.uri = uri;
 this.document = null;
 this.indexSegment = indexSegment;
 }
-public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) {
+public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
 this.document = null;
 this.indexSegment = indexSegment;
-this.uri = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).url();
+this.uri = this.indexSegment.urlMetadata().load(urlHash).url();
 }
 public YMarkMetadata(final Document document) {
@@ -105,7 +105,7 @@ public class YMarkMetadata {
 public EnumMap<METADATA, String> getMetadata() {
 final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
-final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash());
+final URIMetadataRow urlEntry = this.indexSegment.urlMetadata().load(this.uri.hash());
 if (urlEntry != null) {
 metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size()));
 metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate()));
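One detail the Segments-to-Segment swap does not change: the byte[] constructor dereferences load(urlHash) directly, so an unknown hash still ends in a NullPointerException. A hedged defensive variant, not what the committed code does:

public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
    this.document = null;
    this.indexSegment = indexSegment;
    final URIMetadataRow row = indexSegment.urlMetadata().load(urlHash);
    this.uri = row == null ? null : row.url(); // the committed code calls .url() unconditionally
}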
@@ -54,7 +54,6 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
-import net.yacy.search.index.Segments;
 import de.anomic.crawler.Cache;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.ZURL.FailCategory;
@@ -211,7 +210,7 @@ public final class LoaderDispatcher {
 final RequestHeader requestHeader = new RequestHeader();
 requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
 DigestURI refererURL = null;
-if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
 if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
 final Response response = new Response(
 request,
@@ -144,7 +144,6 @@ import net.yacy.repository.Blacklist;
 import net.yacy.repository.FilterEngine;
 import net.yacy.repository.LoaderDispatcher;
 import net.yacy.search.index.Segment;
-import net.yacy.search.index.Segments;
 import net.yacy.search.index.SolrConfiguration;
 import net.yacy.search.query.AccessTracker;
 import net.yacy.search.query.QueryParams;
@@ -218,7 +217,7 @@ public final class Switchboard extends serverSwitch
 public File queuesRoot;
 public File surrogatesInPath;
 public File surrogatesOutPath;
-public Segments indexSegments;
+public Segment index;
 public LoaderDispatcher loader;
 public CrawlSwitchboard crawler;
 public CrawlQueues crawlQueues;
@@ -379,16 +378,14 @@ public final class Switchboard extends serverSwitch
 // initialize index
 ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
 final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
-this.indexSegments =
-new Segments(
+this.index =
+new Segment(
 this.log,
-segmentsPath,
+new File(segmentsPath, "default"),
 wordCacheMaxCount,
 fileSizeMax,
 this.useTailCache,
 this.exceed134217727);
-// set the default segment names
-setDefaultSegments();
 // prepare a solr index profile switch list
 final File solrBackupProfile = new File("defaults/solr.keys.list");
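Switchboard now opens a single Segment at SEGMENTS/default instead of a Segments manager that created named segments on demand; pointing at the "default" subdirectory presumably keeps the data of existing installations readable, since "default" was the default segment name before. Condensed, the new initialization reads:

// open the one default segment; the on-disk layout stays SEGMENTS/default
final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
this.index = new Segment(
        this.log,
        new File(segmentsPath, "default"),
        wordCacheMaxCount,
        fileSizeMax,
        this.useTailCache,
        this.exceed134217727);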
@@ -418,7 +415,7 @@ public final class Switchboard extends serverSwitch
 ShardSelection.Method.MODULO_HOST_MD5,
 10000, true);
 solr.setCommitWithinMs(commitWithinMs);
-this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectRemoteSolr(solr);
+this.index.connectRemoteSolr(solr);
 } catch ( final IOException e ) {
 Log.logException(e);
 }
@@ -466,7 +463,7 @@ public final class Switchboard extends serverSwitch
 // init a DHT transmission dispatcher
 this.dhtDispatcher =
 (this.peers.sizeConnected() == 0) ? null : new Dispatcher(
-this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
+this.index,
 this.peers,
 true,
 10000);
@@ -767,7 +764,7 @@ public final class Switchboard extends serverSwitch
 new CrawlStacker(
 this.crawlQueues,
 this.crawler,
-this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
+this.index,
 this.peers,
 isIntranetMode(),
 isGlobalMode(),
@@ -994,33 +991,6 @@ public final class Switchboard extends serverSwitch
 sb = this;
 }
-private void setDefaultSegments() {
-this.indexSegments.setSegment(
-Segments.Process.RECEIPTS,
-getConfig(SwitchboardConstants.SEGMENT_RECEIPTS, "default"));
-this.indexSegments.setSegment(
-Segments.Process.QUERIES,
-getConfig(SwitchboardConstants.SEGMENT_QUERIES, "default"));
-this.indexSegments.setSegment(
-Segments.Process.DHTIN,
-getConfig(SwitchboardConstants.SEGMENT_DHTIN, "default"));
-this.indexSegments.setSegment(
-Segments.Process.DHTOUT,
-getConfig(SwitchboardConstants.SEGMENT_DHTOUT, "default"));
-this.indexSegments.setSegment(
-Segments.Process.PROXY,
-getConfig(SwitchboardConstants.SEGMENT_PROXY, "default"));
-this.indexSegments.setSegment(
-Segments.Process.LOCALCRAWLING,
-getConfig(SwitchboardConstants.SEGMENT_LOCALCRAWLING, "default"));
-this.indexSegments.setSegment(
-Segments.Process.REMOTECRAWLING,
-getConfig(SwitchboardConstants.SEGMENT_REMOTECRAWLING, "default"));
-this.indexSegments.setSegment(
-Segments.Process.PUBLIC,
-getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"));
-}
 public int getIndexingProcessorsQueueSize() {
 return this.indexingDocumentProcessor.queueSize()
 + this.indexingCondensementProcessor.queueSize()
@@ -1170,8 +1140,8 @@ public final class Switchboard extends serverSwitch
 if ( this.dhtDispatcher != null ) {
 this.dhtDispatcher.close();
 }
-synchronized ( this.indexSegments ) {
-this.indexSegments.close();
+synchronized ( this.index ) {
+this.index.close();
 }
 this.crawlStacker.announceClose();
 this.crawlStacker.close();
@@ -1211,16 +1181,14 @@ public final class Switchboard extends serverSwitch
 partitionExponent,
 this.useTailCache,
 this.exceed134217727);
-this.indexSegments =
-new Segments(
+this.index =
+new Segment(
 this.log,
-new File(new File(indexPrimaryPath, networkName), "SEGMENTS"),
+new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"),
 wordCacheMaxCount,
 fileSizeMax,
 this.useTailCache,
 this.exceed134217727);
-// set the default segment names
-setDefaultSegments();
 this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
 // create a crawler
@@ -1229,7 +1197,7 @@ public final class Switchboard extends serverSwitch
 // init a DHT transmission dispatcher
 this.dhtDispatcher =
 (this.peers.sizeConnected() == 0) ? null : new Dispatcher(
-this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
+this.index,
 this.peers,
 true,
 10000);
@@ -1257,7 +1225,7 @@ public final class Switchboard extends serverSwitch
 new CrawlStacker(
 this.crawlQueues,
 this.crawler,
-this.indexSegments.segment(Segments.Process.LOCALCRAWLING),
+this.index,
 this.peers,
 "local.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
 "global.any".indexOf(getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")) >= 0,
@@ -1449,11 +1417,11 @@ public final class Switchboard extends serverSwitch
 }
 }
-public String urlExists(final Segments.Process process, final byte[] hash) {
+public String urlExists(final byte[] hash) {
 // tests if hash occurrs in any database
 // if it exists, the name of the database is returned,
 // if it not exists, null is returned
-if ( this.indexSegments.urlMetadata(process).exists(hash) ) {
+if ( this.index.urlMetadata().exists(hash) ) {
 return "loaded";
 }
 return this.crawlQueues.urlExists(hash);
@@ -1465,14 +1433,14 @@ public final class Switchboard extends serverSwitch
 this.crawlQueues.urlRemove(hash);
 }
-public DigestURI getURL(final Segments.Process process, final byte[] urlhash) {
+public DigestURI getURL(final byte[] urlhash) {
 if ( urlhash == null ) {
 return null;
 }
 if ( urlhash.length == 0 ) {
 return null;
 }
-final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash);
+final URIMetadataRow le = this.index.urlMetadata().load(urlhash);
 if ( le != null ) {
 return le.url();
 }
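These two helpers are the API surface most files in this commit migrate to. A usage sketch (the hash value is hypothetical; ASCII is used the same way in the urls.java hunk above, its import path assumed):

import net.yacy.cora.document.ASCII;
import net.yacy.kelondro.data.meta.DigestURI;

final byte[] hash = ASCII.getBytes("AAAAAAAAAAAA"); // hypothetical 12-character URL hash
final String db = sb.urlExists(hash);               // "loaded", a crawl-queue name, or null
if ("loaded".equalsIgnoreCase(db)) {
    final DigestURI url = sb.getURL(hash);          // null for unknown or empty hashes
    System.out.println("already indexed: " + url);
}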
@@ -1606,7 +1574,7 @@ public final class Switchboard extends serverSwitch
 this.crawler.close();
 this.log
 .logConfig("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager (stand by...)");
-this.indexSegments.close();
+this.index.close();
 this.peers.close();
 Cache.close();
 this.tables.close();
@@ -1696,7 +1664,6 @@ public final class Switchboard extends serverSwitch
 }
 try {
 this.indexingDocumentProcessor.enQueue(new indexingQueueEntry(
-Segments.Process.LOCALCRAWLING,
 response,
 null,
 null));
@@ -1810,9 +1777,7 @@ public final class Switchboard extends serverSwitch
 0);
 response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false);
 final indexingQueueEntry queueEntry =
-new indexingQueueEntry(Segments.Process.SURROGATES, response, new Document[] {
-document
-}, null);
+new indexingQueueEntry(response, new Document[] {document}, null);
 // place the queue entry into the concurrent process of the condenser (document analysis)
 try {
@@ -1887,18 +1852,15 @@ public final class Switchboard extends serverSwitch
 public static class indexingQueueEntry extends WorkflowJob
 {
-public Segments.Process process;
 public Response queueEntry;
 public Document[] documents;
 public Condenser[] condenser;
 public indexingQueueEntry(
-final Segments.Process process,
 final Response queueEntry,
 final Document[] documents,
 final Condenser[] condenser) {
 super();
-this.process = process;
 this.queueEntry = queueEntry;
 this.documents = documents;
 this.condenser = condenser;
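After the removal the queue entry carries only the workflow payload. Reassembled from this hunk (the closing brace lies outside it):

public static class indexingQueueEntry extends WorkflowJob {
    public Response queueEntry;
    public Document[] documents;
    public Condenser[] condenser;

    public indexingQueueEntry(
            final Response queueEntry,
            final Document[] documents,
            final Condenser[] condenser) {
        super();
        this.queueEntry = queueEntry;
        this.documents = documents;
        this.condenser = condenser;
    }
}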
@@ -1929,9 +1891,7 @@ public final class Switchboard extends serverSwitch
 // clear caches if necessary
 if ( !MemoryControl.request(8000000L, false) ) {
-for ( final Segment indexSegment : this.indexSegments ) {
-indexSegment.urlMetadata().clearCache();
-}
+sb.index.urlMetadata().clearCache();
 SearchEventCache.cleanupEvents(false);
 this.trail.clear();
 }
@@ -2301,7 +2261,7 @@ public final class Switchboard extends serverSwitch
 if ( documents == null ) {
 return null;
 }
-return new indexingQueueEntry(in.process, in.queueEntry, documents, null);
+return new indexingQueueEntry(in.queueEntry, documents, null);
 }
 private Document[] parseDocument(final Response response) throws InterruptedException {
@@ -2446,11 +2406,11 @@ public final class Switchboard extends serverSwitch
 + in.queueEntry.url().toNormalform(false, true)
 + "': indexing not wanted by crawl profile");
 }
-return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
+return new indexingQueueEntry(in.queueEntry, in.documents, null);
 }
-boolean localSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr");
-boolean remoteSolr = this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false);
+boolean localSolr = this.index.getLocalSolr() != null && getConfig("federated.service.yacy.indexing.engine", "classic").equals("solr");
+boolean remoteSolr = this.index.getRemoteSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false);
 if (localSolr || remoteSolr) {
 // send the documents to solr
 for ( final Document doc : in.documents ) {
@@ -2470,8 +2430,8 @@ public final class Switchboard extends serverSwitch
 }
 try {
 SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
-if (localSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getLocalSolr().add(solrDoc);
-if (remoteSolr) this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getRemoteSolr().add(solrDoc);
+if (localSolr) this.index.getLocalSolr().add(solrDoc);
+if (remoteSolr) this.index.getRemoteSolr().add(solrDoc);
 } catch ( final IOException e ) {
 Log.logWarning(
 "SOLR",
@@ -2494,7 +2454,7 @@ public final class Switchboard extends serverSwitch
 + in.queueEntry.url().toNormalform(false, true)
 + "': indexing not wanted by federated rule for YaCy");
 }
-return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
+return new indexingQueueEntry(in.queueEntry, in.documents, null);
 }
 final List<Document> doclist = new ArrayList<Document>();
@@ -2519,7 +2479,7 @@ public final class Switchboard extends serverSwitch
 }
 if ( doclist.isEmpty() ) {
-return new indexingQueueEntry(in.process, in.queueEntry, in.documents, null);
+return new indexingQueueEntry(in.queueEntry, in.documents, null);
 }
 in.documents = doclist.toArray(new Document[doclist.size()]);
 final Condenser[] condenser = new Condenser[in.documents.length];
@@ -2540,7 +2500,7 @@ public final class Switchboard extends serverSwitch
 ? true
 : !profile.remoteIndexing());
 }
-return new indexingQueueEntry(in.process, in.queueEntry, in.documents, condenser);
+return new indexingQueueEntry(in.queueEntry, in.documents, condenser);
 }
 public indexingQueueEntry webStructureAnalysis(final indexingQueueEntry in) {
@@ -2565,7 +2525,6 @@ public final class Switchboard extends serverSwitch
 if ( in.condenser != null ) {
 for ( int i = 0; i < in.documents.length; i++ ) {
 storeDocumentIndex(
-in.process,
 in.queueEntry,
 in.documents[i],
 in.condenser[i],
@@ -2577,7 +2536,6 @@ public final class Switchboard extends serverSwitch
 }
 private void storeDocumentIndex(
-final Segments.Process process,
 final Response queueEntry,
 final Document document,
 final Condenser condenser,
@@ -2591,9 +2549,6 @@ public final class Switchboard extends serverSwitch
 final DigestURI url = new DigestURI(document.dc_source());
 final DigestURI referrerURL = queueEntry.referrerURL();
 EventOrigin processCase = queueEntry.processCase(this.peers.mySeed().hash);
-if ( process == Segments.Process.SURROGATES ) {
-processCase = EventOrigin.SURROGATES;
-}
 if ( condenser == null || document.indexingDenied() ) {
 //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
@@ -2629,7 +2584,7 @@ public final class Switchboard extends serverSwitch
 URIMetadataRow newEntry = null;
 try {
 newEntry =
-this.indexSegments.segment(process).storeDocument(
+this.index.storeDocument(
 url,
 referrerURL,
 queueEntry.lastModified(),
@@ -2763,11 +2718,10 @@ public final class Switchboard extends serverSwitch
 public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName)
 throws IOException,
 Parser.Failure {
-final Segments.Process process = Segments.Process.LOCALCRAWLING;
 if ( searchEvent != null ) {
 searchEvent.addHeuristic(url.hash(), heuristicName, true);
 }
-if ( this.indexSegments.segment(process).exists(url.hash()) ) {
+if ( this.index.exists(url.hash()) ) {
 return; // don't do double-work
 }
 final Request request = this.loader.request(url, true, true);
@@ -2806,7 +2760,6 @@ public final class Switchboard extends serverSwitch
 ResultImages.registerImages(url, document, true);
 Switchboard.this.webStructure.generateCitationReference(url, document, condenser);
 storeDocumentIndex(
-process,
 response,
 document,
 condenser,
@@ -3023,7 +2976,7 @@ public final class Switchboard extends serverSwitch
 if ( getConfig(SwitchboardConstants.INDEX_DIST_ALLOW, "false").equalsIgnoreCase("false") ) {
 return "no DHT distribution: not enabled (per setting)";
 }
-final Segment indexSegment = this.indexSegments.segment(segment);
+final Segment indexSegment = this.index;
 if ( indexSegment.urlMetadata().size() < 10 ) {
 return "no DHT distribution: loadedURL.size() = " + indexSegment.urlMetadata().size();
 }
@@ -3299,12 +3252,12 @@ public final class Switchboard extends serverSwitch
 this.peers.mySeed().put(Seed.ISPEED, Integer.toString(currentPPM()));
 this.peers.mySeed().put(Seed.RSPEED, Float.toString(averageQPM()));
 this.peers.mySeed().put(Seed.UPTIME, Long.toString(uptime / 60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
-this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.indexSegments.URLCount())); // the number of links that the peer has stored (LURL's)
+this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.index.URLCount())); // the number of links that the peer has stored (LURL's)
 this.peers.mySeed().put(Seed.NCOUNT, Integer.toString(this.crawlQueues.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's)
 this.peers.mySeed().put(
 Seed.RCOUNT,
 Integer.toString(this.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.GLOBAL))); // the number of links that the peer provides for remote crawling (ZURL's)
-this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.indexSegments.RWICount())); // the minimum number of words that the peer has indexed (as it says)
+this.peers.mySeed().put(Seed.ICOUNT, Long.toString(this.index.RWICount())); // the minimum number of words that the peer has indexed (as it says)
 this.peers.mySeed().put(Seed.SCOUNT, Integer.toString(this.peers.sizeConnected())); // the number of seeds that the peer has stored
 this.peers.mySeed().put(
 Seed.CCOUNT,
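Worth noting in the hunks above: storeDocumentIndex loses both the process parameter and the SURROGATES special case, so the event origin now comes from queueEntry.processCase(...) alone, and addToIndex keeps its de-duplication guard minus the routing. A sketch of that guard as a hypothetical helper inside Switchboard (loading and indexing continue as in the real method body):

import java.io.IOException;

import net.yacy.document.Parser;
import net.yacy.kelondro.data.meta.DigestURI;

import de.anomic.crawler.retrieval.Request;

public void addToIndexIfAbsent(final DigestURI url) throws IOException, Parser.Failure {
    if (this.index.exists(url.hash())) {
        return; // don't do double-work
    }
    final Request request = this.loader.request(url, true, true);
    // ... loading, parsing and storeDocumentIndex(...) follow as in addToIndex
}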
@@ -137,24 +137,23 @@ public class Segment {
 maxFileSize,
 writeBufferSize);
-/*
-this.authorNavIndex = new IndexCell<NavigationReference>(
-new File(new File(segmentPath, "nav_author"), "idx"),
-navigationReferenceFactory,
-wordOrder,
-NavigationReferenceRow.navEntryRow,
-entityCacheMaxSize,
-targetFileSize,
-maxFileSize,
-this.merger,
-writeBufferSize);
-*/
 // create LURL-db
 this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727);
 //this.connectLocalSolr();
 }
+public long URLCount() {
+return this.urlMetadata.size();
+}
+public long RWICount() {
+return this.termIndex.sizesMax();
+}
+public int RWIBufferCount() {
+return this.termIndex.getBufferSize();
+}
 public void connectRemoteSolr(final SolrConnector solr) {
 this.urlMetadata.connectRemoteSolr(solr);
 }
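URLCount, RWICount and RWIBufferCount move from Segments, where they aggregated over every open segment, onto the single Segment as plain delegations. For comparison, the old aggregate (from the removed Segments.java below) next to the new delegation:

// old, in Segments: sum over all open segments
public long URLCount() {
    if (this.segments == null) return 0;
    long c = 0;
    for (final Segment s : this.segments.values()) c += s.urlMetadata().size();
    return c;
}

// new, in Segment: direct delegation to the metadata repository
public long URLCount() {
    return this.urlMetadata.size();
}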
@@ -1,196 +0,0 @@
// Segments.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.07.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
public class Segments implements Iterable<Segment> {
/**
* process enumeration type
* defines constants that can be used to assign process-related segment names
*/
public enum Process {
RECEIPTS,
QUERIES,
DHTIN,
DHTOUT, // the only segment that is used for reading-only
PROXY,
LOCALCRAWLING,
REMOTECRAWLING,
PUBLIC,
SURROGATES; // includes the index that can be retrieved by the yacy p2p api
public String toString() {
throw new UnsupportedOperationException("toString not allowed");
}
}
private final Log log;
private final File segmentsPath;
private final int entityCacheMaxSize;
private final long maxFileSize;
private Map<String, Segment> segments;
private final HashMap<Process, String> process_assignment;
private final boolean useTailCache;
private final boolean exceed134217727;
public Segments(
final Log log,
final File segmentsPath,
final int entityCacheMaxSize,
final long maxFileSize,
final boolean useTailCache,
final boolean exceed134217727) {
this.log = log;
this.segmentsPath = segmentsPath;
this.entityCacheMaxSize = entityCacheMaxSize;
this.maxFileSize = maxFileSize;
this.useTailCache = useTailCache;
this.exceed134217727 = exceed134217727;
this.segments = new HashMap<String, Segment>();
this.process_assignment = new HashMap<Process, String>();
// assign default segment names for the processes
this.process_assignment.put(Process.RECEIPTS, "default");
this.process_assignment.put(Process.QUERIES, "default");
this.process_assignment.put(Process.DHTIN, "default");
this.process_assignment.put(Process.DHTOUT, "default");
this.process_assignment.put(Process.PROXY, "default");
this.process_assignment.put(Process.LOCALCRAWLING, "default");
this.process_assignment.put(Process.REMOTECRAWLING, "default");
this.process_assignment.put(Process.PUBLIC, "default");
this.process_assignment.put(Process.SURROGATES, "default");
}
public void setSegment(final Process process, final String segmentName) {
this.process_assignment.put(process, segmentName);
}
public String[] segmentNames() {
return this.segments.keySet().toArray(new String[this.segments.size()]);
}
public boolean segmentExist(final String segmentName) {
return this.segments.containsKey(segmentName);
}
public Segment segment(final Process process) {
return segment(this.process_assignment.get(process));
}
public Segment segment(final String segmentName) {
if (this.segments == null) return null;
Segment segment = this.segments.get(segmentName);
if (segment == null) {
// generate the segment
try {
segment = new Segment(
this.log,
new File(this.segmentsPath, segmentName),
this.entityCacheMaxSize,
this.maxFileSize,
this.useTailCache,
this.exceed134217727);
} catch (final IOException e) {
Log.logException(e);
return null;
}
this.segments.put(segmentName, segment);
}
return segment;
}
public long URLCount() {
if (this.segments == null) return 0;
long c = 0;
for (final Segment s: this.segments.values()) c += s.urlMetadata().size();
return c;
}
public long RWICount() {
if (this.segments == null) return 0;
long c = 0;
for (final Segment s: this.segments.values()) c += s.termIndex().sizesMax();
return c;
}
public int RWIBufferCount() {
if (this.segments == null) return 0;
int c = 0;
for (final Segment s: this.segments.values()) c += s.termIndex().getBufferSize();
return c;
}
public MetadataRepository urlMetadata(final Process process) {
return segment(this.process_assignment.get(process)).urlMetadata();
}
public IndexCell<WordReference> termIndex(final Process process) {
return segment(this.process_assignment.get(process)).termIndex();
}
public void clear(final Process process) {
segment(this.process_assignment.get(process)).clear();
}
public File getLocation(final Process process) {
return segment(this.process_assignment.get(process)).getLocation();
}
public void close(final Process process) {
segment(this.process_assignment.get(process)).close();
}
public synchronized void close() {
if (this.segments != null) for (final Segment s: this.segments.values()) s.close();
this.segments = null;
}
public void finalize() {
this.close();
}
public synchronized Segment.ReferenceCleaner getReferenceCleaner(final String segmentName, final byte[] startHash) {
return segment(segmentName).getReferenceCleaner(startHash);
}
public Iterator<Segment> iterator() {
return this.segments.values().iterator();
}
}