refactoring

pull/1/head
Michael Peter Christen 13 years ago
parent bf55f69176
commit 0cab06c47c
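
In substance this is a mechanical API rename: the Segment accessor urlMetadata() becomes fulltext(), together with its backing field of type Fulltext (see the Segment.java hunks below), and every call site across servlets, crawler, transfer and search code is updated in the same commit. A minimal sketch of the call-site pattern, using a hypothetical urlHash variable and assuming the YaCy types on the classpath:

// before this commit
final URIMetadata entry = segment.urlMetadata().getMetadata(urlHash);
// after this commit: the same Fulltext object behind a clearer name
final URIMetadata entry = segment.fulltext().getMetadata(urlHash);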

@@ -195,7 +195,7 @@ public class Bookmarks {
final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
if (bookmark == null) {
// try to get the bookmark from the LURL database
-final URIMetadata urlentry = sb.index.urlMetadata().getMetadata(ASCII.getBytes(urlHash));
+final URIMetadata urlentry = sb.index.fulltext().getMetadata(ASCII.getBytes(urlHash));
if (urlentry != null) try {
final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay));
prop.put("mode_edit", "0"); // create mode

@@ -116,7 +116,7 @@ public class CrawlResults {
final String hash = post.get("hash", null);
if (hash != null) {
// delete from database
-sb.index.urlMetadata().remove(hash.getBytes());
+sb.index.fulltext().remove(hash.getBytes());
}
}
@@ -126,7 +126,7 @@ public class CrawlResults {
if (hashpart != null) {
// delete all urls for this domain from database
try {
-sb.index.urlMetadata().deleteDomain(hashpart);
+sb.index.fulltext().deleteDomain(hashpart);
ResultURLs.deleteDomain(tabletype, domain, hashpart);
} catch (final IOException e) {
Log.logException(e);
@@ -186,7 +186,7 @@ public class CrawlResults {
while (i.hasNext()) {
entry = i.next();
try {
-urle = sb.index.urlMetadata().getMetadata(UTF8.getBytes(entry.getKey()));
+urle = sb.index.fulltext().getMetadata(UTF8.getBytes(entry.getKey()));
if (urle == null) {
Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null;

@@ -319,7 +319,7 @@ public class Crawler_p {
// first delete old entry, if exists
final DigestURI url = new DigestURI(crawlingStart);
final byte[] urlhash = url.hash();
-indexSegment.urlMetadata().remove(urlhash);
+indexSegment.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);
@@ -592,7 +592,7 @@ public class Crawler_p {
nexturl = new DigestURI(e.getKey());
// remove the url from the database to be prepared to crawl them again
final byte[] urlhash = nexturl.hash();
-indexSegment.urlMetadata().remove(urlhash);
+indexSegment.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);
sb.crawlStacker.enqueueEntry(new Request(

@@ -93,7 +93,7 @@ public class IndexControlRWIs_p {
prop.put("keyhash", "");
prop.put("result", "");
prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
-prop.put("cleanup_solr", sb.index.urlMetadata().connectedSolr() ? 1 : 0);
+prop.put("cleanup_solr", sb.index.fulltext().connectedSolr() ? 1 : 0);
// switch off all optional forms/lists
prop.put("searchresult", 0);
@@ -158,9 +158,9 @@ public class IndexControlRWIs_p {
if ( post.get("deleteIndex", "").equals("on") ) {
segment.clear();
}
-if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.urlMetadata().connectedSolr()) {
+if ( post.get("deleteRemoteSolr", "").equals("on") && sb.index.fulltext().connectedSolr()) {
try {
-sb.index.urlMetadata().getSolr().clear();
+sb.index.fulltext().getSolr().clear();
} catch ( final Exception e ) {
Log.logException(e);
}
@@ -320,7 +320,7 @@ public class IndexControlRWIs_p {
URIMetadata lurl;
while (urlIter.hasNext()) {
iEntry = urlIter.next();
-lurl = segment.urlMetadata().getMetadata(iEntry.urlhash());
+lurl = segment.fulltext().getMetadata(iEntry.urlhash());
if (lurl == null) {
try {
unknownURLEntries.put(iEntry.urlhash());
@@ -415,8 +415,8 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
-final URIMetadata e = segment.urlMetadata().getMetadata(b);
-segment.urlMetadata().remove(b);
+final URIMetadata e = segment.fulltext().getMetadata(b);
+segment.fulltext().remove(b);
if ( e != null ) {
url = e.url();
pw.println(url.getHost() + "/" + url.getFile());
@@ -450,8 +450,8 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
-final URIMetadata e = segment.urlMetadata().getMetadata(b);
-segment.urlMetadata().remove(b);
+final URIMetadata e = segment.fulltext().getMetadata(b);
+segment.fulltext().remove(b);
if ( e != null ) {
url = e.url();
pw.println(url.getHost() + "/.*");

@@ -60,7 +60,7 @@ public class IndexControlURLs_p {
prop.put("urlstring", "");
prop.put("urlhash", "");
prop.put("result", "");
-prop.putNum("ucount", segment.urlMetadata().size());
+prop.putNum("ucount", segment.fulltext().size());
prop.put("otherHosts", "");
prop.put("genUrlProfile", 0);
prop.put("statistics", 1);
@@ -69,7 +69,7 @@ public class IndexControlURLs_p {
prop.put("reload", 0);
// show export messages
-final Fulltext.Export export = segment.urlMetadata().export();
+final Fulltext.Export export = segment.fulltext().export();
if ((export != null) && (export.isAlive())) {
// there is currently a running export
prop.put("lurlexport", 2);
@@ -132,7 +132,7 @@ public class IndexControlURLs_p {
}
if (post.containsKey("urlhashdelete")) {
-final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
@@ -166,7 +166,7 @@ public class IndexControlURLs_p {
final DigestURI url = new DigestURI(urlstring);
urlhash = ASCII.String(url.hash());
prop.put("urlhash", urlhash);
-final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
prop.putHTML("urlstring", urlstring);
@@ -184,7 +184,7 @@ public class IndexControlURLs_p {
}
if (post.containsKey("urlhashsearch")) {
-final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash);
} else {
@@ -199,7 +199,7 @@ public class IndexControlURLs_p {
// generate list
if (post.containsKey("urlhashsimilar")) {
try {
-final Iterator<URIMetadata> entryIt = new RotateIterator<URIMetadata>(segment.urlMetadata().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax());
+final Iterator<URIMetadata> entryIt = new RotateIterator<URIMetadata>(segment.fulltext().entries(true, urlhash), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), segment.termIndex().sizesMax());
final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:<br />");
URIMetadata entry;
int i = 0, rows = 0, cols = 0;
@@ -245,7 +245,7 @@ public class IndexControlURLs_p {
final File f = new File(s);
f.getParentFile().mkdirs();
final String filter = post.get("exportfilter", ".*");
-final Fulltext.Export running = segment.urlMetadata().export(f, filter, null, format, dom);
+final Fulltext.Export running = segment.fulltext().export(f, filter, null, format, dom);
prop.put("lurlexport_exportfile", s);
prop.put("lurlexport_urlcount", running.count());
@@ -258,7 +258,7 @@ public class IndexControlURLs_p {
if (post.containsKey("deletedomain")) {
final String hp = post.get("hashpart");
try {
-segment.urlMetadata().deleteDomain(hp);
+segment.fulltext().deleteDomain(hp);
} catch (final IOException e) {
// TODO Auto-generated catch block
Log.logException(e);
@@ -274,7 +274,7 @@ public class IndexControlURLs_p {
prop.put("statistics_lines", count);
int cnt = 0;
try {
-final Fulltext metadata = segment.urlMetadata();
+final Fulltext metadata = segment.fulltext();
statsiter = metadata.statistics(count, metadata.urlSampleScores(metadata.domainSampleCollector()));
boolean dark = true;
Fulltext.HostStat hs;
@@ -298,7 +298,7 @@ public class IndexControlURLs_p {
}
// insert constants
-prop.putNum("ucount", segment.urlMetadata().size());
+prop.putNum("ucount", segment.fulltext().size());
// return rewrite properties
return prop;
}
@@ -310,7 +310,7 @@ public class IndexControlURLs_p {
prop.put("genUrlProfile_urlhash", urlhash);
return prop;
}
-final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().getMetadata(entry.referrerHash());
+final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.fulltext().getMetadata(entry.referrerHash());
if (entry.url() == null) {
prop.put("genUrlProfile", "1");
prop.put("genUrlProfile_urlhash", urlhash);

@@ -73,23 +73,23 @@ public class IndexFederated_p {
} catch (IOException e) { Log.logException(e); } // switch on
boolean post_core_fulltext = post.getBoolean(SwitchboardConstants.CORE_SERVICE_FULLTEXT);
-final boolean previous_core_fulltext = sb.index.urlMetadata().connectedLocalSolr() && env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false);
+final boolean previous_core_fulltext = sb.index.fulltext().connectedLocalSolr() && env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false);
env.setConfig(SwitchboardConstants.CORE_SERVICE_FULLTEXT, post_core_fulltext);
final int commitWithinMs = post.getInt("solr.indexing.commitWithinMs", env.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000));
if (previous_core_fulltext && !post_core_fulltext) {
// switch off
-sb.index.urlMetadata().disconnectLocalSolr();
-sb.index.urlMetadata().disconnectUrlDb();
+sb.index.fulltext().disconnectLocalSolr();
+sb.index.fulltext().disconnectUrlDb();
}
if (!previous_core_fulltext && post_core_fulltext) {
// switch on
sb.index.connectUrlDb(sb.useTailCache, sb.exceed134217727);
-try { sb.index.urlMetadata().connectLocalSolr(commitWithinMs); } catch (IOException e) { Log.logException(e); }
+try { sb.index.fulltext().connectLocalSolr(commitWithinMs); } catch (IOException e) { Log.logException(e); }
}
// solr
-final boolean solrRemoteWasOn = sb.index.urlMetadata().connectedRemoteSolr() && env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, true);
+final boolean solrRemoteWasOn = sb.index.fulltext().connectedRemoteSolr() && env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, true);
final boolean solrRemoteIsOnAfterwards = post.getBoolean("solr.indexing.solrremote");
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, solrRemoteIsOnAfterwards);
String solrurls = post.get("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr"));
@@ -119,7 +119,7 @@ public class IndexFederated_p {
if (solrRemoteWasOn && !solrRemoteIsOnAfterwards) {
// switch off
-sb.index.urlMetadata().disconnectRemoteSolr();
+sb.index.fulltext().disconnectRemoteSolr();
}
if (!solrRemoteWasOn && solrRemoteIsOnAfterwards) {
@@ -129,18 +129,18 @@ public class IndexFederated_p {
if (usesolr) {
SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true);
solr.setCommitWithinMs(commitWithinMs);
-sb.index.urlMetadata().connectRemoteSolr(solr);
+sb.index.fulltext().connectRemoteSolr(solr);
} else {
-sb.index.urlMetadata().disconnectRemoteSolr();
+sb.index.fulltext().disconnectRemoteSolr();
}
} catch (final IOException e) {
Log.logException(e);
-sb.index.urlMetadata().disconnectRemoteSolr();
+sb.index.fulltext().disconnectRemoteSolr();
}
}
// read index scheme table flags
-final Iterator<ConfigurationSet.Entry> i = sb.index.urlMetadata().getSolrScheme().entryIterator();
+final Iterator<ConfigurationSet.Entry> i = sb.index.fulltext().getSolrScheme().entryIterator();
ConfigurationSet.Entry entry;
boolean modified = false; // flag to remember changes
while (i.hasNext()) {
@@ -163,18 +163,18 @@ public class IndexFederated_p {
}
if (modified) { // save settings to config file if modified
try {
-sb.index.urlMetadata().getSolrScheme().commit();
+sb.index.fulltext().getSolrScheme().commit();
modified = false;
} catch (IOException ex) {}
}
}
// show solr host table
-if (!sb.index.urlMetadata().connectedRemoteSolr()) {
+if (!sb.index.fulltext().connectedRemoteSolr()) {
prop.put("table", 0);
} else {
prop.put("table", 1);
-final SolrConnector solr = sb.index.urlMetadata().getRemoteSolr();
+final SolrConnector solr = sb.index.fulltext().getRemoteSolr();
final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((SingleSolrConnector) solr).getSize()};
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SingleSolrConnector) solr).getAdminInterface()};
boolean dark = false;
@@ -194,7 +194,7 @@ public class IndexFederated_p {
// use enum SolrField to keep defined order
for(YaCySchema field : YaCySchema.values()) {
prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
-prop.put("scheme_" + c + "_checked", sb.index.urlMetadata().getSolrScheme().contains(field.name()) ? 1 : 0);
+prop.put("scheme_" + c + "_checked", sb.index.fulltext().getSolrScheme().contains(field.name()) ? 1 : 0);
prop.putHTML("scheme_" + c + "_key", field.name());
prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment());
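
The IndexFederated_p hunks above also show the connector lifecycle that moves behind the renamed accessor. Condensed from this diff, with configuration reads and error handling omitted (solr and commitWithinMs are the local variables from the hunks):

// local Solr core on/off
sb.index.fulltext().connectLocalSolr(commitWithinMs);
sb.index.fulltext().disconnectLocalSolr();
// remote Solr connector on/off
sb.index.fulltext().connectRemoteSolr(solr);
sb.index.fulltext().disconnectRemoteSolr();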

@@ -51,7 +51,7 @@ public class IndexShare_p {
prop.put("dtable", "");
prop.put("rtable", "");
prop.putNum("wcount", indexSegment.termIndex().sizesMax());
-prop.putNum("ucount", indexSegment.urlMetadata().size());
+prop.putNum("ucount", indexSegment.fulltext().size());
return prop; // be save
}
@@ -64,7 +64,7 @@ public class IndexShare_p {
// insert constants
prop.putNum("wcount", indexSegment.termIndex().sizesMax());
-prop.putNum("ucount", indexSegment.urlMetadata().size());
+prop.putNum("ucount", indexSegment.fulltext().size());
// return rewrite properties
return prop;

@@ -124,7 +124,7 @@ public class QuickCrawlLink_p {
}
final byte[] urlhash = crawlingStartURL.hash();
-indexSegment.urlMetadata().remove(urlhash);
+indexSegment.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);

@@ -138,7 +138,7 @@ public class ViewFile {
// get the urlEntry that belongs to the url hash
//boolean ue = urlHash.length() > 0 && indexSegment.exists(ASCII.getBytes(urlHash));
//if (ue) Log.logInfo("ViewFile", "exists(" + urlHash + ")");
-if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().getMetadata(ASCII.getBytes(urlHash))) != null) {
+if (urlHash.length() > 0 && (urlEntry = indexSegment.fulltext().getMetadata(ASCII.getBytes(urlHash))) != null) {
// get the url that belongs to the entry
if (urlEntry == null || urlEntry.url() == null) {
prop.put("error", "3");

@@ -86,12 +86,12 @@ public class Vocabulary_p {
if (p >= 0) t = t.substring(p + 1);
}
if (discoverFromTitle || discoverFromTitleSplitted) {
-URIMetadata m = segment.urlMetadata().getMetadata(u.hash());
+URIMetadata m = segment.fulltext().getMetadata(u.hash());
if (m != null) t = m.dc_title();
if (t.endsWith(".jpg") || t.endsWith(".gif")) continue;
}
if (discoverFromAuthor) {
-URIMetadata m = segment.urlMetadata().getMetadata(u.hash());
+URIMetadata m = segment.fulltext().getMetadata(u.hash());
if (m != null) t = m.dc_creator();
}
t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim();

@@ -42,7 +42,7 @@ public class YBRFetch_p
// use an index segment to find hosts for given host hashes
final Segment segment = sb.index;
-final Fulltext metadata = segment.urlMetadata();
+final Fulltext metadata = segment.fulltext();
Map<String, HostStat> hostHashResolver;
try {
hostHashResolver = metadata.domainHashResolver(metadata.domainSampleCollector());

@@ -38,7 +38,7 @@ public class schema_p {
// write scheme
int c = 0;
-SolrConfiguration solrScheme = sb.index.urlMetadata().getSolrScheme();
+SolrConfiguration solrScheme = sb.index.fulltext().getSolrScheme();
for (YaCySchema field : YaCySchema.values()) {
if (solrScheme.contains(field.name())) {
prop.put("fields_" + c + "_solrname", field.getSolrFieldName());

@@ -76,7 +76,7 @@ public class status_p {
prop.put("trafficCrawler", ByteCount.getAccountCount(ByteCount.CRAWLER));
// index size
-prop.putNum("urlpublictextSize", segment.urlMetadata().size());
+prop.putNum("urlpublictextSize", segment.fulltext().size());
prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());
// loader queue

@@ -97,13 +97,13 @@ public class yacydoc {
}
if (urlhash == null || urlhash.isEmpty()) return prop;
-final URIMetadata entry = segment.urlMetadata().getMetadata(urlhash.getBytes());
+final URIMetadata entry = segment.fulltext().getMetadata(urlhash.getBytes());
if (entry == null) return prop;
if (entry.url() == null) {
return prop;
}
-final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().getMetadata(entry.referrerHash());
+final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.fulltext().getMetadata(entry.referrerHash());
prop.putXML("dc_title", entry.dc_title());
prop.putXML("dc_creator", entry.dc_creator());

@@ -34,7 +34,7 @@ public class add_ymark {
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
-final DigestURI url = sb.index.urlMetadata().getMetadata(urlHash.getBytes()).url();
+final DigestURI url = sb.index.fulltext().getMetadata(urlHash.getBytes()).url();
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {

@@ -96,7 +96,7 @@ public class searchresult {
post.put(CommonParams.ROWS, post.remove("num"));
// get the embedded connector
-EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.urlMetadata().getLocalSolr();
+EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();
if (connector == null) return null;
// do the solr request

@@ -146,7 +146,7 @@ public class select {
}
// get the embedded connector
-EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.urlMetadata().getLocalSolr();
+EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();
if (connector == null) return null;
// do the solr request

@@ -147,7 +147,7 @@ public final class crawlReceipt {
if ("fill".equals(result)) try {
// put new entry into database
-sb.index.urlMetadata().putMetadata(entry);
+sb.index.fulltext().putMetadata(entry);
ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));

@@ -109,7 +109,7 @@ public final class query {
if (obj.equals("lurlcount")) {
// return the number of all available l-url's
-prop.put("response", sb.index.urlMetadata().size());
+prop.put("response", sb.index.fulltext().size());
return prop;
}

@@ -204,7 +204,7 @@ public final class transferRWI {
// check if we need to ask for the corresponding URL
if (!(knownURL.has(urlHash) || unknownURL.has(urlHash))) try {
-if (sb.index.urlMetadata().exists(urlHash)) {
+if (sb.index.fulltext().exists(urlHash)) {
knownURL.put(urlHash);
} else {
unknownURL.put(urlHash);

@@ -84,7 +84,7 @@ public final class transferURL {
} else {
int received = 0;
int blocked = 0;
-final int sizeBefore = sb.index.urlMetadata().size();
+final int sizeBefore = sb.index.fulltext().size();
// read the urls from the other properties and store
String urls;
URIMetadata lEntry;
@@ -141,7 +141,7 @@ public final class transferURL {
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
try {
-sb.index.urlMetadata().putMetadata(lEntry);
+sb.index.fulltext().putMetadata(lEntry);
ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
received++;
@@ -153,7 +153,7 @@ public final class transferURL {
sb.peers.mySeed().incRU(received);
// return rewrite properties
-final int more = sb.index.urlMetadata().size() - sizeBefore;
+final int more = sb.index.fulltext().size() - sizeBefore;
doublevalues = Integer.toString(received - more);
Network.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs");
EventChannel.channels(EventChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + ", blocked " + blocked + " URLs from peer " + otherPeerName, "", otherPeer.hash));

@@ -113,7 +113,7 @@ public class urls {
URIMetadata entry;
DigestURI referrer;
for (int i = 0; i < count; i++) {
-entry = sb.index.urlMetadata().getMetadata(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
+entry = sb.index.fulltext().getMetadata(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
if (entry == null) continue;
// find referrer, if there is one
referrer = sb.getURL(entry.referrerHash());

@@ -355,7 +355,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
-indexSegment.urlMetadata().clearCache();
+indexSegment.fulltext().clearCache();
SearchEventCache.cleanupEvents(false);
}
@@ -660,7 +660,7 @@ public class yacysearch {
return prop;
}
final String recommendHash = post.get("recommendref", ""); // urlhash
-final URIMetadata urlentry = indexSegment.urlMetadata().getMetadata(UTF8.getBytes(recommendHash));
+final URIMetadata urlentry = indexSegment.fulltext().getMetadata(UTF8.getBytes(recommendHash));
if ( urlentry != null ) {
Document[] documents = null;
try {
@@ -696,7 +696,7 @@ public class yacysearch {
return prop;
}
final String bookmarkHash = post.get("bookmarkref", ""); // urlhash
-final URIMetadata urlentry = indexSegment.urlMetadata().getMetadata(UTF8.getBytes(bookmarkHash));
+final URIMetadata urlentry = indexSegment.fulltext().getMetadata(UTF8.getBytes(bookmarkHash));
if ( urlentry != null ) {
try {
sb.tables.bookmarks.createBookmark(

@@ -82,8 +82,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-this.errorURL = new ZURL(sb.index.urlMetadata().getSolr(), sb.index.urlMetadata().getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-this.delegatedURL = new ZURL(sb.index.urlMetadata().getSolr(), sb.index.urlMetadata().getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+this.errorURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+this.delegatedURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@@ -94,8 +94,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-this.errorURL = new ZURL(this.sb.index.urlMetadata().getSolr(), this.sb.index.urlMetadata().getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-this.delegatedURL = new ZURL(this.sb.index.urlMetadata().getSolr(), this.sb.index.urlMetadata().getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
+this.errorURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
+this.delegatedURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public synchronized void close() {

@@ -197,7 +197,7 @@ public final class CrawlStacker {
final DigestURI url = new DigestURI(e.getKey());
final byte[] urlhash = url.hash();
if (replace) {
-this.indexSegment.urlMetadata().remove(urlhash);
+this.indexSegment.fulltext().remove(urlhash);
this.nextQueue.urlRemove(urlhash);
String u = url.toNormalform(true, true);
if (u.endsWith("/")) {
@@ -207,7 +207,7 @@ public final class CrawlStacker {
}
try {
final byte[] uh = new DigestURI(u).hash();
-this.indexSegment.urlMetadata().remove(uh);
+this.indexSegment.fulltext().remove(uh);
this.nextQueue.noticeURL.removeByURLHash(uh);
this.nextQueue.errorURL.remove(uh);
} catch (final MalformedURLException e1) {}
@@ -255,7 +255,7 @@ public final class CrawlStacker {
}
final byte[] urlhash = url.hash();
if (replace) {
-CrawlStacker.this.indexSegment.urlMetadata().remove(urlhash);
+CrawlStacker.this.indexSegment.fulltext().remove(urlhash);
cq.noticeURL.removeByURLHash(urlhash);
cq.errorURL.remove(urlhash);
}
@@ -437,7 +437,7 @@ public final class CrawlStacker {
// check if the url is double registered
final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
-final URIMetadata oldEntry = this.indexSegment.urlMetadata().getMetadata(url.hash());
+final URIMetadata oldEntry = this.indexSegment.fulltext().getMetadata(url.hash());
if (oldEntry == null) {
if (dbocc != null) {
// do double-check

@@ -84,7 +84,7 @@ public class SitemapImporter extends Thread {
final String dbocc = this.sb.urlExists(nexturlhash);
if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) {
// the url was already loaded. we need to check the date
-final URIMetadata oldEntry = this.sb.index.urlMetadata().getMetadata(nexturlhash);
+final URIMetadata oldEntry = this.sb.index.fulltext().getMetadata(nexturlhash);
if (oldEntry != null) {
final Date modDate = oldEntry.moddate();
// check if modDate is null

@@ -82,7 +82,7 @@ public class YMarkMetadata {
public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
this.document = null;
this.indexSegment = indexSegment;
-this.uri = this.indexSegment.urlMetadata().getMetadata(urlHash).url();
+this.uri = this.indexSegment.fulltext().getMetadata(urlHash).url();
}
public YMarkMetadata(final Document document) {
@@ -106,7 +106,7 @@ public class YMarkMetadata {
public EnumMap<METADATA, String> getMetadata() {
final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
-final URIMetadata urlEntry = this.indexSegment.urlMetadata().getMetadata(this.uri.hash());
+final URIMetadata urlEntry = this.indexSegment.fulltext().getMetadata(this.uri.hash());
if (urlEntry != null) {
metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size()));
metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate()));

@@ -754,7 +754,7 @@ public final class Protocol
// passed all checks, store url
try {
-indexSegment.urlMetadata().putMetadata(urlEntry);
+indexSegment.fulltext().putMetadata(urlEntry);
ResultURLs.stack(
urlEntry,
mySeed.hash.getBytes(),

@@ -176,7 +176,7 @@ public class Transmission {
notFoundx.add(e.urlhash());
continue;
}
-final URIMetadata r = Transmission.this.segment.urlMetadata().getMetadata(e.urlhash());
+final URIMetadata r = Transmission.this.segment.fulltext().getMetadata(e.urlhash());
if (r == null) {
notFoundx.add(e.urlhash());
this.badReferences.put(e.urlhash());

@@ -397,7 +397,7 @@ public final class Switchboard extends serverSwitch
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {
this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
-this.index.urlMetadata().connectLocalSolr(connectWithinMs);
+this.index.fulltext().connectLocalSolr(connectWithinMs);
}
// set up the solr interface
@@ -411,7 +411,7 @@ public final class Switchboard extends serverSwitch
ShardSelection.Method.MODULO_HOST_MD5,
10000, true);
solr.setCommitWithinMs(connectWithinMs);
-this.index.urlMetadata().connectRemoteSolr(solr);
+this.index.fulltext().connectRemoteSolr(solr);
} catch ( final IOException e ) {
Log.logException(e);
}
@@ -1133,7 +1133,7 @@ public final class Switchboard extends serverSwitch
synchronized ( this ) {
// remember the solr scheme
-SolrConfiguration solrScheme = this.index.urlMetadata().getSolrScheme();
+SolrConfiguration solrScheme = this.index.fulltext().getSolrScheme();
// shut down
this.crawler.close();
@@ -1186,7 +1186,7 @@ public final class Switchboard extends serverSwitch
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {
-this.index.urlMetadata().connectLocalSolr(connectWithinMs);
+this.index.fulltext().connectLocalSolr(connectWithinMs);
this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
}
@@ -1201,7 +1201,7 @@ public final class Switchboard extends serverSwitch
ShardSelection.Method.MODULO_HOST_MD5,
10000, true);
solr.setCommitWithinMs(connectWithinMs);
-this.index.urlMetadata().connectRemoteSolr(solr);
+this.index.fulltext().connectRemoteSolr(solr);
} catch ( final IOException e ) {
Log.logException(e);
}
@@ -1436,14 +1436,14 @@ public final class Switchboard extends serverSwitch
// tests if hash occurrs in any database
// if it exists, the name of the database is returned,
// if it not exists, null is returned
-if ( this.index.urlMetadata().exists(hash) ) {
+if ( this.index.fulltext().exists(hash) ) {
return "loaded";
}
return this.crawlQueues.urlExists(hash);
}
public void urlRemove(final Segment segment, final byte[] hash) {
-segment.urlMetadata().remove(hash);
+segment.fulltext().remove(hash);
ResultURLs.remove(ASCII.String(hash));
this.crawlQueues.urlRemove(hash);
}
@@ -1455,7 +1455,7 @@ public final class Switchboard extends serverSwitch
if ( urlhash.length == 0 ) {
return null;
}
-final URIMetadata le = this.index.urlMetadata().getMetadata(urlhash);
+final URIMetadata le = this.index.fulltext().getMetadata(urlhash);
if ( le != null ) {
return le.url();
}
@@ -1885,7 +1885,7 @@ public final class Switchboard extends serverSwitch
// clear caches if necessary
if ( !MemoryControl.request(8000000L, false) ) {
-sb.index.urlMetadata().clearCache();
+sb.index.fulltext().clearCache();
SearchEventCache.cleanupEvents(false);
this.trail.clear();
}
@@ -2932,7 +2932,7 @@ public final class Switchboard extends serverSwitch
return "no DHT distribution: not enabled (per setting)";
}
final Segment indexSegment = this.index;
-int size = indexSegment.urlMetadata().size();
+int size = indexSegment.fulltext().size();
if ( size < 10 ) {
return "no DHT distribution: loadedURL.size() = " + size;
}

@@ -80,7 +80,7 @@ public class DocumentIndex extends Segment {
false, // useTailCache
false // exceed134217727
);
-super.urlMetadata().connectLocalSolr(1000);
+super.fulltext().connectLocalSolr(1000);
final int cores = Runtime.getRuntime().availableProcessors() + 1;
this.callback = callback;
this.queue = new LinkedBlockingQueue<DigestURI>(cores * 300);

@@ -99,7 +99,7 @@ public class Segment {
private final Log log;
private final File segmentPath;
-protected final Fulltext urlMetadata;
+protected final Fulltext fulltext;
protected IndexCell<WordReference> termIndex;
protected IndexCell<CitationReference> urlCitationIndex;
@@ -109,7 +109,7 @@ public class Segment {
this.segmentPath = segmentPath;
// create LURL-db
-this.urlMetadata = new Fulltext(segmentPath, solrScheme);
+this.fulltext = new Fulltext(segmentPath, solrScheme);
}
public boolean connectedRWI() {
@@ -161,11 +161,11 @@ public class Segment {
}
public void connectUrlDb(final boolean useTailCache, final boolean exceed134217727) {
-this.urlMetadata.connectUrlDb(UrlDbName, useTailCache, exceed134217727);
+this.fulltext.connectUrlDb(UrlDbName, useTailCache, exceed134217727);
}
-public Fulltext urlMetadata() {
-return this.urlMetadata;
+public Fulltext fulltext() {
+return this.fulltext;
}
public IndexCell<WordReference> termIndex() {
@@ -177,7 +177,7 @@ public class Segment {
}
public long URLCount() {
-return this.urlMetadata.size();
+return this.fulltext.size();
}
public long RWICount() {
@@ -191,7 +191,7 @@ public class Segment {
}
public boolean exists(final byte[] urlhash) {
-return this.urlMetadata.exists(urlhash);
+return this.fulltext.exists(urlhash);
}
/**
@@ -203,7 +203,7 @@ public class Segment {
private Iterator<byte[]> hostSelector(String host) {
String hh = DigestURI.hosthash(host);
final HandleSet ref = new RowHandleSet(12, Base64Order.enhancedCoder, 100);
-for (byte[] b: this.urlMetadata) {
+for (byte[] b: this.fulltext) {
if (hh.equals(ASCII.String(b, 6, 6))) {
try {
ref.putUnique(b);
@@ -234,7 +234,7 @@ public class Segment {
}
@Override
public DigestURI next() {
-URIMetadata umr = Segment.this.urlMetadata.getMetadata(bi.next());
+URIMetadata umr = Segment.this.fulltext.getMetadata(bi.next());
return umr.url();
}
@Override
@@ -260,7 +260,7 @@ public class Segment {
public void clear() {
try {
if (this.termIndex != null) this.termIndex.clear();
-if (this.urlMetadata != null) this.urlMetadata.clear();
+if (this.fulltext != null) this.fulltext.clear();
if (this.urlCitationIndex != null) this.urlCitationIndex.clear();
} catch (final IOException e) {
Log.logException(e);
@@ -297,7 +297,7 @@ public class Segment {
public synchronized void close() {
if (this.termIndex != null) this.termIndex.close();
-if (this.urlMetadata != null) this.urlMetadata.close();
+if (this.fulltext != null) this.fulltext.close();
if (this.urlCitationIndex != null) this.urlCitationIndex.close();
}
@@ -402,14 +402,14 @@ public class Segment {
// STORE TO SOLR
// we do not store the data in metadatadb any more if a solr is connected
-if (this.urlMetadata.connectedSolr()) {
+if (this.fulltext.connectedSolr()) {
try {
-this.urlMetadata.putDocument(this.urlMetadata.getSolrScheme().yacy2solr(id, responseHeader, document, metadata));
+this.fulltext.putDocument(this.fulltext.getSolrScheme().yacy2solr(id, responseHeader, document, metadata));
} catch ( final IOException e ) {
Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage());
}
} else {
-this.urlMetadata.putMetadata(metadata);
+this.fulltext.putMetadata(metadata);
}
final long storageEndTime = System.currentTimeMillis();
@@ -514,7 +514,7 @@ public class Segment {
if (urlhash == null) return 0;
// determine the url string
-final URIMetadata entry = urlMetadata().getMetadata(urlhash);
+final URIMetadata entry = fulltext().getMetadata(urlhash);
if (entry == null) return 0;
if (entry.url() == null) return 0;
@@ -523,7 +523,7 @@ public class Segment {
final Document document = Document.mergeDocuments(entry.url(), null, loader.loadDocuments(loader.request(entry.url(), true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay));
if (document == null) {
// delete just the url entry
-urlMetadata().remove(urlhash);
+fulltext().remove(urlhash);
return 0;
}
// get the word set
@@ -535,7 +535,7 @@ public class Segment {
if (words != null) count = termIndex().remove(Word.words2hashesHandles(words), urlhash);
// finally delete the url entry itself
-urlMetadata().remove(urlhash);
+fulltext().remove(urlhash);
return count;
} catch (final Parser.Failure e) {
return 0;
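
The Segment.java hunks above are the origin of the rename; every other hunk in this commit merely follows the accessor. Field and getter change together, so the commit is behavior-preserving. Condensed from the hunks, the affected members now read roughly like this (unchanged members omitted):

public class Segment {
    protected final Fulltext fulltext; // was: urlMetadata

    public Fulltext fulltext() {       // was: urlMetadata()
        return this.fulltext;
    }

    public long URLCount() {
        return this.fulltext.size();
    }
}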

@@ -209,7 +209,7 @@ public final class RWIProcess extends Thread
String solrQuery = RWIProcess.this.query.solrQuery();
try {
ReferenceContainer<WordReference> wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
-SolrDocumentList sdl = RWIProcess.this.query.getSegment().urlMetadata().getSolr().query(solrQuery, 0, 20);
+SolrDocumentList sdl = RWIProcess.this.query.getSegment().fulltext().getSolr().query(solrQuery, 0, 20);
for (SolrDocument d : sdl) {
try {
URIMetadataNode md = new URIMetadataNode(d);
@@ -663,7 +663,7 @@ public final class RWIProcess extends Thread
if ( obrwi == null ) {
return null; // all time was already wasted in takeRWI to get another element
}
-final URIMetadata page = this.query.getSegment().urlMetadata().getMetadata(obrwi.getElement(), obrwi.getWeight());
+final URIMetadata page = this.query.getSegment().fulltext().getMetadata(obrwi.getElement(), obrwi.getWeight());
if ( page == null ) {
try {
this.misses.putUnique(obrwi.getElement().urlhash());
@@ -911,7 +911,7 @@ public final class RWIProcess extends Thread
continue;
}
urlhash = this.hostResolver.get(hosthash);
-row = urlhash == null ? null : this.query.getSegment().urlMetadata().getMetadata(urlhash);
+row = urlhash == null ? null : this.query.getSegment().fulltext().getMetadata(urlhash);
hostname = row == null ? null : row.url().getHost();
if ( hostname != null ) {
result.set(hostname, this.hostNavigator.get(hosthash));

@@ -448,7 +448,7 @@ public class SnippetProcess {
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults;
this.shallrun = true;
-this.metadata = SnippetProcess.this.rankingProcess.getQuery().getSegment().urlMetadata();
+this.metadata = SnippetProcess.this.rankingProcess.getQuery().getSegment().fulltext();
}
@Override

@@ -97,7 +97,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
} catch (IOException e) {
Log.logException(e);
}
-indexSegment.urlMetadata().remove(urlentry.hash()); // clean up
+indexSegment.fulltext().remove(urlentry.hash()); // clean up
throw new RuntimeException("index void");
}
this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
