- refactoring (load -> getMetadata)

- added getDocument to retrieve Solr documents which shall replace getMetadata
pull/1/head
Michael Peter Christen 12 years ago
parent 395b78a0d8
commit 18f989dfb1
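
The rename is mechanical at each call site. A minimal sketch of the migrated pattern, assuming a Switchboard sb and a URL hash string urlHash as in the servlets below:

    // formerly: urlMetadata().load(hash) / urlMetadata().store(entry)
    URIMetadata meta = sb.index.urlMetadata().getMetadata(ASCII.getBytes(urlHash));

    // new in this commit: fetch the raw Solr document for the same hash;
    // getDocument shall eventually replace getMetadata
    SolrDocument doc = sb.index.urlMetadata().getDocument(ASCII.getBytes(urlHash));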

@@ -195,7 +195,7 @@ public class Bookmarks {
final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
if (bookmark == null) {
// try to get the bookmark from the LURL database
-final URIMetadata urlentry = sb.index.urlMetadata().load(ASCII.getBytes(urlHash));
+final URIMetadata urlentry = sb.index.urlMetadata().getMetadata(ASCII.getBytes(urlHash));
if (urlentry != null) try {
final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay));
prop.put("mode_edit", "0"); // create mode

@@ -186,7 +186,7 @@ public class CrawlResults {
while (i.hasNext()) {
entry = i.next();
try {
-urle = sb.index.urlMetadata().load(UTF8.getBytes(entry.getKey()));
+urle = sb.index.urlMetadata().getMetadata(UTF8.getBytes(entry.getKey()));
if (urle == null) {
Log.logWarning("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null;

@@ -320,7 +320,7 @@ public class IndexControlRWIs_p {
URIMetadata lurl;
while (urlIter.hasNext()) {
iEntry = urlIter.next();
-lurl = segment.urlMetadata().load(iEntry.urlhash());
+lurl = segment.urlMetadata().getMetadata(iEntry.urlhash());
if (lurl == null) {
try {
unknownURLEntries.put(iEntry.urlhash());
@@ -415,7 +415,7 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
-final URIMetadata e = segment.urlMetadata().load(b);
+final URIMetadata e = segment.urlMetadata().getMetadata(b);
segment.urlMetadata().remove(b);
if ( e != null ) {
url = e.url();
@@ -450,7 +450,7 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
-final URIMetadata e = segment.urlMetadata().load(b);
+final URIMetadata e = segment.urlMetadata().getMetadata(b);
segment.urlMetadata().remove(b);
if ( e != null ) {
url = e.url();

@@ -132,7 +132,7 @@ public class IndexControlURLs_p {
}
if (post.containsKey("urlhashdelete")) {
-final URIMetadata entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
@@ -166,7 +166,7 @@ public class IndexControlURLs_p {
final DigestURI url = new DigestURI(urlstring);
urlhash = ASCII.String(url.hash());
prop.put("urlhash", urlhash);
-final URIMetadata entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
prop.putHTML("urlstring", urlstring);
@@ -184,7 +184,7 @@ public class IndexControlURLs_p {
}
if (post.containsKey("urlhashsearch")) {
-final URIMetadata entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
+final URIMetadata entry = segment.urlMetadata().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash);
} else {
@@ -310,7 +310,7 @@ public class IndexControlURLs_p {
prop.put("genUrlProfile_urlhash", urlhash);
return prop;
}
-final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
+final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().getMetadata(entry.referrerHash());
if (entry.url() == null) {
prop.put("genUrlProfile", "1");
prop.put("genUrlProfile_urlhash", urlhash);

@@ -138,7 +138,7 @@ public class ViewFile {
// get the urlEntry that belongs to the url hash
//boolean ue = urlHash.length() > 0 && indexSegment.exists(ASCII.getBytes(urlHash));
//if (ue) Log.logInfo("ViewFile", "exists(" + urlHash + ")");
-if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) {
+if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().getMetadata(ASCII.getBytes(urlHash))) != null) {
// get the url that belongs to the entry
if (urlEntry == null || urlEntry.url() == null) {
prop.put("error", "3");

@@ -86,12 +86,12 @@ public class Vocabulary_p {
if (p >= 0) t = t.substring(p + 1);
}
if (discoverFromTitle || discoverFromTitleSplitted) {
-URIMetadata m = segment.urlMetadata().load(u.hash());
+URIMetadata m = segment.urlMetadata().getMetadata(u.hash());
if (m != null) t = m.dc_title();
if (t.endsWith(".jpg") || t.endsWith(".gif")) continue;
}
if (discoverFromAuthor) {
-URIMetadata m = segment.urlMetadata().load(u.hash());
+URIMetadata m = segment.urlMetadata().getMetadata(u.hash());
if (m != null) t = m.dc_creator();
}
t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim();

@@ -97,13 +97,13 @@ public class yacydoc {
}
if (urlhash == null || urlhash.isEmpty()) return prop;
-final URIMetadata entry = segment.urlMetadata().load(urlhash.getBytes());
+final URIMetadata entry = segment.urlMetadata().getMetadata(urlhash.getBytes());
if (entry == null) return prop;
if (entry.url() == null) {
return prop;
}
-final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
+final URIMetadata le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().getMetadata(entry.referrerHash());
prop.putXML("dc_title", entry.dc_title());
prop.putXML("dc_creator", entry.dc_creator());

@@ -34,7 +34,7 @@ public class add_ymark {
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
-final DigestURI url = sb.index.urlMetadata().load(urlHash.getBytes()).url();
+final DigestURI url = sb.index.urlMetadata().getMetadata(urlHash.getBytes()).url();
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {

@@ -147,7 +147,7 @@ public final class crawlReceipt {
if ("fill".equals(result)) try {
// put new entry into database
-sb.index.urlMetadata().store(entry);
+sb.index.urlMetadata().putMetadata(entry);
ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));

@@ -141,7 +141,7 @@ public final class transferURL {
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
try {
-sb.index.urlMetadata().store(lEntry);
+sb.index.urlMetadata().putMetadata(lEntry);
ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
received++;

@@ -113,7 +113,7 @@ public class urls {
URIMetadata entry;
DigestURI referrer;
for (int i = 0; i < count; i++) {
-entry = sb.index.urlMetadata().load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
+entry = sb.index.urlMetadata().getMetadata(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
if (entry == null) continue;
// find referrer, if there is one
referrer = sb.getURL(entry.referrerHash());

@@ -660,7 +660,7 @@ public class yacysearch {
return prop;
}
final String recommendHash = post.get("recommendref", ""); // urlhash
-final URIMetadata urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash));
+final URIMetadata urlentry = indexSegment.urlMetadata().getMetadata(UTF8.getBytes(recommendHash));
if ( urlentry != null ) {
Document[] documents = null;
try {
@@ -696,7 +696,7 @@ public class yacysearch {
return prop;
}
final String bookmarkHash = post.get("bookmarkref", ""); // urlhash
-final URIMetadata urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash));
+final URIMetadata urlentry = indexSegment.urlMetadata().getMetadata(UTF8.getBytes(bookmarkHash));
if ( urlentry != null ) {
try {
sb.tables.bookmarks.createBookmark(

@@ -437,7 +437,7 @@ public final class CrawlStacker {
// check if the url is double registered
final String dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
-final URIMetadata oldEntry = this.indexSegment.urlMetadata().load(url.hash());
+final URIMetadata oldEntry = this.indexSegment.urlMetadata().getMetadata(url.hash());
if (oldEntry == null) {
if (dbocc != null) {
// do double-check

@@ -84,7 +84,7 @@ public class SitemapImporter extends Thread {
final String dbocc = this.sb.urlExists(nexturlhash);
if ((dbocc != null) && (dbocc.equalsIgnoreCase("loaded"))) {
// the url was already loaded. we need to check the date
-final URIMetadata oldEntry = this.sb.index.urlMetadata().load(nexturlhash);
+final URIMetadata oldEntry = this.sb.index.urlMetadata().getMetadata(nexturlhash);
if (oldEntry != null) {
final Date modDate = oldEntry.moddate();
// check if modDate is null

@@ -82,7 +82,7 @@ public class YMarkMetadata {
public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
this.document = null;
this.indexSegment = indexSegment;
-this.uri = this.indexSegment.urlMetadata().load(urlHash).url();
+this.uri = this.indexSegment.urlMetadata().getMetadata(urlHash).url();
}
public YMarkMetadata(final Document document) {
@@ -106,7 +106,7 @@ public class YMarkMetadata {
public EnumMap<METADATA, String> getMetadata() {
final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
-final URIMetadata urlEntry = this.indexSegment.urlMetadata().load(this.uri.hash());
+final URIMetadata urlEntry = this.indexSegment.urlMetadata().getMetadata(this.uri.hash());
if (urlEntry != null) {
metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size()));
metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate()));

@@ -754,7 +754,7 @@ public final class Protocol
// passed all checks, store url
try {
-indexSegment.urlMetadata().store(urlEntry);
+indexSegment.urlMetadata().putMetadata(urlEntry);
ResultURLs.stack(
urlEntry,
mySeed.hash.getBytes(),

@@ -176,7 +176,7 @@ public class Transmission {
notFoundx.add(e.urlhash());
continue;
}
-final URIMetadata r = Transmission.this.segment.urlMetadata().load(e.urlhash());
+final URIMetadata r = Transmission.this.segment.urlMetadata().getMetadata(e.urlhash());
if (r == null) {
notFoundx.add(e.urlhash());
this.badReferences.put(e.urlhash());

@@ -1455,7 +1455,7 @@ public final class Switchboard extends serverSwitch
if ( urlhash.length == 0 ) {
return null;
}
-final URIMetadata le = this.index.urlMetadata().load(urlhash);
+final URIMetadata le = this.index.urlMetadata().getMetadata(urlhash);
if ( le != null ) {
return le.url();
}

@@ -62,6 +62,7 @@ import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.util.ClientUtils;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
@@ -197,17 +198,17 @@ public final class MetadataRepository implements Iterable<byte[]> {
* @param obrwi
* @return
*/
-public URIMetadata load(WordReference wre, long weight) {
+public URIMetadata getMetadata(WordReference wre, long weight) {
if (wre == null) return null; // all time was already wasted in takeRWI to get another element
-return load(wre.urlhash(), wre, weight);
+return getMetadata(wre.urlhash(), wre, weight);
}
-public URIMetadata load(final byte[] urlHash) {
+public URIMetadata getMetadata(final byte[] urlHash) {
if (urlHash == null) return null;
-return load(urlHash, null, 0);
+return getMetadata(urlHash, null, 0);
}
-private URIMetadata load(final byte[] urlHash, WordReference wre, long weight) {
+private URIMetadata getMetadata(final byte[] urlHash, WordReference wre, long weight) {
// get the metadata from Solr
try {
@@ -228,7 +229,39 @@ public final class MetadataRepository implements Iterable<byte[]> {
return null;
}
-public void store(final URIMetadata entry) throws IOException {
+public SolrDocument getDocument(WordReference wre, long weight) {
+if (wre == null) return null; // all time was already wasted in takeRWI to get another element
+return getDocument(wre.urlhash(), wre, weight);
+}
+public SolrDocument getDocument(final byte[] urlHash) {
+if (urlHash == null) return null;
+return getDocument(urlHash, null, 0);
+}
+private SolrDocument getDocument(final byte[] urlHash, WordReference wre, long weight) {
+// get the document from Solr
+try {
+SolrDocument doc = this.solr.get(ASCII.String(urlHash));
+if (doc != null) return doc;
+} catch (IOException e) {
+Log.logException(e);
+}
+// get the document from the old metadata index
+if (this.urlIndexFile != null) try {
+final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
+if (entry == null) return null;
+return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(new URIMetadataRow(entry, wre, weight)));
+} catch (final IOException e) {
+Log.logException(e);
+}
+return null;
+}
+public void putMetadata(final URIMetadata entry) throws IOException {
if (this.connectedSolr()) {
try {
SolrDocument sd = getSolr().get(ASCII.String(entry.url().hash()));
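
The new getDocument mirrors the two-tier lookup of getMetadata: it queries Solr by URL hash first and, on a miss, rebuilds a SolrDocument from the legacy urlIndexFile row via getSolrScheme().metadata2solr(...) and ClientUtils.toSolrDocument(...). A minimal usage sketch, with segment and urlHash as in the callers above; the field key is illustrative, the real keys come from the configured Solr scheme:

    SolrDocument doc = segment.urlMetadata().getDocument(urlHash);
    if (doc != null) {
        // "title" is a hypothetical field key here
        Object title = doc.getFieldValue("title");
    }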
@@ -530,7 +563,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
final TreeSet<String> set = new TreeSet<String>();
for (final URLHashCounter hs: domainSamples.values()) {
if (hs == null) continue;
-urlref = this.load(hs.urlhashb);
+urlref = this.getMetadata(hs.urlhashb);
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
set.add(urlref.url().getHost());
count--;
@@ -569,7 +602,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
}
DigestURI url;
for (final Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
-urlref = this.load(e.getValue().urlhashb);
+urlref = this.getMetadata(e.getValue().urlhashb);
url = urlref.url();
hostMap.put(e.getKey(), new HostStat(url.getHost(), url.getPort(), e.getKey(), hosthashScore.get(e.getKey())));
}
@@ -591,7 +624,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
while (j.hasNext()) {
urlhash = j.next();
if (urlhash == null) continue;
-urlref = this.load(ASCII.getBytes(urlhash));
+urlref = this.getMetadata(ASCII.getBytes(urlhash));
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
if (this.statsDump == null) return new ArrayList<HostStat>().iterator(); // some other operation has destroyed the object
url = urlref.url();

@@ -287,7 +287,7 @@ public class Segment {
}
@Override
public DigestURI next() {
-URIMetadata umr = Segment.this.urlMetadata.load(bi.next());
+URIMetadata umr = Segment.this.urlMetadata.getMetadata(bi.next());
return umr.url();
}
@Override
@@ -463,7 +463,7 @@ public class Segment {
Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage());
}
} else {
-this.urlMetadata.store(metadata);
+this.urlMetadata.putMetadata(metadata);
}
final long storageEndTime = System.currentTimeMillis();
@@ -568,7 +568,7 @@ public class Segment {
if (urlhash == null) return 0;
// determine the url string
-final URIMetadata entry = urlMetadata().load(urlhash);
+final URIMetadata entry = urlMetadata().getMetadata(urlhash);
if (entry == null) return 0;
if (entry.url() == null) return 0;

@@ -658,7 +658,7 @@ public final class RWIProcess extends Thread
if ( obrwi == null ) {
return null; // all time was already wasted in takeRWI to get another element
}
-final URIMetadata page = this.query.getSegment().urlMetadata().load(obrwi.getElement(), obrwi.getWeight());
+final URIMetadata page = this.query.getSegment().urlMetadata().getMetadata(obrwi.getElement(), obrwi.getWeight());
if ( page == null ) {
try {
this.misses.putUnique(obrwi.getElement().urlhash());
@@ -906,7 +906,7 @@ public final class RWIProcess extends Thread
continue;
}
urlhash = this.hostResolver.get(hosthash);
-row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash);
+row = urlhash == null ? null : this.query.getSegment().urlMetadata().getMetadata(urlhash);
hostname = row == null ? null : row.url().getHost();
if ( hostname != null ) {
result.set(hostname, this.hostNavigator.get(hosthash));
