removed overhead by preventing generation of full search results when

only the url is requested
pull/1/head
Michael Peter Christen 12 years ago
parent a114bb23bb
commit 4eab3aae60

@ -378,10 +378,9 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
final URIMetadataNode e = segment.fulltext().getMetadata(b);
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( e != null ) {
url = e.url();
if ( url != null ) {
pw.println(url.getHost() + "/" + url.getFile());
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
if ( ListManager.listSetContains(
@ -413,10 +412,9 @@ public class IndexControlRWIs_p {
} catch ( final SpaceExceededException e ) {
Log.logException(e);
}
final URIMetadataNode e = segment.fulltext().getMetadata(b);
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( e != null ) {
url = e.url();
if ( url != null ) {
pw.println(url.getHost() + "/.*");
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains(

@ -175,11 +175,11 @@ public class IndexControlURLs_p {
}
if (post.containsKey("urlhashdelete")) {
final URIMetadataNode entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
final DigestURI url = segment.fulltext().getURL(ASCII.getBytes(urlhash));
if (url == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
urlstring = entry.url().toNormalform(true);
urlstring = url.toNormalform(true);
prop.put("urlstring", "");
sb.urlRemove(segment, urlhash.getBytes());
prop.putHTML("result", "Removed URL " + urlstring);
@ -233,9 +233,9 @@ public class IndexControlURLs_p {
// generate list
if (post.containsKey("urlhashsimilar")) {
final Iterator<URIMetadataNode> entryIt = new RotateIterator<URIMetadataNode>(segment.fulltext().entries(), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), (int) segment.RWICount());
final Iterator<DigestURI> entryIt = new RotateIterator<DigestURI>(segment.fulltext().urls(), ASCII.String(Base64Order.zero((urlhash == null ? 0 : urlhash.length()))), (int) segment.RWICount());
final StringBuilder result = new StringBuilder("Sequential List of URL-Hashes:<br />");
URIMetadataNode entry;
DigestURI entry;
int i = 0, rows = 0, cols = 0;
prop.put("urlhashsimilar", "1");
while (entryIt.hasNext() && i < 256) {

@ -33,7 +33,7 @@ public class add_ymark {
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
final DigestURI url = sb.index.fulltext().getMetadata(urlHash.getBytes()).url();
final DigestURI url = sb.index.fulltext().getURL(urlHash.getBytes());
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.BOOKMARK.FOLDERS.deflt());
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {

@ -115,7 +115,7 @@ public class searchresult {
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
post.put("defType", "edismax");
post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^200.0"); // a bost query that moves double content to the back
post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a bost query that moves double content to the back
post.put(CommonParams.FL,
YaCySchema.content_type.getSolrFieldName() + ',' +
YaCySchema.id.getSolrFieldName() + ',' +

@ -677,12 +677,12 @@ public class yacysearch {
return prop;
}
final String bookmarkHash = post.get("bookmarkref", ""); // urlhash
final URIMetadataNode urlentry = indexSegment.fulltext().getMetadata(UTF8.getBytes(bookmarkHash));
if ( urlentry != null ) {
final DigestURI url = indexSegment.fulltext().getURL(UTF8.getBytes(bookmarkHash));
if ( url != null ) {
try {
sb.tables.bookmarks.createBookmark(
sb.loader,
urlentry.url(),
url,
YMarkTables.USER_ADMIN,
true,
"searchresult",

@ -82,7 +82,7 @@ public class YMarkMetadata {
public YMarkMetadata(final byte[] urlHash, final Segment indexSegment) {
this.document = null;
this.indexSegment = indexSegment;
this.uri = this.indexSegment.fulltext().getMetadata(urlHash).url();
this.uri = this.indexSegment.fulltext().getURL(urlHash);
}
public YMarkMetadata(final Document document) {

@ -1521,16 +1521,10 @@ public final class Switchboard extends serverSwitch {
}
public DigestURI getURL(final byte[] urlhash) {
if ( urlhash == null ) {
return null;
}
if ( urlhash.length == 0 ) {
return null;
}
final URIMetadataNode le = this.index.fulltext().getMetadata(urlhash);
if ( le != null ) {
return le.url();
}
if (urlhash == null) return null;
if (urlhash.length == 0) return null;
final DigestURI url = this.index.fulltext().getURL(urlhash);
if (url != null) return url;
return this.crawlQueues.getURL(urlhash);
}

@ -227,13 +227,26 @@ public final class Fulltext implements Iterable<byte[]> {
Date now = new Date();
return x.after(now) ? now : x;
}
/**
 * Resolves a url hash to its DigestURI by fetching only the sku (url) field
 * from the Solr index, avoiding the cost of loading the full metadata document.
 * @param urlHash the url hash to resolve; may be null
 * @return the DigestURI for the hash, or null if the hash is null, the Solr
 *         lookup fails, no document exists, the sku field is missing, or the
 *         stored url string is malformed
 */
public DigestURI getURL(final byte[] urlHash) {
    if (urlHash == null) return null;
    final SolrDocument doc;
    try {
        doc = this.solr.getById(ASCII.String(urlHash), YaCySchema.sku.getSolrFieldName());
    } catch (final IOException e) {
        // NOTE(review): the lookup failure is silently mapped to "not found" — confirm this is intended
        return null;
    }
    if (doc == null) return null;
    final String sku = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
    if (sku == null) return null;
    try {
        return new DigestURI(sku, urlHash);
    } catch (final MalformedURLException e) {
        return null;
    }
}
/**
 * generates a URIMetadataNode (full URL metadata entry) using the url hash;
 * if the url cannot be found, this returns null
 * @param wre the word reference carrying the url hash of the entry
 * @param weight the ranking weight to attach to the entry
 * @return the metadata entry, or null if the url cannot be found
 */
public URIMetadataNode getMetadata(WordReference wre, long weight) {
if (wre == null) return null; // all time was already wasted in takeRWI to get another element
return getMetadata(wre.urlhash(), wre, weight);
@ -243,7 +256,7 @@ public final class Fulltext implements Iterable<byte[]> {
if (urlHash == null) return null;
return getMetadata(urlHash, null, 0);
}
private URIMetadataNode getMetadata(final byte[] urlHash, WordReference wre, long weight) {
// get the metadata from Solr
@ -519,9 +532,37 @@ public final class Fulltext implements Iterable<byte[]> {
true);
}
/**
 * Enumerates all index entries as DigestURI objects, resolving each stored
 * url hash lazily via getURL(byte[]) as the iterator advances.
 * @return an iterator over the urls of all entries; clone() is a no-op that
 *         returns the same instance, and close() releases nothing
 */
public CloneableIterator<DigestURI> urls() {
    // wrap the raw id iterator so each hash is resolved to a url on demand
    final Iterator<byte[]> idIterator = iterator();
    return new CloneableIterator<DigestURI>() {
        @Override
        public CloneableIterator<DigestURI> clone(final Object secondHash) {
            // cloning is not supported; the same iterator instance is reused
            return this;
        }
        @Override
        public final boolean hasNext() {
            return idIterator.hasNext();
        }
        @Override
        public final DigestURI next() {
            final byte[] urlHash = idIterator.next();
            // NOTE(review): may yield null when a hash cannot be resolved even
            // though hasNext() was true — callers must tolerate null elements
            if (urlHash == null) return null;
            return getURL(urlHash);
        }
        @Override
        public final void remove() {
            idIterator.remove();
        }
        @Override
        public void close() {
            // nothing to release: backed only by the in-memory id iterator
        }
    };
}
public CloneableIterator<URIMetadataNode> entries() {
// enumerates entry elements
final Iterator<byte[]> ids = iterator();
final Iterator<byte[]> ids = iterator();
return new CloneableIterator<URIMetadataNode>() {
@Override
public CloneableIterator<URIMetadataNode> clone(final Object secondHash) {
@ -783,15 +824,15 @@ public final class Fulltext implements Iterable<byte[]> {
// collect hashes from all domains
// fetch urls from the database to determine the host in clear text
URIMetadataNode urlref;
DigestURI url;
if (count < 0 || count > domainSamples.size()) count = domainSamples.size();
this.statsDump = new ArrayList<HostStat>();
final TreeSet<String> set = new TreeSet<String>();
for (final URLHashCounter hs: domainSamples.values()) {
if (hs == null) continue;
urlref = this.getMetadata(hs.urlhashb);
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
set.add(urlref.url().getHost());
url = this.getURL(hs.urlhashb);
if (url == null || url.getHost() == null) continue;
set.add(url.getHost());
count--;
if (count == 0) break;
}
@ -820,7 +861,6 @@ public final class Fulltext implements Iterable<byte[]> {
*/
public Map<String, HostStat> domainHashResolver(final Map<String, URLHashCounter> domainSamples) {
final HashMap<String, HostStat> hostMap = new HashMap<String, HostStat>();
URIMetadataNode urlref;
final ScoreMap<String> hosthashScore = new ConcurrentScoreMap<String>();
for (final Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
@ -828,8 +868,7 @@ public final class Fulltext implements Iterable<byte[]> {
}
DigestURI url;
for (final Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
urlref = this.getMetadata(e.getValue().urlhashb);
url = urlref.url();
url = this.getURL(e.getValue().urlhashb);
hostMap.put(e.getKey(), new HostStat(url.getHost(), url.getPort(), e.getKey(), hosthashScore.get(e.getKey())));
}
return hostMap;
@ -841,7 +880,6 @@ public final class Fulltext implements Iterable<byte[]> {
// fetch urls from the database to determine the host in clear text
final Iterator<String> j = domainScore.keys(false); // iterate urlhash-examples in reverse order (biggest first)
URIMetadataNode urlref;
String urlhash;
count += 10; // make some more to prevent that we have to do this again after deletions too soon.
if (count < 0 || domainScore.sizeSmaller(count)) count = domainScore.size();
@ -850,10 +888,9 @@ public final class Fulltext implements Iterable<byte[]> {
while (j.hasNext()) {
urlhash = j.next();
if (urlhash == null) continue;
urlref = this.getMetadata(ASCII.getBytes(urlhash));
if (urlref == null || urlref.url() == null || urlref.url().getHost() == null) continue;
url = this.getURL(ASCII.getBytes(urlhash));
if (url == null || url.getHost() == null) continue;
if (this.statsDump == null) return new ArrayList<HostStat>().iterator(); // some other operation has destroyed the object
url = urlref.url();
this.statsDump.add(new HostStat(url.getHost(), url.getPort(), urlhash.substring(6), domainScore.get(urlhash)));
count--;
if (count == 0) break;

@ -58,7 +58,6 @@ import net.yacy.document.Parser;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.data.citation.CitationReferenceFactory;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
@ -235,7 +234,7 @@ public class Segment {
return null;
}
if (id == null || id == AbstractSolrConnector.POISON_ID) return null;
DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url();
DigestURI u = Segment.this.fulltext.getURL(ASCII.getBytes(id));
if (u.toNormalform(true).startsWith(urlstub)) return u;
}
}
@ -508,13 +507,12 @@ public class Segment {
if (urlhash == null) return 0;
// determine the url string
final URIMetadataNode entry = fulltext().getMetadata(urlhash);
if (entry == null) return 0;
if (entry.url() == null) return 0;
final DigestURI url = fulltext().getURL(urlhash);
if (url == null) return 0;
try {
// parse the resource
final Document document = Document.mergeDocuments(entry.url(), null, loader.loadDocuments(loader.request(entry.url(), true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay));
final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay));
if (document == null) {
// delete just the url entry
fulltext().remove(urlhash);

@ -429,6 +429,8 @@ public final class QueryParams {
// construct query
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^100000.0"); // a bost query that moves double content to the back
params.setStart(this.offset);
params.setRows(this.itemsPerPage);
params.setFacet(false);

@ -51,7 +51,7 @@ import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
@ -411,7 +411,7 @@ public final class RankingProcess extends Thread {
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
final Iterator<String> domhashs = this.hostHashNavigator.keys(false);
URIMetadataNode row;
DigestURI url;
byte[] urlhash;
String hosthash, hostname;
if ( this.hostHashResolver != null ) {
@ -421,8 +421,8 @@ public final class RankingProcess extends Thread {
continue;
}
urlhash = this.hostHashResolver.get(hosthash);
row = urlhash == null ? null : this.query.getSegment().fulltext().getMetadata(urlhash);
hostname = row == null ? null : row.url().getHost();
url = urlhash == null ? null : this.query.getSegment().fulltext().getURL(urlhash);
hostname = url == null ? null : url.getHost();
if ( hostname != null ) {
result.set(hostname, this.hostHashNavigator.get(hosthash));
}

Loading…
Cancel
Save