Eliminate duplication of values for search ResultEntry

by instantiation from URIMetadataNode, by eliminating differentiation of ResultEntry/URIMetadataNode.
- moved remaining ResultEntry functionality to URIMetadataNode
   - for 1:1 functionality added a function makeResultEntry()
- removed ResultEntry 
- refactored related code

Main difference is after makeResultEntry the text_t content is removed and alternative title/url strings for display are calculated.


Main difference left is, that
pull/8/head
reger 10 years ago
parent 29c4aa3991
commit 000dde9511

@ -52,6 +52,7 @@ import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ByteBuffer; import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
import net.yacy.gui.Audio; import net.yacy.gui.Audio;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
@ -77,7 +78,6 @@ import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import net.yacy.search.query.SearchEventType; import net.yacy.search.query.SearchEventType;
import net.yacy.search.ranking.RankingProfile; import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.server.serverCore; import net.yacy.server.serverCore;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -221,7 +221,7 @@ public final class search {
int indexabstractContainercount = 0; int indexabstractContainercount = 0;
QueryParams theQuery = null; QueryParams theQuery = null;
SearchEvent theSearch = null; SearchEvent theSearch = null;
ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null; ArrayList<WeakPriorityBlockingQueue.Element<URIMetadataNode>> accu = null;
if (query.isEmpty() && abstractSet != null) { if (query.isEmpty() && abstractSet != null) {
// this is _not_ a normal search, only a request for index abstracts // this is _not_ a normal search, only a request for index abstracts
final Segment indexSegment = sb.index; final Segment indexSegment = sb.index;
@ -413,7 +413,7 @@ public final class search {
final long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
final StringBuilder links = new StringBuilder(6000); final StringBuilder links = new StringBuilder(6000);
String resource = null; String resource = null;
WeakPriorityBlockingQueue.Element<ResultEntry> entry; WeakPriorityBlockingQueue.Element<URIMetadataNode> entry;
for (int i = 0; i < accu.size(); i++) { for (int i = 0; i < accu.size(); i++) {
entry = accu.get(i); entry = accu.get(i);
resource = entry.getElement().resource(); resource = entry.getElement().resource();

@ -28,7 +28,6 @@ import java.net.MalformedURLException;
import java.util.Collection; import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
@ -44,6 +43,7 @@ import net.yacy.cora.protocol.RequestHeader.FileType;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory; import net.yacy.cora.util.Memory;
import net.yacy.crawler.data.Cache; import net.yacy.crawler.data.Cache;
import net.yacy.crawler.retrieval.Response;
import net.yacy.data.URLLicense; import net.yacy.data.URLLicense;
import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.Formatter;
@ -58,7 +58,6 @@ import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import net.yacy.search.query.SearchEventType; import net.yacy.search.query.SearchEventType;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet; import net.yacy.search.snippet.TextSnippet;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -115,7 +114,7 @@ public class yacysearchitem {
// text search // text search
// generate result object // generate result object
final ResultEntry result = theSearch.oneResult(item, timeout); final URIMetadataNode result = theSearch.oneResult(item, timeout);
if (result == null) return prop; // no content if (result == null) return prop; // no content
final String resultUrlstring = result.urlstring(); final String resultUrlstring = result.urlstring();
final DigestURL resultURL = result.url(); final DigestURL resultURL = result.url();
@ -218,13 +217,11 @@ public class yacysearchitem {
prop.put("content_showProxy_link", resultUrlstring); prop.put("content_showProxy_link", resultUrlstring);
prop.put("content_showHostBrowser_link", resultUrlstring); prop.put("content_showHostBrowser_link", resultUrlstring);
if (sb.getConfigBool("search.result.show.vocabulary", true)) { if (sb.getConfigBool("search.result.show.vocabulary", true)) {
URIMetadataNode node = result;
int c = 0; int c = 0;
for (Map.Entry<String, Object> entry: node.entrySet()) { for (String key: result.getFieldNames()) {
String key = entry.getKey();
if (key.startsWith("vocabulary_") && key.endsWith("_sxt")) { if (key.startsWith("vocabulary_") && key.endsWith("_sxt")) {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
Collection<String> terms = (Collection<String>) entry.getValue(); Collection<Object> terms = result.getFieldValues(key);
prop.putHTML("content_showVocabulary_vocabulary_" + c + "_name", key.substring(11, key.length() - 4)); prop.putHTML("content_showVocabulary_vocabulary_" + c + "_name", key.substring(11, key.length() - 4));
prop.putHTML("content_showVocabulary_vocabulary_" + c + "_terms", terms.toString()); prop.putHTML("content_showVocabulary_vocabulary_" + c + "_terms", terms.toString());
c++; c++;
@ -276,8 +273,7 @@ public class yacysearchitem {
prop.put("content_heuristic_name", heuristic.heuristicName); prop.put("content_heuristic_name", heuristic.heuristicName);
} }
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "" + item, 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theSearch.query.id(true), SearchEventType.FINALIZATION, "" + item, 0, 0), false);
final String ext = MultiProtocolURL.getFileExtension(resultFileName); if (result.doctype() == Response.DT_IMAGE) {
if (MultiProtocolURL.isImage(ext)) {
final String license = URLLicense.aquireLicense(resultURL); final String license = URLLicense.aquireLicense(resultURL);
prop.put("content_code", license); prop.put("content_code", license);
} else { } else {
@ -343,7 +339,7 @@ public class yacysearchitem {
// any other media content // any other media content
// generate result object // generate result object
final ResultEntry ms = theSearch.oneResult(item, timeout); final URIMetadataNode ms = theSearch.oneResult(item, timeout);
prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
if (ms == null) { if (ms == null) {
prop.put("content_item", "0"); prop.put("content_item", "0");

@ -22,6 +22,7 @@
package net.yacy.kelondro.data.meta; package net.yacy.kelondro.data.meta;
import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
@ -39,6 +40,7 @@ import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.SolrType; import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
@ -46,14 +48,20 @@ import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.retrieval.Response; import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser; import net.yacy.document.Condenser;
import net.yacy.document.SentenceReader; import net.yacy.document.SentenceReader;
import net.yacy.document.parser.pdfParser;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.MapTools; import net.yacy.kelondro.util.MapTools;
import net.yacy.kelondro.util.kelondroException; import net.yacy.kelondro.util.kelondroException;
import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams; import net.yacy.search.query.QueryParams;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.utils.crypt; import net.yacy.utils.crypt;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
@ -64,7 +72,7 @@ import org.apache.solr.common.SolrDocument;
* The purpose of this object is the migration from the old metadata structure to solr document. * The purpose of this object is the migration from the old metadata structure to solr document.
* Future implementations should try to replace URIMetadata objects completely by SolrDocument objects * Future implementations should try to replace URIMetadata objects completely by SolrDocument objects
*/ */
public class URIMetadataNode extends SolrDocument { public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMetadataNode>, Comparator<URIMetadataNode> */ {
private static final long serialVersionUID = -256046934741561968L; private static final long serialVersionUID = -256046934741561968L;
@ -77,6 +85,11 @@ public class URIMetadataNode extends SolrDocument {
protected String snippet = null; protected String snippet = null;
protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests
// fields for search results (implemented from ResultEntry)
private String alternative_urlstring;
private String alternative_urlname;
private TextSnippet textSnippet = null;
public URIMetadataNode(final Properties prop, String collection) { public URIMetadataNode(final Properties prop, String collection) {
// generates an plasmaLURLEntry using the properties from the argument // generates an plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString // the property names must correspond to the one from toString
@ -662,4 +675,145 @@ public class URIMetadataNode extends SolrDocument {
return a; return a;
} }
// --- implementation for use as search result ----------
/**
 * Prepares this node for use as a search result entry.
 * <p>
 * The full text field ({@code text_t}) is removed from the document, the given
 * snippet is attached and the alternative display strings are reset. For hosts
 * ending in {@code .yacyh} the peer hash encoded in the host name is looked up
 * among the connected peers and alternative url strings for display are computed
 * from the peer's public address and name.
 * <p>
 * Never put the node back to the index after this call, because its content
 * fields have been reduced.
 *
 * @param indexSegment the segment from which stale entries are removed when the
 *                     peer behind a {@code .yacyh} host cannot be resolved
 * @param peers        the seed database used to look up the connected peer
 * @param textSnippet  the snippet to attach to this result; may be {@code null}
 * @return this node
 * @throws RuntimeException with message "index void" if the host ends in
 *                          {@code .yacyh} but no connected peer with a public
 *                          address is found; the entry is removed from the
 *                          index before the exception is thrown
 */
public URIMetadataNode makeResultEntry(
        final Segment indexSegment,
        SeedDB peers,
        final TextSnippet textSnippet) {

    // the text field eats up most of the space; it was only needed for the
    // snippet computation, and the snippet is kept in a separate field here
    this.removeFields(CollectionSchema.text_t.getSolrFieldName());
    this.alternative_urlstring = null;
    this.alternative_urlname = null;
    this.textSnippet = textSnippet;

    final String host = this.url().getHost();
    if (host == null || !host.endsWith(".yacyh")) {
        // ordinary host: nothing to translate
        return this;
    }

    // translate host into current IP: the part between the first dot and the
    // ".yacyh" suffix carries the peer hash
    final int firstDot = host.indexOf('.');
    final String peerHash = Seed.hexHash2b64Hash(host.substring(firstDot + 1, host.length() - 6));
    final Seed seed = peers.getConnected(peerHash);
    final String path = this.url().getFile();
    final String address = (seed == null) ? null : seed.getPublicAddress(seed.getIP());

    if (address == null) {
        // seed is not known from here: clean up the index and reject the entry
        try {
            if (indexSegment.termIndex() != null) {
                final String words = "yacyshare " + path.replace('?', ' ') + " " + this.dc_title();
                indexSegment.termIndex().remove(
                        Word.words2hashesHandles(Condenser.getWords(words, null).keySet()),
                        this.hash());
            }
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
        indexSegment.fulltext().remove(this.hash()); // clean up
        throw new RuntimeException("index void");
    }

    this.alternative_urlstring = "http://" + address + "/" + host.substring(0, firstDot) + path;

    // the display name is shown without the query part
    String displayName = "http://share." + seed.getName() + ".yacy" + path;
    final int queryStart = displayName.indexOf('?');
    if (queryStart > 0) {
        displayName = displayName.substring(0, queryStart);
    }
    this.alternative_urlname = displayName;
    return this;
}
/**
 * Returns the url string to be used for this search result entry.
 * <p>
 * If an alternative url string was computed by {@code makeResultEntry} it is
 * returned as is. Otherwise the normal form of the url is returned, except for
 * pdf documents when {@code pdfParser.individualPages} is set: in that case a
 * trailing page property in the url is rewritten to a {@code #page=} fragment.
 *
 * @return the url string for display and linking
 */
public String urlstring() {
    if (this.alternative_urlstring != null) {
        return this.alternative_urlstring;
    }

    // special treatment of pdf files which can be split into subpages
    final boolean rewritePdfLink = pdfParser.individualPages
            && "pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase());

    final String normalform = this.url().toNormalform(true);
    if (!rewritePdfLink) {
        return normalform;
    }

    final String pageprop = pdfParser.individualPagePropertyname;
    final int propertyStart = normalform.lastIndexOf(pageprop + "=");
    if (propertyStart <= 0) {
        return normalform;
    }
    // drop the separator character in front of the page property and append
    // the property value as page fragment
    return normalform.substring(0, propertyStart - 1)
            + "#page="
            + normalform.substring(propertyStart + pageprop.length() + 1);
}
/**
 * Returns the url name to be displayed for this search result entry.
 *
 * @return the alternative url name if one was computed, otherwise the
 *         unescaped form of {@link #urlstring()}
 */
public String urlname() {
    if (this.alternative_urlname != null) {
        return this.alternative_urlname;
    }
    return MultiProtocolURL.unescape(urlstring());
}
/**
 * Returns the title to be displayed for this search result entry.
 * <p>
 * If the document title is empty, the file name of the url is used instead.
 * The result can still be empty if the url has no file name
 * (e.g. "www.host.com/").
 *
 * @return the document title, or the file name of the url as fallback
 */
public String title() {
    final String documentTitle = this.dc_title();
    if (!documentTitle.isEmpty()) {
        return documentTitle;
    }
    // no title available: fall back to the file name of the url
    if (this.url() == null) {
        return "";
    }
    return this.url().getFileName();
}
/**
 * Returns the text snippet attached to this search result entry.
 *
 * @return the snippet passed to {@code makeResultEntry}, or {@code null} if
 *         none was attached
 */
public TextSnippet textSnippet() {
    final TextSnippet attachedSnippet = this.textSnippet;
    return attachedSnippet;
}
/**
 * Returns the event dates of this search result entry.
 *
 * @return the result of {@code datesInContent()}
 */
public Date[] events() {
    final Date[] contentDates = this.datesInContent();
    return contentDates;
}
/**
 * Tells whether a usable text snippet is attached to this search result entry.
 *
 * @return {@code true} if a snippet is attached and its error code does not
 *         signal a failure
 */
public boolean hasTextSnippet() {
    if (this.textSnippet == null) {
        return false;
    }
    return !this.textSnippet.getErrorCode().fail();
}
/**
 * Generates the transport resource string for this search result entry.
 *
 * @return the string form of this node; if a snippet is attached and exists,
 *         its raw line is passed on to {@code toString(String)}
 */
public String resource() {
    final boolean snippetAvailable = (this.textSnippet != null) && this.textSnippet.exists();
    return snippetAvailable
            ? this.toString(this.textSnippet.getLineRaw())
            : this.toString();
}
/*
taken from ResultEntry (should work without)
private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful
@Override
public int hashCode() {
if (this.hashCache == Integer.MIN_VALUE) {
this.hashCache = ByteArray.hashCode(this.hash());
}
return this.hashCache;
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (!(obj instanceof URIMetadataNode)) return false;
URIMetadataNode other = (URIMetadataNode) obj;
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
}
@Override
public int compareTo(URIMetadataNode o) {
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
}
@Override
public int compare(URIMetadataNode o1, URIMetadataNode o2) {
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
}*/
} }

@ -1,117 +0,0 @@
// BinSearch.java
// -----------------------
// part of The Kelondro Database
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// created 22.11.2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.index;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.NaturalOrder;
/**
 * Binary search over a flat byte array that holds equally sized chunks.
 * <p>
 * All chunks are stored back to back in one array; chunk {@code i} occupies the
 * bytes {@code [i * chunksize, (i + 1) * chunksize)}. Lookups compare chunks with
 * a {@link NaturalOrder}, so the chunks must be sorted in that order for
 * {@link #contains(byte[])} to work.
 */
public final class BinSearch {

    private final byte[] chunks;
    private final int chunksize;
    private final int count;
    private static final ByteOrder objectOrder = new NaturalOrder(true); // the natural order is much faster than the b64Order

    /**
     * Wraps an existing chunk array. The array is used as is (not copied and not
     * sorted), so the caller has to provide the chunks already in sorted order.
     *
     * @param chunks    the concatenated chunks
     * @param chunksize the length of a single chunk in bytes
     */
    public BinSearch(final byte[] chunks, final int chunksize) {
        this.chunks = chunks;
        this.chunksize = chunksize;
        this.count = chunks.length / chunksize;
    }

    /**
     * Builds the chunk array from a list of chunks. The chunks are sorted before
     * they are copied into the internal array; the given list is not modified.
     *
     * @param chunkList the chunks, each at least {@code chunksize} bytes long
     * @param chunksize the length of a single chunk in bytes
     */
    public BinSearch(final List<byte[]> chunkList, final int chunksize) {
        byte[][] chunksa = new byte[chunkList.size()][];
        chunksa = chunkList.toArray(chunksa);
        Arrays.sort(chunksa, objectOrder);
        this.chunks = new byte[chunkList.size() * chunksize];
        for (int i = 0; i < chunksa.length; i++) {
            System.arraycopy(chunksa[i], 0, this.chunks, i * chunksize, chunksize);
        }
        this.chunksize = chunksize;
        this.count = this.chunks.length / chunksize;
        assert this.count == chunkList.size();
    }

    /**
     * Tests whether the given chunk is stored in this object.
     *
     * @param t the chunk to look for; must be {@code chunksize} bytes long
     * @return {@code true} if an equal chunk was found
     */
    public final boolean contains(final byte[] t) {
        return contains(t, 0, this.count);
    }

    /**
     * Binary search for a chunk within a range of chunk positions.
     *
     * @param t        the chunk to look for
     * @param beginPos first chunk position of the search range (inclusive)
     * @param endPos   end of the search range (exclusive)
     * @return {@code true} if an equal chunk was found within the range
     */
    private final boolean contains(final byte[] t, int beginPos, int endPos) {
        assert t.length == this.chunksize;
        while (beginPos < endPos) {
            // unsigned shift instead of division: the sum of two large positions
            // may exceed Integer.MAX_VALUE and would give a negative pivot otherwise
            final int pivot = (beginPos + endPos) >>> 1;
            if ((pivot < 0) || (pivot >= this.count)) return false;
            final int c = objectOrder.compare(this.chunks, pivot * this.chunksize, t, 0, this.chunksize);
            if (c == 0) return true;
            if (c < 0) {
                // chunk at pivot < t: continue in the upper half
                beginPos = pivot + 1;
            } else {
                // chunk at pivot > t: continue in the lower half
                endPos = pivot;
            }
        }
        return false;
    }

    /**
     * @return the number of chunks
     */
    public final int size() {
        return count;
    }

    /**
     * Returns a copy of the chunk at the given position.
     *
     * @param element the chunk position
     * @return a new array holding the chunk
     */
    public final byte[] get(final int element) {
        final byte[] a = new byte[chunksize];
        System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
        return a;
    }

    /**
     * Copies the chunk at the given position into a caller-provided array.
     *
     * @param element the chunk position
     * @param a       the target array; must be {@code chunksize} bytes long
     * @return the target array {@code a}
     */
    public final byte[] get(final int element, byte[] a) {
        assert a.length == chunksize;
        System.arraycopy(this.chunks, element * this.chunksize, a, 0, chunksize);
        return a;
    }

    /**
     * Writes the raw chunk array to a file.
     *
     * @param f the file to write to
     * @throws IOException if the file cannot be opened or written
     */
    public final void write(File f) throws IOException {
        final FileOutputStream os = new FileOutputStream(f);
        try {
            os.write(this.chunks);
            os.flush();
        } finally {
            // release the file handle even if writing failed
            os.close();
        }
    }

    /**
     * Small self test: looks up aligned chunks (which must be found) and chunks
     * shifted by one character against a sample string.
     *
     * @param args not used
     */
    public static void main(final String[] args) {
        final String s = "4CEvsI8FRczRBo_ApRCkwfEbFLn1pIFXg39QGMgj5RHM6HpIMJq67QX3M5iQYr_LyI_5aGDaa_bYbRgJ9XnQjpmq6QkOoGWAoEaihRqhV3kItLFHjRtqauUR";
        final BinSearch bs = new BinSearch(s.getBytes(), 6);
        for (int i = 0; i + 6 <= s.length(); i = i + 6) {
            System.out.println(s.substring(i, i + 6) + ":" + ((bs.contains(s.substring(i, i + 6).getBytes())) ? "drin" : "draussen"));
        }
        for (int i = 0; i + 7 <= s.length(); i = i + 6) {
            System.out.println(s.substring(i + 1, i + 7) + ":" + ((bs.contains(s.substring(i + 1, i + 7).getBytes())) ? "drin" : "draussen"));
        }
    }
}

@ -99,7 +99,6 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet; import net.yacy.search.snippet.TextSnippet;
import net.yacy.search.snippet.TextSnippet.ResultClass; import net.yacy.search.snippet.TextSnippet.ResultClass;
@ -174,7 +173,7 @@ public final class SearchEvent {
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
private final WeakPriorityBlockingQueue<WordReferenceVars> rwiStack; // thats the bag where the RWI search process writes to private final WeakPriorityBlockingQueue<WordReferenceVars> rwiStack; // thats the bag where the RWI search process writes to
private final WeakPriorityBlockingQueue<URIMetadataNode> nodeStack; // thats the bag where the solr results are written to private final WeakPriorityBlockingQueue<URIMetadataNode> nodeStack; // thats the bag where the solr results are written to
private final WeakPriorityBlockingQueue<ResultEntry> resultList; // thats the result list where the actual search result is waiting to be displayed private final WeakPriorityBlockingQueue<URIMetadataNode> resultList; // thats the result list where the actual search result is waiting to be displayed
private final boolean pollImmediately; // if this is true, then every entry in result List is polled immediately to prevent a re-ranking in the resultList. This is usefull if there is only one index source. private final boolean pollImmediately; // if this is true, then every entry in result List is polled immediately to prevent a re-ranking in the resultList. This is usefull if there is only one index source.
public final boolean excludeintext_image; public final boolean excludeintext_image;
@ -406,7 +405,7 @@ public final class SearchEvent {
this.deleteIfSnippetFail = deleteIfSnippetFail; this.deleteIfSnippetFail = deleteIfSnippetFail;
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.resultList = new WeakPriorityBlockingQueue<ResultEntry>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking this.resultList = new WeakPriorityBlockingQueue<URIMetadataNode>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
// snippets do not need to match with the complete query hashes, // snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search // only with the query minus the stopwords which had not been used for the search
@ -1312,7 +1311,7 @@ public final class SearchEvent {
false); false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal()); final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal()); final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet); URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re); addResult(re);
success = true; success = true;
} else { } else {
@ -1380,16 +1379,16 @@ public final class SearchEvent {
* place the result to the result vector and apply post-ranking * place the result to the result vector and apply post-ranking
* @param resultEntry * @param resultEntry
*/ */
public void addResult(ResultEntry resultEntry) { public void addResult(URIMetadataNode resultEntry) {
if (resultEntry == null) return; if (resultEntry == null) return;
float score = resultEntry.score(); float score = resultEntry.score();
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/); final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
this.resultList.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries. if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.addTopics(resultEntry); this.addTopics(resultEntry);
} }
private long postRanking(final ResultEntry rentry, final ScoreMap<String> topwords) { private long postRanking(final URIMetadataNode rentry, final ScoreMap<String> topwords) {
long r = 0; long r = 0;
// for media search: prefer pages with many links // for media search: prefer pages with many links
@ -1400,8 +1399,10 @@ public final class SearchEvent {
// apply citation count // apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother()); //System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation; if (this.query.getSegment().connectedCitation()) {
int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
} /* else r += 0; */
// prefer hit with 'prefer' pattern // prefer hit with 'prefer' pattern
if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) r += 256 << this.query.ranking.coeff_prefer; if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) r += 256 << this.query.ranking.coeff_prefer;
if (this.query.prefer.matcher(rentry.title()).matches()) r += 256 << this.query.ranking.coeff_prefer; if (this.query.prefer.matcher(rentry.title()).matches()) r += 256 << this.query.ranking.coeff_prefer;
@ -1432,7 +1433,7 @@ public final class SearchEvent {
return r; return r;
} }
public ResultEntry getSnippet(URIMetadataNode page, final CacheStrategy cacheStrategy) { public URIMetadataNode getSnippet(URIMetadataNode page, final CacheStrategy cacheStrategy) {
if (page == null) return null; if (page == null) return null;
if (cacheStrategy == null) { if (cacheStrategy == null) {
@ -1444,7 +1445,7 @@ public final class SearchEvent {
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
SearchEvent.SNIPPET_MAX_LENGTH, SearchEvent.SNIPPET_MAX_LENGTH,
!this.query.isLocal()); !this.query.isLocal());
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet); // result without snippet return page.makeResultEntry(this.query.getSegment(), this.peers, snippet); // result without snippet
} }
// load snippet // load snippet
@ -1464,16 +1465,16 @@ public final class SearchEvent {
if (!snippet.getErrorCode().fail()) { if (!snippet.getErrorCode().fail()) {
// we loaded the file and found the snippet // we loaded the file and found the snippet
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet); // result with snippet attached return page.makeResultEntry(this.query.getSegment(), this.peers, snippet); // result with snippet attached
} else if (cacheStrategy.mustBeOffline()) { } else if (cacheStrategy.mustBeOffline()) {
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster // this may happen during a remote search, because snippet loading is omitted to retrieve results faster
return new ResultEntry(page, this.query.getSegment(), this.peers, null); // result without snippet return page.makeResultEntry(this.query.getSegment(), this.peers, null); // result without snippet
} else { } else {
// problems with snippet fetch // problems with snippet fetch
if (this.snippetFetchWordHashes.has(Segment.catchallHash)) { if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
// we accept that because the word cannot be on the page // we accept that because the word cannot be on the page
return new ResultEntry(page, this.query.getSegment(), this.peers, null); return page.makeResultEntry(this.query.getSegment(), this.peers, null);
} }
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
if (this.deleteIfSnippetFail) { if (this.deleteIfSnippetFail) {
@ -1483,10 +1484,10 @@ public final class SearchEvent {
return null; return null;
} }
} }
return new ResultEntry(page, this.query.getSegment(), this.peers, null); // result without snippet return page.makeResultEntry(this.query.getSegment(), this.peers, null); // result without snippet
} }
public ResultEntry oneResult(final int item, final long timeout) { public URIMetadataNode oneResult(final int item, final long timeout) {
// check if we already retrieved this item // check if we already retrieved this item
// (happens if a search pages is accessed a second time) // (happens if a search pages is accessed a second time)
final long finishTime = timeout == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + timeout; final long finishTime = timeout == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + timeout;
@ -1515,7 +1516,7 @@ public final class SearchEvent {
// check if we have a success // check if we have a success
if (this.resultList.sizeAvailable() > item) { if (this.resultList.sizeAvailable() > item) {
// we have the wanted result already in the result array .. return that // we have the wanted result already in the result array .. return that
final ResultEntry re = this.resultList.element(item).getElement(); final URIMetadataNode re = this.resultList.element(item).getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false);
if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) { if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) {
@ -1570,7 +1571,7 @@ public final class SearchEvent {
public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException { public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException {
if (item < imageViewed.size()) return nthImage(item); if (item < imageViewed.size()) return nthImage(item);
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
ResultEntry doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
// check if the match was made in the url or in the image links // check if the match was made in the url or in the image links
if (doc == null) { if (doc == null) {
if (hasSpare()) return nextSpare(); if (hasSpare()) return nextSpare();
@ -1641,12 +1642,12 @@ public final class SearchEvent {
} }
} }
public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) { public ArrayList<WeakPriorityBlockingQueue.Element<URIMetadataNode>> completeResults(final long waitingtime) {
final long timeout = waitingtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + waitingtime; final long timeout = waitingtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + waitingtime;
int i = 0; int i = 0;
while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) { while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) {
ResultEntry re = oneResult(i++, timeout - System.currentTimeMillis()); URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
if (re == null) break; if (re == null) break;
} }
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable())); return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
@ -1659,8 +1660,8 @@ public final class SearchEvent {
* @return true if an entry was deleted, false otherwise * @return true if an entry was deleted, false otherwise
*/ */
protected boolean delete(final String urlhash) { protected boolean delete(final String urlhash) {
final Iterator<Element<ResultEntry>> i = this.resultList.iterator(); final Iterator<Element<URIMetadataNode>> i = this.resultList.iterator();
Element<ResultEntry> entry; Element<URIMetadataNode> entry;
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
if (urlhash.equals(ASCII.String(entry.getElement().url().hash()))) { if (urlhash.equals(ASCII.String(entry.getElement().url().hash()))) {
@ -1810,7 +1811,7 @@ public final class SearchEvent {
} }
} }
protected void addTopics(final ResultEntry resultEntry) { protected void addTopics(final URIMetadataNode resultEntry) {
// take out relevant information for reference computation // take out relevant information for reference computation
if ((resultEntry.url() == null) || (resultEntry.title() == null)) return; if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;
final String[] descrcomps = MultiProtocolURL.splitpattern.split(resultEntry.title()); // words in the description final String[] descrcomps = MultiProtocolURL.splitpattern.split(resultEntry.title()); // words in the description

@ -1,166 +0,0 @@
// ResultEntry.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.snippet;
import java.io.IOException;
import java.util.Comparator;
import java.util.Date;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.util.ByteArray;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.Condenser;
import net.yacy.document.parser.pdfParser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionSchema;
/**
 * A search result: a copy of a {@link URIMetadataNode} combined with an optional
 * {@link TextSnippet} and, for hosts ending in ".yacyh", alternative URL strings
 * that are used for display instead of the stored URL.
 * <p>
 * On construction the {@code text_t} field is removed from the copied document.
 * Equality, hash code and ordering are all derived from {@code hash()} only.
 */
public class ResultEntry extends URIMetadataNode implements Comparable<ResultEntry>, Comparator<ResultEntry> {

    private static final long serialVersionUID = -256046934741561978L;

    // payload objects
    /** Replacement for the URL string of ".yacyh" hosts; {@code null} for every other host. */
    private final String alternative_urlstring;
    /** Replacement for the displayed URL name of ".yacyh" hosts; {@code null} for every other host. */
    private final String alternative_urlname;
    /** Snippet belonging to this result; may be {@code null} (result without snippet). */
    private final TextSnippet textSnippet;
    /** Index segment used to look up citation counts and to clean up void entries. */
    private final Segment indexSegment;

    /** Cached hash code; {@code Integer.MIN_VALUE} marks "not computed yet". */
    private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful

    /**
     * Creates a result entry from an index document.
     *
     * @param urlentry     the document the result is created from
     * @param indexSegment segment used for citation counts and for removing void entries
     * @param peers        seed database used to resolve ".yacyh" hosts to a connected peer
     * @param textSnippet  the snippet for this result, or {@code null} for a result without snippet
     * @throws RuntimeException with message "index void" if the host ends in ".yacyh" but no
     *         connected peer with a public address is known for it; the document is removed
     *         from the index before the exception is thrown
     */
    public ResultEntry(final URIMetadataNode urlentry,
                       final Segment indexSegment,
                       SeedDB peers,
                       final TextSnippet textSnippet) {
        super(urlentry);
        // clear the text field which eats up most of the space; it was used for snippet
        // computation which is in a separate field here
        this.removeFields(CollectionSchema.text_t.getSolrFieldName());
        this.indexSegment = indexSegment;
        this.textSnippet = textSnippet;

        String altUrlstring = null;
        String altUrlname = null;
        final String host = urlentry.url().getHost();
        if (host != null && host.endsWith(".yacyh")) {
            // translate host into current IP
            // NOTE(review): assumes the host looks like <name>.<hexhash>.yacyh; if the first dot
            // is the one in front of "yacyh" the substring below throws
            // StringIndexOutOfBoundsException - confirm such hosts cannot occur.
            final int firstDot = host.indexOf('.');
            final String hash = Seed.hexHash2b64Hash(host.substring(firstDot + 1, host.length() - 6));
            final Seed seed = peers.getConnected(hash);
            final String path = urlentry.url().getFile();
            final String address = (seed == null) ? null : seed.getPublicAddress(seed.getIP());
            if (address == null) {
                // seed is not known from here
                removeFromIndex(urlentry, indexSegment, path);
                throw new RuntimeException("index void");
            }
            altUrlstring = "http://" + address + "/" + host.substring(0, firstDot) + path;
            // the display name never carries the query part
            final String shareName = "http://share." + seed.getName() + ".yacy" + path;
            final int queryStart = shareName.indexOf('?');
            altUrlname = (queryStart > 0) ? shareName.substring(0, queryStart) : shareName;
        }
        this.alternative_urlstring = altUrlstring;
        this.alternative_urlname = altUrlname;
    }

    /**
     * Removes a document whose peer cannot be resolved from the term index (if present)
     * and from the fulltext index. An {@link IOException} is logged and does not
     * prevent the fulltext removal.
     */
    private static void removeFromIndex(final URIMetadataNode urlentry, final Segment indexSegment, final String path) {
        try {
            if (indexSegment.termIndex() != null) {
                indexSegment.termIndex().remove(
                        Word.words2hashesHandles(Condenser.getWords(
                                ("yacyshare " +
                                 path.replace('?', ' ') +
                                 " " +
                                 urlentry.dc_title()), null).keySet()),
                        urlentry.hash());
            }
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
        indexSegment.fulltext().remove(urlentry.hash()); // clean up
    }

    /**
     * @return the hash code computed from {@code hash()}, cached after the first call
     */
    @Override
    public int hashCode() {
        if (this.hashCache == Integer.MIN_VALUE) {
            this.hashCache = ByteArray.hashCode(this.hash());
        }
        return this.hashCache;
    }

    /**
     * Two result entries are equal if their {@code hash()} values are equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code ResultEntry} with an equal hash
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) return true;
        if (obj == null) return false;
        if (!(obj instanceof ResultEntry)) return false;
        final ResultEntry other = (ResultEntry) obj;
        return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
    }

    /**
     * Returns the URL string of this result.
     * The alternative URL string is returned if one was computed. Otherwise the normal form
     * of the URL is returned, where for pdf files (if {@code pdfParser.individualPages} is set)
     * the page property is rewritten into a {@code #page=} fragment.
     *
     * @return the URL string for this result
     */
    public String urlstring() {
        if (this.alternative_urlstring != null) {
            return this.alternative_urlstring;
        }
        final String resultUrlstring = this.url().toNormalform(true);
        if (!pdfParser.individualPages) {
            return resultUrlstring;
        }
        // case-insensitive comparison without a locale-dependent toLowerCase(); also null-safe
        if (!"pdf".equalsIgnoreCase(MultiProtocolURL.getFileExtension(this.url().getFileName()))) {
            return resultUrlstring;
        }
        // for pdf links we rewrite the url
        // this is a special treatment of pdf files which can be splitted into subpages
        final String pageprop = pdfParser.individualPagePropertyname;
        final int p = resultUrlstring.lastIndexOf(pageprop + "=");
        if (p > 0) {
            // drop the separator character in front of the property and turn the value into a fragment
            return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
        }
        return resultUrlstring;
    }

    /**
     * @return the alternative URL name if one was computed, otherwise the unescaped {@link #urlstring()}
     */
    public String urlname() {
        return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
    }

    /**
     * Returns the title of the document; if the stored title is missing or empty the
     * file name of the URL is used instead.
     *
     * @return the title; may still be empty if the URL has no file name (e.g. "www.host.com/")
     */
    public String title() {
        String titlestr = this.dc_title();
        // if title is empty (or not available at all) use filename as title
        if (titlestr == null || titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
            titlestr = this.url() != null ? this.url().getFileName() : "";
        }
        return titlestr;
    }

    /**
     * @return the snippet given at construction time; may be {@code null}
     */
    public TextSnippet textSnippet() {
        return this.textSnippet;
    }

    /**
     * @return the dates found in the content, as delivered by {@code datesInContent()}
     */
    public Date[] events() {
        return this.datesInContent();
    }

    /**
     * @return the citation count for this document, or 0 if no citation index is connected
     */
    public int referencesCount() {
        // urlCitationIndex index might be null (= configuration option)
        return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0;
    }

    /**
     * @return {@code true} if a snippet is present and its error code does not signal a failure
     */
    public boolean hasTextSnippet() {
        return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
    }

    /**
     * Generates the transport resource for this result.
     *
     * @return {@code toString()} if there is no existing snippet, otherwise
     *         {@code toString(String)} applied to the raw snippet line
     */
    public String resource() {
        // generate transport resource
        if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
            return this.toString();
        }
        return this.toString(this.textSnippet.getLineRaw());
    }

    /**
     * Orders result entries by their {@code hash()} using {@code Base64Order.enhancedCoder}.
     */
    @Override
    public int compareTo(ResultEntry o) {
        return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
    }

    /**
     * Compares two result entries by their {@code hash()} using {@code Base64Order.enhancedCoder}.
     */
    @Override
    public int compare(ResultEntry o1, ResultEntry o2) {
        return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
    }
}
Loading…
Cancel
Save