refactor ResultEntry to be based on MetadataNode/SolrDocument

to share/reuse common access routines
pull/8/head
reger 10 years ago
parent d882991bc5
commit 3d53da8236

@ -208,7 +208,7 @@ public class yacysearchitem {
prop.put("content_showVocabulary", sb.getConfigBool("search.result.show.vocabulary", true) ? 1 : 0);
if (showEvent) prop.put("content_showEvent_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(events[0]));
prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.modified()));
prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.moddate()));
prop.putHTML("content_showSize_sizename", RSSMessage.sizename(result.filesize()));
prop.put("content_showMetadata_urlhash", urlhash);
prop.put("content_showParser_urlhash", urlhash);
@ -218,7 +218,7 @@ public class yacysearchitem {
prop.put("content_showProxy_link", resultUrlstring);
prop.put("content_showHostBrowser_link", resultUrlstring);
if (sb.getConfigBool("search.result.show.vocabulary", true)) {
URIMetadataNode node = result.getNode();
URIMetadataNode node = result;
int c = 0;
for (Map.Entry<String, Object> entry: node.entrySet()) {
String key = entry.getKey();
@ -239,7 +239,7 @@ public class yacysearchitem {
}
prop.put("content_urlhexhash", Seed.b64Hash2hexHash(urlhash));
prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), MAX_URL_LENGTH));
prop.put("content_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(result.modified()) : HeaderFramework.formatRFC1123(result.modified()));
prop.put("content_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(result.moddate()) : HeaderFramework.formatRFC1123(result.moddate()));
if (showEvent) prop.put("content_showEvent_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(events[0]) : HeaderFramework.formatRFC1123(events[0]));
//prop.put("content_ybr", RankingProcess.ybr(result.hash()));
prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
@ -248,9 +248,9 @@ public class yacysearchitem {
prop.putXML("content_file", resultFileName); // putXML for rss
prop.putXML("content_path", resultURL.getPath()); // putXML for rss
prop.put("content_nl", (item == theSearch.query.offset) ? 0 : 1);
prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject());
prop.putHTML("content_publisher", result.dc_publisher());
prop.putHTML("content_creator", result.dc_creator());// author
prop.putHTML("content_subject", result.dc_subject());
final Iterator<String> query = theSearch.query.getQueryGoal().getIncludeStrings();
final StringBuilder s = new StringBuilder(theSearch.query.getQueryGoal().getIncludeSize() * 20);
while (query.hasNext()) s.append('+').append(query.next());
@ -263,7 +263,7 @@ public class yacysearchitem {
prop.put("content_description", desc);
prop.putXML("content_description-xml", desc);
prop.putJSON("content_description-json", desc);
prop.put("content_mimetype",result.getNode().mime()); // for atom <link> type attribute
prop.put("content_mimetype", result.mime()); // for atom <link> type attribute
final HeuristicResult heuristic = theSearch.getHeuristic(result.hash());
if (heuristic == null) {
prop.put("content_heuristic", 0);

@ -1570,25 +1570,22 @@ public final class SearchEvent {
public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException {
if (item < imageViewed.size()) return nthImage(item);
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
ResultEntry ms = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
ResultEntry doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
// check if the match was made in the url or in the image links
if (ms == null) {
if (doc == null) {
if (hasSpare()) return nextSpare();
throw new MalformedURLException("no image url found");
}
// try to get more
SolrDocument doc = ms.getNode();
// there can be two different kinds of image hits: either the document itself is an image or images are embedded in the links of text documents.
String mime = (String) doc.getFirstValue(CollectionSchema.content_type.getSolrFieldName());
// boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
// generalize the above hack (a url with a file extension may still be an html page with an html mime type)
char docType = Response.docType(mime); // first look at mime (as some html pages have img extension (like wikipedia)
if (docType == Response.DT_UNKNOWN) docType = Response.docType(ms.url()); // try extension if mime wasn't successful
if (docType == Response.DT_IMAGE) {
String id = ASCII.String(ms.hash());
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(ms.url(), ms.url(), "", ms.title(), 0, 0, 0));
if (doc.doctype() == Response.DT_IMAGE) {
String id = ASCII.String(doc.hash());
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), "", doc.title(), 0, 0, 0));
} else {
Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
@ -1612,7 +1609,7 @@ public final class SearchEvent {
boolean sizeok = h != null && w != null && h.intValue() > 16 && w.intValue() > 16;
String id = ASCII.String(imageUrl.hash());
if (!imageViewed.containsKey(id) && !containsSpare(id)) {
ImageResult imageResult = new ImageResult(ms.url(), imageUrl, "", image_alt, w == null ? 0 : w, h == null ? 0 : h, 0);
ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w == null ? 0 : w, h == null ? 0 : h, 0);
if (match || sizeok) imageSpareGood.put(id, imageResult); else imageSpareBad.put(id, imageResult);
}
} catch (MalformedURLException e) {

@ -30,7 +30,6 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.Date;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.util.ByteArray;
@ -39,21 +38,15 @@ import net.yacy.document.Condenser;
import net.yacy.document.parser.pdfParser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionSchema;
public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEntry> {
public class ResultEntry extends URIMetadataNode implements Comparable<ResultEntry>, Comparator<ResultEntry> {
// payload objects
private final URIMetadataNode urlentry;
private String alternative_urlstring;
private String alternative_urlname;
private final TextSnippet textSnippet;
@ -63,8 +56,8 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
final Segment indexSegment,
SeedDB peers,
final TextSnippet textSnippet) {
this.urlentry = urlentry;
this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
super(urlentry);
this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
this.indexSegment = indexSegment;
this.alternative_urlstring = null;
this.alternative_urlname = null;
@ -102,7 +95,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
@Override
public int hashCode() {
if (this.hashCache == Integer.MIN_VALUE) {
this.hashCache = ByteArray.hashCode(this.urlentry.hash());
this.hashCache = ByteArray.hashCode(this.hash());
}
return this.hashCache;
}
@ -112,29 +105,18 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
if (obj == null) return false;
if (!(obj instanceof ResultEntry)) return false;
ResultEntry other = (ResultEntry) obj;
return Base64Order.enhancedCoder.equal(this.urlentry.hash(), other.urlentry.hash());
}
public URIMetadataNode getNode() {
return this.urlentry;
}
public byte[] hash() {
return this.urlentry.hash();
}
public DigestURL url() {
return this.urlentry.url();
}
public Bitfield flags() {
return this.urlentry.flags();
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
}
public String urlstring() {
if (this.alternative_urlstring != null) return this.alternative_urlstring;
if (!pdfParser.individualPages) return this.url().toNormalform(true);
if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.urlentry.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
// for pdf links we rewrite the url
// this is a special treatment of pdf files which can be split into subpages
String pageprop = pdfParser.individualPagePropertyname;
String resultUrlstring = this.urlentry.url().toNormalform(true);
String resultUrlstring = this.url().toNormalform(true);
int p = resultUrlstring.lastIndexOf(pageprop + "=");
if (p > 0) {
return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
@ -145,72 +127,22 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
}
public String title() {
String titlestr = this.urlentry.dc_title();
String titlestr = this.dc_title();
// if title is empty use filename as title
if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
titlestr = this.url() != null ? this.url().getFileName() : "";
}
return titlestr;
}
public String publisher() {
// dc:publisher
return this.urlentry.dc_publisher();
}
public String creator() {
// dc:creator, the author
return this.urlentry.dc_creator();
}
public String subject() {
// dc:subject, keywords
return this.urlentry.dc_subject();
}
public TextSnippet textSnippet() {
return this.textSnippet;
}
public Date modified() {
return this.urlentry.moddate();
}
public Date[] events() {
return this.urlentry.datesInContent();
}
public int filesize() {
return this.urlentry.filesize();
return this.datesInContent();
}
public int referencesCount() {
// urlCitationIndex index might be null (= configuration option)
return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.urlentry.hash()) : 0;
}
public int llocal() {
return this.urlentry.llocal();
}
public int lother() {
return this.urlentry.lother();
}
public int limage() {
return this.urlentry.limage();
}
public int laudio() {
return this.urlentry.laudio();
}
public int lvideo() {
return this.urlentry.lvideo();
}
public int lapp() {
return this.urlentry.lapp();
}
public double lat() {
return this.urlentry.lat();
}
public double lon() {
return this.urlentry.lon();
}
public WordReference word() {
final Reference word = this.urlentry.word();
if (word == null) return null;
if (word instanceof WordReferenceVars) return (WordReferenceVars) word;
if (word instanceof WordReferenceRow) return (WordReferenceRow) word;
assert word instanceof WordReferenceRow || word instanceof WordReferenceVars : word == null ? "word = null" : "type = " + word.getClass().getCanonicalName();
return null;
return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0;
}
public boolean hasTextSnippet() {
return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
@ -218,19 +150,16 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
public String resource() {
// generate transport resource
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
return this.urlentry.toString();
return this.toString();
}
return this.urlentry.toString(this.textSnippet.getLineRaw());
return this.toString(this.textSnippet.getLineRaw());
}
@Override
public int compareTo(ResultEntry o) {
return Base64Order.enhancedCoder.compare(this.urlentry.hash(), o.urlentry.hash());
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
}
@Override
public int compare(ResultEntry o1, ResultEntry o2) {
return Base64Order.enhancedCoder.compare(o1.urlentry.hash(), o2.urlentry.hash());
}
public float score() {
return this.urlentry.score();
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
}
}

Loading…
Cancel
Save