refactor ResultEntry to be based on URIMetadataNode/SolrDocument

to share/reuse common access routines
10 years ago · 3d53da8236
parent d882991bc5
commit 3d53da8236
3 changed files with 29 additions and 103 deletions
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@ -208,7 +208,7 @@ public class yacysearchitem {
                prop.put("content_showVocabulary", sb.getConfigBool("search.result.show.vocabulary", true) ? 1 : 0);
                if (showEvent) prop.put("content_showEvent_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(events[0]));
-                prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.modified()));
+                prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.moddate()));
                prop.putHTML("content_showSize_sizename", RSSMessage.sizename(result.filesize()));
                prop.put("content_showMetadata_urlhash", urlhash);
                prop.put("content_showParser_urlhash", urlhash);
@ -218,7 +218,7 @@ public class yacysearchitem {
                prop.put("content_showProxy_link", resultUrlstring);
                prop.put("content_showHostBrowser_link", resultUrlstring);
                if (sb.getConfigBool("search.result.show.vocabulary", true)) {
-                    URIMetadataNode node = result.getNode();
+                    URIMetadataNode node = result;
                    int c = 0;
                    for (Map.Entry<String, Object> entry: node.entrySet()) {
                        String key = entry.getKey();
@ -239,7 +239,7 @@ public class yacysearchitem {
            }
            prop.put("content_urlhexhash", Seed.b64Hash2hexHash(urlhash));
            prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), MAX_URL_LENGTH));
-            prop.put("content_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(result.modified()) : HeaderFramework.formatRFC1123(result.modified()));
+            prop.put("content_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(result.moddate()) : HeaderFramework.formatRFC1123(result.moddate()));
            if (showEvent) prop.put("content_showEvent_date822", isAtomFeed ? ISO8601Formatter.FORMATTER.format(events[0]) : HeaderFramework.formatRFC1123(events[0]));
            //prop.put("content_ybr", RankingProcess.ybr(result.hash()));
            prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
@ -248,9 +248,9 @@ public class yacysearchitem {
            prop.putXML("content_file", resultFileName); // putXML for rss
            prop.putXML("content_path", resultURL.getPath()); // putXML for rss
            prop.put("content_nl", (item == theSearch.query.offset) ? 0 : 1);
-            prop.putHTML("content_publisher", result.publisher());
+            prop.putHTML("content_publisher", result.dc_publisher());
-            prop.putHTML("content_creator", result.creator());// author
+            prop.putHTML("content_creator", result.dc_creator());// author
-            prop.putHTML("content_subject", result.subject());
+            prop.putHTML("content_subject", result.dc_subject());
            final Iterator<String> query = theSearch.query.getQueryGoal().getIncludeStrings();
            final StringBuilder s = new StringBuilder(theSearch.query.getQueryGoal().getIncludeSize() * 20);
            while (query.hasNext()) s.append('+').append(query.next());
@ -263,7 +263,7 @@ public class yacysearchitem {
            prop.put("content_description", desc);
            prop.putXML("content_description-xml", desc);
            prop.putJSON("content_description-json", desc);
-            prop.put("content_mimetype",result.getNode().mime()); // for atom <link> type attribute
+            prop.put("content_mimetype", result.mime()); // for atom <link> type attribute
            final HeuristicResult heuristic = theSearch.getHeuristic(result.hash());
            if (heuristic == null) {
                prop.put("content_heuristic", 0);
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@ -1570,25 +1570,22 @@ public final class SearchEvent {
    public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException {
        if (item < imageViewed.size()) return nthImage(item);
        if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
-        ResultEntry ms = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
+        ResultEntry doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
        // check if the match was made in the url or in the image links
-        if (ms == null) {
+        if (doc == null) {
            if (hasSpare()) return nextSpare();
            throw new MalformedURLException("no image url found");
        }
        // try to get more
-        SolrDocument doc = ms.getNode();
+
        // there can be two different kinds of image hits: either the document itself is an image or images are embedded in the links of text documents.
        String mime = (String) doc.getFirstValue(CollectionSchema.content_type.getSolrFieldName());
        // boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
        // generalize above hack (regarding url with file extension but beeing a html (with html mime)
-        char docType = Response.docType(mime); // first look at mime (as some html pages have img extension (like wikipedia)
+        if (doc.doctype() == Response.DT_IMAGE) {
-        if (docType == Response.DT_UNKNOWN) docType = Response.docType(ms.url()); // try extension if mime wasn't successful
+            String id = ASCII.String(doc.hash());
-
+            if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), "", doc.title(), 0, 0, 0));
-        if (docType == Response.DT_IMAGE) {
-            String id = ASCII.String(ms.hash());
-            if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(ms.url(), ms.url(), "", ms.title(), 0, 0, 0));
        } else {
            Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
            Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
@ -1612,7 +1609,7 @@ public final class SearchEvent {
                        boolean sizeok = h != null && w != null && h.intValue() > 16 && w.intValue() > 16;
                        String id = ASCII.String(imageUrl.hash());
                        if (!imageViewed.containsKey(id) && !containsSpare(id)) {
-                            ImageResult imageResult = new ImageResult(ms.url(), imageUrl, "", image_alt, w == null ? 0 : w, h == null ? 0 : h, 0);
+                            ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w == null ? 0 : w, h == null ? 0 : h, 0);
                            if (match || sizeok) imageSpareGood.put(id, imageResult); else imageSpareBad.put(id, imageResult);
                        }
                    } catch (MalformedURLException e) {
--- a/source/net/yacy/search/snippet/ResultEntry.java
+++ b/source/net/yacy/search/snippet/ResultEntry.java
@ -30,7 +30,6 @@ import java.io.IOException;
 import java.util.Comparator;
 import java.util.Date;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
 import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.util.ByteArray;
@ -39,21 +38,15 @@ import net.yacy.document.Condenser;
 import net.yacy.document.parser.pdfParser;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.data.word.WordReference;
 import net.yacy.kelondro.data.word.WordReferenceRow;
 import net.yacy.kelondro.data.word.WordReferenceVars;
 import net.yacy.kelondro.rwi.Reference;
 import net.yacy.kelondro.util.Bitfield;
 import net.yacy.peers.Seed;
 import net.yacy.peers.SeedDB;
 import net.yacy.search.index.Segment;
 import net.yacy.search.schema.CollectionSchema;
-public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEntry> {
+public class ResultEntry extends URIMetadataNode implements Comparable<ResultEntry>, Comparator<ResultEntry> {
    // payload objects
    private final URIMetadataNode urlentry;
    private String alternative_urlstring;
    private String alternative_urlname;
    private final TextSnippet textSnippet;
@ -63,8 +56,8 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
                       final Segment indexSegment,
                       SeedDB peers,
                       final TextSnippet textSnippet) {
-        this.urlentry = urlentry;
+        super(urlentry);
-        this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
+        this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
        this.indexSegment = indexSegment;
        this.alternative_urlstring = null;
        this.alternative_urlname = null;
@ -102,7 +95,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
    @Override
    public int hashCode() {
        if (this.hashCache == Integer.MIN_VALUE) {
-            this.hashCache = ByteArray.hashCode(this.urlentry.hash());
+            this.hashCache = ByteArray.hashCode(this.hash());
        }
        return this.hashCache;
    }
@ -112,29 +105,18 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
        if (obj == null) return false;
        if (!(obj instanceof ResultEntry)) return false;
        ResultEntry other = (ResultEntry) obj;
-        return Base64Order.enhancedCoder.equal(this.urlentry.hash(), other.urlentry.hash());
+        return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
    }
    public URIMetadataNode getNode() {
        return this.urlentry;
    }
    public byte[] hash() {
        return this.urlentry.hash();
    }
    public DigestURL url() {
        return this.urlentry.url();
    }
    public Bitfield flags() {
        return this.urlentry.flags();
    }
    public String urlstring() {
        if (this.alternative_urlstring != null) return this.alternative_urlstring;
        if (!pdfParser.individualPages) return this.url().toNormalform(true);
-        if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.urlentry.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
+        if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
        // for pdf links we rewrite the url
        // this is a special treatment of pdf files which can be splitted into subpages
        String pageprop = pdfParser.individualPagePropertyname;
-        String resultUrlstring = this.urlentry.url().toNormalform(true);
+        String resultUrlstring = this.url().toNormalform(true);
        int p = resultUrlstring.lastIndexOf(pageprop + "=");
        if (p > 0) {
          return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
@ -145,72 +127,22 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
        return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
    }
    public String title() {
-        String titlestr = this.urlentry.dc_title();
+        String titlestr = this.dc_title();
        // if title is empty use filename as title
        if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/" )
            titlestr = this.url() != null ? this.url().getFileName() : "";
        }
        return titlestr;
    }
-    public String publisher() {
-        // dc:publisher
-        return this.urlentry.dc_publisher();
-    }
-    public String creator() {
-        // dc:creator, the author
-        return this.urlentry.dc_creator();
-    }
-    public String subject() {
-        // dc:subject, keywords
-        return this.urlentry.dc_subject();
-    }
    public TextSnippet textSnippet() {
        return this.textSnippet;
    }
-    public Date modified() {
-        return this.urlentry.moddate();
-    }
    public Date[] events() {
-        return this.urlentry.datesInContent();
+        return this.datesInContent();
    }
-    public int filesize() {
-        return this.urlentry.filesize();
-    }
    public int referencesCount() {
        // urlCitationIndex index might be null (= configuration option)
-    	return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.urlentry.hash()) : 0;
+    	return this.indexSegment.connectedCitation() ? this.indexSegment.urlCitation().count(this.hash()) : 0;
    }
-    public int llocal() {
-    	return this.urlentry.llocal();
-    }
-    public int lother() {
-    	return this.urlentry.lother();
-    }
-    public int limage() {
-        return this.urlentry.limage();
-    }
-    public int laudio() {
-        return this.urlentry.laudio();
-    }
-    public int lvideo() {
-        return this.urlentry.lvideo();
-    }
-    public int lapp() {
-        return this.urlentry.lapp();
-    }
-    public double lat() {
-        return this.urlentry.lat();
-    }
-    public double lon() {
-        return this.urlentry.lon();
-    }
-    public WordReference word() {
-        final Reference word = this.urlentry.word();
-        if (word == null) return null;
-        if (word instanceof WordReferenceVars) return (WordReferenceVars) word;
-        if (word instanceof WordReferenceRow) return (WordReferenceRow) word;
-        assert word instanceof WordReferenceRow || word instanceof WordReferenceVars : word == null ? "word = null" : "type = " + word.getClass().getCanonicalName();
-        return null;
-    }
    public boolean hasTextSnippet() {
        return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
@ -218,19 +150,16 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
    public String resource() {
        // generate transport resource
        if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
-            return this.urlentry.toString();
+            return this.toString();
        }
-        return this.urlentry.toString(this.textSnippet.getLineRaw());
+        return this.toString(this.textSnippet.getLineRaw());
    }
    @Override
    public int compareTo(ResultEntry o) {
-        return Base64Order.enhancedCoder.compare(this.urlentry.hash(), o.urlentry.hash());
+        return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
    }
    @Override
    public int compare(ResultEntry o1, ResultEntry o2) {
-        return Base64Order.enhancedCoder.compare(o1.urlentry.hash(), o2.urlentry.hash());
+        return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
    }
-    public float score() {
-        return this.urlentry.score();
-    }
 }