/**
 *  URIMetadataNode
 *  Copyright 2012 by Michael Peter Christen
 *  First released 10.8.2012 at http://yacy.net
 *
 *  This file is part of YaCy Content Integration
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */
package net.yacy.kelondro.data.meta;

import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;

import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.MicroDate;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.SentenceReader;
import net.yacy.document.Tokenizer;
import net.yacy.document.parser.pdfParser;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.MapTools;
import net.yacy.kelondro.util.kelondroException;
import net.yacy.peers.Seed;
import net.yacy.peers.SeedDB;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryParams;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.utils.crypt;

import org.apache.solr.common.SolrDocument;
/**
 * This is the URIMetadata object implementation for Solr documents.
 * The purpose of this object is the migration from the old metadata structure to Solr documents.
 * Future implementations should try to replace URIMetadata objects completely with SolrDocument objects.
 */
public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMetadataNode>, Comparator<URIMetadataNode> */ {
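
    /* Usage sketch (illustrative, not part of the original source): an instance is
     * typically created either by wrapping a SolrDocument obtained from a query,
     *   URIMetadataNode node = new URIMetadataNode(solrDoc);
     * or by parsing a p2p transport string via importEntry(propStr, collection). */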

    private static final long serialVersionUID = -256046934741561968L;

    protected String keywords = null;
    protected DigestURL url;
    protected Bitfield flags = null;
    protected int imagec = -1, audioc = -1, videoc = -1, appc = -1;
    protected double lat = Double.NaN, lon = Double.NaN;
    protected float score = 0; // during generation of a search result this value is set
    protected String snippet = null;
    protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests

    // fields for search results (implemented from ResultEntry)
    private String alternative_urlstring;
    private String alternative_urlname;
    private TextSnippet textSnippet = null;
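
    /**
     * Create a metadata node from a property list in the format produced by
     * {@link #corePropList()}; this is the representation used for p2p transport
     * (see {@link #importEntry(String, String)}).
     * @param prop decoded property list
     * @param collection name of the collection this document is assigned to
     */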
    public URIMetadataNode(final Properties prop, String collection) {
        // generates a plasmaLURLEntry using the properties from the argument
        // the property names must correspond to the ones from toString
        //System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
        super();
        final String urlRaw = crypt.simpleDecode(prop.getProperty("url", ""));
        try {
            url = new DigestURL(urlRaw);
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
            this.url = null;
        }
        String descr = crypt.simpleDecode(prop.getProperty("descr", "")); if (descr == null) descr = "";
        String dc_creator = crypt.simpleDecode(prop.getProperty("author", "")); if (dc_creator == null) dc_creator = "";
        String tags = crypt.simpleDecode(prop.getProperty("tags", "")); if (tags == null) tags = "";
        this.keywords = Tagging.cleanTagFromAutotagging(tags);
        String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", "")); if (dc_publisher == null) dc_publisher = "";
        String lons = crypt.simpleDecode(prop.getProperty("lon"));
        String lats = crypt.simpleDecode(prop.getProperty("lat"));

        this.setField(CollectionSchema.title.name(), descr);
        this.setField(CollectionSchema.author.name(), dc_creator);
        this.setField(CollectionSchema.publisher_t.name(), dc_publisher);
        this.lon = (lons == null) ? 0.0d : Double.parseDouble(lons);
        this.lat = (lats == null) ? 0.0d : Double.parseDouble(lats);

        // create new formatters to make concurrency possible
        final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);

        try {
            this.setField(CollectionSchema.last_modified.name(), formatter.parse(prop.getProperty("mod", "20000101"), 0).getTime());
        } catch (final ParseException e) {
            this.setField(CollectionSchema.last_modified.name(), new Date());
        }
        try {
            this.setField(CollectionSchema.load_date_dt.name(), formatter.parse(prop.getProperty("load", "20000101"), 0).getTime());
        } catch (final ParseException e) {
            this.setField(CollectionSchema.load_date_dt.name(), new Date());
        }
        try {
            this.setField(CollectionSchema.fresh_date_dt.name(), formatter.parse(prop.getProperty("fresh", "20000101"), 0).getTime());
        } catch (final ParseException e) {
            this.setField(CollectionSchema.fresh_date_dt.name(), new Date());
        }
        this.setField(CollectionSchema.referrer_id_s.name(), prop.getProperty("referrer", ""));
        this.setField(CollectionSchema.md5_s.name(), prop.getProperty("md5", ""));
        this.setField(CollectionSchema.size_i.name(), Integer.parseInt(prop.getProperty("size", "0")));
        this.setField(CollectionSchema.wordcount_i.name(), Integer.parseInt(prop.getProperty("wc", "0")));
        final String dt = prop.getProperty("dt", "t");
        String[] mime = Response.doctype2mime(null, dt.charAt(0));
        this.setField(CollectionSchema.content_type.name(), mime);
        final String flagsp = prop.getProperty("flags", "AAAAAA");
        this.flags = (flagsp.length() > 6) ? QueryParams.empty_constraint : (new Bitfield(4, flagsp));
        this.setField(CollectionSchema.language_s.name(), prop.getProperty("lang", ""));
        this.setField(CollectionSchema.inboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("llocal", "0")));
        this.setField(CollectionSchema.outboundlinkscount_i.name(), Integer.parseInt(prop.getProperty("lother", "0")));
        this.imagec = Integer.parseInt(prop.getProperty("limage", "0"));
        this.audioc = Integer.parseInt(prop.getProperty("laudio", "0"));
        this.videoc = Integer.parseInt(prop.getProperty("lvideo", "0"));
        this.appc = Integer.parseInt(prop.getProperty("lapp", "0"));
        this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
        this.score = Float.parseFloat(prop.getProperty("score", "0.0"));
        List<String> cs = new ArrayList<String>();
        cs.add(collection);
        this.setField(CollectionSchema.collection_sxt.name(), cs);
        this.word = null;
        if (prop.containsKey("wi")) {
            this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
        }
    }
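
    /**
     * Create a metadata node by copying all fields of a Solr search result document.
     * The special "score" pseudo-field, if present, is stored as the ranking score.
     */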
    public URIMetadataNode(final SolrDocument doc) {
        super();
        for (String name : doc.getFieldNames()) {
            this.addField(name, doc.getFieldValue(name));
        }
        Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
        this.score = scorex == null ? 0.0f : scorex.floatValue();
        final byte[] hash = ASCII.getBytes(getString(CollectionSchema.id)); // TODO: can we trust this id?
        final String urlRaw = getString(CollectionSchema.sku);
        try {
            this.url = new DigestURL(urlRaw, hash);
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
            this.url = null;
        }
    }
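
    /**
     * Like {@link #URIMetadataNode(SolrDocument)}, but additionally stores the
     * searched word reference and the ranking score of a remote search result.
     */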
    public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) {
        this(doc);
        this.word = searchedWord;
        this.score = scorex;
    }

    public URIMetadataNode(final String urlstr) {
        super();
        try {
            url = new DigestURL(urlstr);
            this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
            this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
        } catch (final MalformedURLException e) {
            ConcurrentLog.logException(e);
            this.url = null;
        }
    }

    public URIMetadataNode(DigestURL theurl) {
        super();
        url = theurl;
        this.setField(CollectionSchema.sku.name(), url.toNormalform(true));
        this.setField(CollectionSchema.id.name(), ASCII.String(url.hash()));
    }

    /**
     * Get the content domain of a document. This tries to get the content domain from the mime type
     * and if this fails it uses alternatively the content domain from the file extension.
     * @return the content domain which classifies the content type
     */
    public ContentDomain getContentDomain() {
        String mime = mime();
        if (mime == null) return this.url.getContentDomainFromExt();
        ContentDomain contentDomain = Classification.getContentDomainFromMime(mime);
        if (contentDomain != ContentDomain.ALL) return contentDomain;
        return this.url.getContentDomainFromExt();
    }

    public byte[] hash() {
        return this.url.hash();
    }
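
    /**
     * Get the host hash of this document: either the host_id_s field from Solr
     * or, as a fallback, the host part (6 bytes starting at offset 6) of the URL hash.
     */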
    public String hosthash() {
        String hosthash = (String) this.getFieldValue(CollectionSchema.host_id_s.getSolrFieldName());
        if (hosthash == null) hosthash = ASCII.String(this.url.hash(), 6, 6);
        return hosthash;
    }

    public Date moddate() {
        return getDate(CollectionSchema.last_modified);
    }

    public Date[] datesInContent() {
        return getDates(CollectionSchema.dates_in_content_dts);
    }

    public DigestURL url() {
        return this.url;
    }
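
    /**
     * Check whether the complete normalized (and lower-cased) URL matches the given pattern.
     */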
    public boolean matches(Pattern pattern) {
        return pattern.matcher(this.url.toNormalform(true).toLowerCase()).matches();
        //CharacterRunAutomaton automaton = new CharacterRunAutomaton(matcher);
        //boolean match = automaton.run(this.url.toNormalform(true).toLowerCase());
        //return match;
    }

    public String dc_title() {
        ArrayList<String> a = getStringList(CollectionSchema.title);
        if (a == null || a.size() == 0) return "";
        return a.get(0);
    }

    public List<String> h1() {
        ArrayList<String> a = getStringList(CollectionSchema.h1_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public List<String> h2() {
        ArrayList<String> a = getStringList(CollectionSchema.h2_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public List<String> h3() {
        ArrayList<String> a = getStringList(CollectionSchema.h3_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public List<String> h4() {
        ArrayList<String> a = getStringList(CollectionSchema.h4_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public List<String> h5() {
        ArrayList<String> a = getStringList(CollectionSchema.h5_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public List<String> h6() {
        ArrayList<String> a = getStringList(CollectionSchema.h6_txt);
        if (a == null || a.size() == 0) return new ArrayList<String>(0);
        return a;
    }

    public String dc_creator() {
        return getString(CollectionSchema.author);
    }

    public String dc_publisher() {
        return getString(CollectionSchema.publisher_t);
    }

    public String dc_subject() {
        if (this.keywords == null) {
            this.keywords = getString(CollectionSchema.keywords);
        }
        return this.keywords;
    }
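
    /**
     * Get the latitude of this document. The value is parsed lazily from the
     * coordinate_p field ("lat,lon"); out-of-range values are reset to 0.0.
     */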
    public double lat() {
        if (Double.isNaN(this.lat)) {
            this.lon = 0.0d;
            this.lat = 0.0d;
            String latlon = (String) this.getFieldValue(CollectionSchema.coordinate_p.getSolrFieldName());
            if (latlon != null) {
                int p = latlon.indexOf(',');
                if (p > 0) {
                    // only needed if not already checked by solr coordinate
                    if (latlon.charAt(0) <= '9') { // prevent alpha's
                        this.lat = Double.parseDouble(latlon.substring(0, p));
                        if (this.lat < -90.0d || this.lat > 90.0d) this.lat = 0.0d;
                    }
                    if ((p < latlon.length() - 1) && (latlon.charAt(p + 1) <= '9')) {
                        this.lon = Double.parseDouble(latlon.substring(p + 1));
                        if (this.lon < -180.0d || this.lon > 180.0d) this.lon = 0.0d;
                    }
                }
            }
        }
        return this.lat;
    }

    public double lon() {
        if (Double.isNaN(this.lon)) lat();
        return this.lon;
    }

    public float score() {
        return this.score;
    }

    public Date loaddate() {
        return getDate(CollectionSchema.load_date_dt);
    }

    /**
     * Get the calculated date until which the resource shall be considered fresh;
     * this may be a date in the future.
     *
     * @return Date initially calculated as (loaddate + (loaddate - lastmodified) / 2)
     */
    public Date freshdate() {
        // getDate() can't be used as it checks for date <= now
        Date x = (Date) this.getFieldValue(CollectionSchema.fresh_date_dt.getSolrFieldName());
        if (x == null) return new Date(0);
        return x;
    }

    public String md5() {
        return getString(CollectionSchema.md5_s);
    }

    public char doctype() {
        ArrayList<String> a = getStringList(CollectionSchema.content_type);
        if (a == null || a.size() == 0) return Response.docType(url());
        return Response.docType(a.get(0));
    }

    public String mime() {
        ArrayList<String> mime = getStringList(CollectionSchema.content_type);
        return mime == null || mime.size() == 0 ? null : mime.get(0);
    }

    public String language() {
        String language = getString(CollectionSchema.language_s);
        if (language == null || language.length() == 0) return "en";
        return language;
    }

    public byte[] referrerHash() {
        String referrer = getString(CollectionSchema.referrer_id_s);
        if (referrer == null || referrer.length() == 0) return null;
        return ASCII.getBytes(referrer);
    }

    /**
     * Gives the size in bytes of the original url document.
     * @return filesize of url
     */
    public int filesize() {
        return getInt(CollectionSchema.size_i);
    }
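
    /**
     * Get the document flags. If they were not transported with the metadata, they
     * are reconstructed lazily from the content domain, the coordinates and the
     * image/audio/video/app link counts.
     */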
    public Bitfield flags() {
        if (flags == null) {
            this.flags = new Bitfield();
            if (dc_subject() != null && dc_subject().indexOf("indexof") >= 0) this.flags.set(Tokenizer.flag_cat_indexof, true);
            ContentDomain cd = getContentDomain();
            if (lon() != 0.0d || lat() != 0.0d) this.flags.set(Tokenizer.flag_cat_haslocation, true);
            if (cd == ContentDomain.IMAGE || limage() > 0) this.flags.set(Tokenizer.flag_cat_hasimage, true);
            if (cd == ContentDomain.AUDIO || laudio() > 0) this.flags.set(Tokenizer.flag_cat_hasaudio, true);
            if (cd == ContentDomain.VIDEO || lvideo() > 0) this.flags.set(Tokenizer.flag_cat_hasvideo, true);
            if (cd == ContentDomain.APP) this.flags.set(Tokenizer.flag_cat_hasapp, true);
            if (lapp() > 0) this.flags.set(Tokenizer.flag_cat_hasapp, true);
        }
        return this.flags;
    }

    public int wordCount() {
        return getInt(CollectionSchema.wordcount_i);
    }

    public int llocal() {
        return getInt(CollectionSchema.inboundlinkscount_i);
    }

    public int lother() {
        return getInt(CollectionSchema.outboundlinkscount_i);
    }

    public int limage() {
        if (this.imagec == -1) {
            this.imagec = getInt(CollectionSchema.imagescount_i);
        }
        return this.imagec;
    }

    public int laudio() {
        if (this.audioc == -1) {
            this.audioc = getInt(CollectionSchema.audiolinkscount_i);
        }
        return this.audioc;
    }

    public int lvideo() {
        if (this.videoc == -1) {
            this.videoc = getInt(CollectionSchema.videolinkscount_i);
        }
        return this.videoc;
    }

    public int lapp() {
        if (this.appc == -1) {
            this.appc = getInt(CollectionSchema.applinkscount_i);
        }
        return this.appc;
    }

    public int virtualAge() {
        return MicroDate.microDateDays(moddate());
    }

    public int wordsintitle() {
        ArrayList<Integer> x = getIntList(CollectionSchema.title_words_val);
        if (x == null || x.size() == 0) return 0;
        return x.get(0).intValue();
    }

    public int urllength() {
        return getInt(CollectionSchema.url_chars_i);
    }

    public String snippet() {
        return this.snippet;
    }

    public String[] collections() {
        ArrayList<String> a = getStringList(CollectionSchema.collection_sxt);
        return a.toArray(new String[a.size()]);
    }

    public WordReferenceVars word() {
        return this.word;
    }
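
    /**
     * Reconstruct the absolute inbound or outbound links of a document from the
     * protocol and urlstub fields; URL fragments ('#...') are stripped.
     */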
    public static Iterator<String> getLinks(SolrDocument doc, boolean inbound) {
        Collection<Object> urlstub = doc.getFieldValues((inbound ? CollectionSchema.inboundlinks_urlstub_sxt : CollectionSchema.outboundlinks_urlstub_sxt).getSolrFieldName());
        Collection<String> urlprot = urlstub == null ? null : CollectionConfiguration.indexedList2protocolList(doc.getFieldValues((inbound ? CollectionSchema.inboundlinks_protocol_sxt : CollectionSchema.outboundlinks_protocol_sxt).getSolrFieldName()), urlstub.size());
        String u;
        LinkedHashSet<String> list = new LinkedHashSet<String>();
        if (urlprot != null && urlstub != null) {
            assert urlprot.size() == urlstub.size();
            Object[] urlprota = urlprot.toArray();
            Object[] urlstuba = urlstub.toArray();
            for (int i = 0; i < urlprota.length; i++) {
                u = ((String) urlprota[i]) + "://" + ((String) urlstuba[i]);
                int hp = u.indexOf('#');
                if (hp > 0) u = u.substring(0, hp);
                list.add(u);
            }
        }
        return list.iterator();
    }
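
    /**
     * Read a date field from a Solr document; missing values default to new Date(0)
     * and dates in the future are clamped to the current time.
     */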
    public static Date getDate(SolrDocument doc, final CollectionSchema key) {
        Date x = doc == null ? null : (Date) doc.getFieldValue(key.getSolrFieldName());
        Date now = new Date();
        return (x == null) ? new Date(0) : x.after(now) ? now : x;
    }

    public String getText() {
        return getString(CollectionSchema.text_t);
    }
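
    /**
     * Split the full text of the document into sentences using a SentenceReader.
     * @return an empty list if the document has no text
     */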
    public List<StringBuilder> getSentences(final boolean pre) {
        List<StringBuilder> sentences = new ArrayList<>();
        String text = this.getText();
        if (text == null || text.length() == 0) return sentences;
        SentenceReader sr = new SentenceReader(text, pre);
        while (sr.hasNext()) sentences.add(sr.next());
        sr.close();
        sr = null;
        text = null;
        return sentences;
    }

    public ArrayList<String> getDescription() {
        return getStringList(CollectionSchema.description_txt);
    }
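
    /**
     * Parse a metadata entry from its p2p transport form, a property list enclosed
     * in curly braces as produced by {@link #toString(String)}.
     * Sketch (illustrative values): importEntry("{hash=...,url=...,descr=...}", "dht")
     * @return the parsed node, or null if propStr is not well-formed
     */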
    public static URIMetadataNode importEntry(final String propStr, String collection) {
        if (propStr == null || propStr.isEmpty() || propStr.charAt(0) != '{' || !propStr.endsWith("}")) {
            ConcurrentLog.severe("URIMetadataNode", "importEntry: propStr is not proper: " + propStr);
            return null;
        }
        try {
            return new URIMetadataNode(MapTools.s2p(propStr.substring(1, propStr.length() - 1)), collection);
        } catch (final kelondroException e) {
            // wrong format
            ConcurrentLog.severe("URIMetadataNode", e.getMessage());
            return null;
        }
    }

    protected StringBuilder corePropList() {
        // generate a parseable string; this is a simple property-list
        final StringBuilder s = new StringBuilder(300);

        // create new formatters to make concurrency possible
        final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);

        try {
            s.append("hash=").append(ASCII.String(this.hash()));
            s.append(",url=").append(crypt.simpleEncode(this.url().toNormalform(true)));
            s.append(",descr=").append(crypt.simpleEncode(this.dc_title()));
            s.append(",author=").append(crypt.simpleEncode(this.dc_creator()));
            s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(this.dc_subject())));
            s.append(",publisher=").append(crypt.simpleEncode(this.dc_publisher()));
            s.append(",lat=").append(this.lat());
            s.append(",lon=").append(this.lon());
            s.append(",mod=").append(formatter.format(this.moddate()));
            s.append(",load=").append(formatter.format(this.loaddate()));
            s.append(",fresh=").append(formatter.format(this.freshdate()));
            s.append(",referrer=").append(this.referrerHash() == null ? "" : ASCII.String(this.referrerHash()));
            s.append(",md5=").append(this.md5());
            s.append(",size=").append(this.filesize());
            s.append(",wc=").append(this.wordCount());
            s.append(",dt=").append(this.doctype());
            s.append(",flags=").append(this.flags().exportB64());
            s.append(",lang=").append(this.language());
            s.append(",llocal=").append(this.llocal());
            s.append(",lother=").append(this.lother());
            s.append(",limage=").append(this.limage());
            s.append(",laudio=").append(this.laudio());
            s.append(",lvideo=").append(this.lvideo());
            s.append(",lapp=").append(this.lapp());
            s.append(",score=").append(Float.toString(this.score()));
            if (this.word() != null) {
                // append also word properties
                final String wprop = this.word().toPropertyForm();
                s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
            }
            return s;
        } catch (final Throwable e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }
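
    /* Note: the string produced by toString(String) below is parsed back on the receiving
     * peer by importEntry(String, String) above; the keys written in corePropList()
     * therefore have to match the keys read in the Properties constructor. */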
    /**
     * The toString format must be completely identical to URIMetadataRow because
     * that is used to transport the data over p2p connections.
     */
    public String toString(String snippet) {
        // add information needed for remote transport
        final StringBuilder core = corePropList();
        if (core == null)
            return null;

        core.ensureCapacity(core.length() + snippet.length() * 2);
        core.insert(0, '{');
        core.append(",snippet=").append(crypt.simpleEncode(snippet));
        core.append('}');
        return core.toString();
        //return "{" + core + ",snippet=" + crypt.simpleEncode(snippet) + "}";
    }

    /**
     * @return the object as String.<br>
     * This e.g. looks like this:
     * <pre>{hash=jmqfMk7Y3NKw,referrer=------------,mod=20050610,load=20051003,size=51666,wc=1392,cc=0,local=true,q=AEn,dt=h,lang=uk,url=b|aHR0cDovL3d3dy50cmFuc3BhcmVuY3kub3JnL3N1cnZleXMv,descr=b|S25vd2xlZGdlIENlbnRyZTogQ29ycnVwdGlvbiBTdXJ2ZXlzIGFuZCBJbmRpY2Vz}</pre>
     */
    @Override
    public String toString() {
        final StringBuilder core = corePropList();
        if (core == null) return null;

        core.insert(0, '{');
        core.append('}');
        return core.toString();
    }
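
    /**
     * Read a single-valued integer field; missing or non-integer values default to 0.
     */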
    private int getInt(CollectionSchema field) {
        assert !field.isMultiValued();
        assert field.getType() == SolrType.num_integer;
        Object x = this.getFieldValue(field.getSolrFieldName());
        if (x == null) return 0;
        if (x instanceof Integer) return ((Integer) x).intValue();
        if (x instanceof Long) return ((Long) x).intValue();
        return 0;
    }

    private Date getDate(CollectionSchema field) {
        assert !field.isMultiValued();
        assert field.getType() == SolrType.date;
        Date x = (Date) this.getFieldValue(field.getSolrFieldName());
        if (x == null) return new Date(0);
        Date now = new Date();
        return x.after(now) ? now : x;
    }

    private Date[] getDates(CollectionSchema field) {
        assert field.isMultiValued();
        assert field.getType() == SolrType.date;
        @SuppressWarnings("unchecked")
        List<Date> x = (List<Date>) this.getFieldValue(field.getSolrFieldName());
        if (x == null) return new Date[0];
        return x.toArray(new Date[x.size()]);
    }

    private String getString(CollectionSchema field) {
        assert !field.isMultiValued();
        assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
        Object x = this.getFieldValue(field.getSolrFieldName());
        if (x == null) return "";
        if (x instanceof ArrayList) {
            @SuppressWarnings("unchecked")
            ArrayList<String> xa = (ArrayList<String>) x;
            return xa.size() == 0 ? "" : xa.get(0);
        }
        return (String) x;
    }

    @SuppressWarnings("unchecked")
    private ArrayList<String> getStringList(CollectionSchema field) {
        assert field.isMultiValued();
        assert field.getType() == SolrType.string || field.getType() == SolrType.text_general;
        Object r = this.getFieldValue(field.getSolrFieldName());
        if (r == null) return new ArrayList<String>(0);
        if (r instanceof ArrayList) {
            return (ArrayList<String>) r;
        }
        ArrayList<String> a = new ArrayList<String>(1);
        a.add((String) r);
        return a;
    }

    @SuppressWarnings("unchecked")
    private ArrayList<Integer> getIntList(CollectionSchema field) {
        assert field.isMultiValued();
        assert field.getType() == SolrType.num_integer;
        Object r = this.getFieldValue(field.getSolrFieldName());
        if (r == null) return new ArrayList<Integer>(0);
        if (r instanceof ArrayList) {
            return (ArrayList<Integer>) r;
        }
        ArrayList<Integer> a = new ArrayList<Integer>(1);
        a.add((Integer) r);
        return a;
    }
    // --- implementation for use as search result ----------

    /**
     * Initialize some variables that are only needed for search results
     * and eliminate underlying fields that are not needed for search results.
     *
     * Never put this entry back into the index because of the reduced content fields!
     * @param indexSegment
     * @param peers
     * @param textSnippet
     * @return this
     */
    public URIMetadataNode makeResultEntry(
            final Segment indexSegment,
            SeedDB peers,
            final TextSnippet textSnippet) {
        this.removeFields(CollectionSchema.text_t.getSolrFieldName()); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
        //this.indexSegment = indexSegment;
        this.alternative_urlstring = null;
        this.alternative_urlname = null;
        this.textSnippet = textSnippet;
        final String host = this.url().getHost();
        if (host != null && host.endsWith(".yacyh")) {
            // translate host into current IP
            int p = host.indexOf('.');
            final String hash = Seed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
            final Seed seed = peers.getConnected(hash);
            final String path = this.url().getFile();
            String address = null;
            if ((seed == null) || ((address = seed.getPublicAddress(seed.getIP())) == null)) {
                // seed is not known from here
                try {
                    if (indexSegment.termIndex() != null) indexSegment.termIndex().remove(
                            Word.words2hashesHandles(Tokenizer.getWords(
                                ("yacyshare " +
                                 path.replace('?', ' ') +
                                 " " +
                                 this.dc_title()), null).keySet()),
                            this.hash());
                } catch (final IOException e) {
                    ConcurrentLog.logException(e);
                }
                indexSegment.fulltext().remove(this.hash()); // clean up
            }
            this.alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + path;
            this.alternative_urlname = "http://" + seed.getName() + ".yacy" + path;
            if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
        }
        return this;
    }

    /**
     * used for search result entry
     */
    public String urlstring() {
        if (this.alternative_urlstring != null) return this.alternative_urlstring;

        if (!pdfParser.individualPages) return this.url().toNormalform(true);
        if (!"pdf".equals(MultiProtocolURL.getFileExtension(this.url().getFileName()).toLowerCase())) return this.url().toNormalform(true);
        // for pdf links we rewrite the url
        // this is a special treatment of pdf files which can be split into subpages
        String pageprop = pdfParser.individualPagePropertyname;
        String resultUrlstring = this.url().toNormalform(true);
        int p = resultUrlstring.lastIndexOf(pageprop + "=");
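        // example (assuming the page property name is "page"):
        //   ...document.pdf?page=3  becomes  ...document.pdf#page=3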
        if (p > 0) {
            return resultUrlstring.substring(0, p - 1) + "#page=" + resultUrlstring.substring(p + pageprop.length() + 1);
        }
        return resultUrlstring;
    }

    /**
     * used for search result entry
     */
    public String urlname() {
        return (this.alternative_urlname == null) ? MultiProtocolURL.unescape(urlstring()) : this.alternative_urlname;
    }

    /**
     * used for search result entry
     */
    public String title() {
        String titlestr = this.dc_title();
        // if title is empty use filename as title
        if (titlestr.isEmpty()) { // if url has no filename, title is still empty (e.g. "www.host.com/")
            titlestr = this.url() != null ? this.url().getFileName() : "";
        }
        return titlestr;
    }

    /**
     * used for search result entry
     */
    public TextSnippet textSnippet() {
        return this.textSnippet;
    }

    /**
     * used for search result entry
     */
    public Date[] events() {
        return this.datesInContent();
    }

    /**
     * used for search result entry
     */
    public boolean hasTextSnippet() {
        return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
    }

    /**
     * used for search result entry
     */
    public String resource() {
        // generate transport resource
        if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
            return this.toString();
        }
        return this.toString(this.textSnippet.getLineRaw());
    }

    @Override
    public int hashCode() {
        return this.url().hashCode();
    }
}