// QueryParams.java // ----------------------- // part of YACY // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2005 // Created: 10.10.2005 // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.search.query; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.apache.commons.lang.StringUtils; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.RegExp; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.SortClause; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.FacetParams; import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.geo.GeoLocation; import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.LibraryProvider; import net.yacy.document.ProbabilisticClassifier; import net.yacy.document.Tokenizer; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.index.RowHandleSet; import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.SetTools; import net.yacy.peers.Seed; import net.yacy.search.index.Segment; import net.yacy.search.ranking.RankingProfile; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; public final class QueryParams { /** The default max count of item lines in navigator */ public static final int FACETS_STANDARD_MAXCOUNT_DEFAULT = 100; /** The default maximum number of date elements in the date navigator */ public static final int FACETS_DATE_MAXCOUNT_DEFAULT = 640; public enum Searchdom { LOCAL, CLUSTER, GLOBAL; @Override public String toString() { if (this == LOCAL) return "local"; 
else if (this == CLUSTER) return "global"; // yes thats right: global, not cluster because a cluster search is a global search else if (this == GLOBAL) return "global"; return "local"; } } private static final Map defaultfacetfields = new HashMap(); static { // the key shall match with configuration property search.navigation // defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used defaultfacetfields.put("hosts", CollectionSchema.host_s); defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s); defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s); defaultfacetfields.put("date", CollectionSchema.dates_in_content_dts); defaultfacetfields.put("authors", CollectionSchema.author_sxt); defaultfacetfields.put("collections", CollectionSchema.collection_sxt); defaultfacetfields.put("language", CollectionSchema.language_s); //missing: namespace } public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA"); public static final Pattern catchall_pattern = Pattern.compile(".*"); private final QueryGoal queryGoal; public int itemsPerPage; public int offset; /** The URL mask pattern compiled from the urlMasString. * Null when the urlMaskString is not user provided but generated from the query modifiers */ public Pattern urlMaskPattern; public Automaton urlMaskAutomaton; public String urlMaskString; public final Pattern prefer; public final String tld, inlink; /** true when the urlMasString is just a catch all pattern such as ".*" */ boolean urlMask_isCatchall; /** Content-Type classification of expected results */ public final Classification.ContentDomain contentdom; /** *

 * When false, results can be extended to documents including links to documents
 * of {@link #contentdom} type, without being themselves of that type.

* Examples : * */ private boolean strictContentDom = false; public final String targetlang; protected final Collection metatags; public final Searchdom domType; private final int zonecode; public final int maxDistance; public final Bitfield constraint; public final boolean allofconstraint; protected CacheStrategy snippetCacheStrategy; public final RankingProfile ranking; private final Segment indexSegment; public final String clienthost; // this is the client host that starts the query, not a site operator protected final Set siteexcludes; // set of domain hashes that are excluded if not included by sitehash public final QueryModifier modifier; public Seed remotepeer; public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds) protected final long maxtime; // values that are set after a search: public int transmitcount; // number of results that had been shown to the user public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public final String userAgent; protected double lat, lon, radius; public LinkedHashSet facetfields; private SolrQuery cachedQuery; private CollectionConfiguration solrSchema; public final int timezoneOffset; /** The max count of item lines in navigator */ private int standardFacetsMaxCount; /** The maximum number of date elements in the date navigator */ private int dateFacetMaxCount; public QueryParams( final QueryGoal queryGoal, final QueryModifier modifier, final int maxDistance, final String prefer, final ContentDomain contentdom, final String language, final int timezoneOffset, final Collection metatags, final CacheStrategy snippetCacheStrategy, final int itemsPerPage, final int offset, final String urlMask, final String tld, final String inlink, final Searchdom domType, final Bitfield constraint, final boolean allofconstraint, final Set siteexcludes, final int domainzone, final String host, final boolean specialRights, final Segment indexSegment, final RankingProfile ranking, final String userAgent, final double lat, final double lon, final double radius, final String[] search_navigation ) { this.queryGoal = queryGoal; this.modifier = modifier; this.ranking = ranking; this.maxDistance = maxDistance; this.contentdom = contentdom; this.timezoneOffset = timezoneOffset; this.itemsPerPage = Math.min((specialRights) ? 10000 : 1000, itemsPerPage); if(domType == Searchdom.LOCAL) { /* No offset restriction on local index only requests, as only itemsPerPage will be loaded */ this.offset = Math.max(0, offset); } else { /* Offset has to be limited on requests mixing local and remote results, because all results before offset are loaded */ this.offset = Math.max(0, Math.min((specialRights) ? 10000 - this.itemsPerPage : 1000 - this.itemsPerPage, offset)); } try { this.urlMaskString = urlMask; // solr doesn't like slashes, backslashes or doublepoints; remove them // urlmask = ".*\\." + ft + "(\\?.*)?"; int p; while ((p = this.urlMaskString.indexOf(':')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1); while ((p = this.urlMaskString.indexOf('/')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1); while ((p = this.urlMaskString.indexOf('\\')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." 
+ this.urlMaskString.substring(p + 2); this.urlMaskAutomaton = Automata.makeString(this.urlMaskString); this.urlMaskPattern = Pattern.compile(this.urlMaskString); } catch (final Throwable ex) { throw new IllegalArgumentException("Not a valid regular expression: " + urlMask, ex); } this.urlMask_isCatchall = this.urlMaskString.equals(catchall_pattern.toString()); if (this.urlMask_isCatchall) { final String filter = QueryParams.buildApproximateURLFilter(modifier, tld); if (!QueryParams.catchall_pattern.toString().equals(filter)) { this.urlMaskString = filter; this.urlMaskAutomaton = Automata.makeString(filter); this.urlMask_isCatchall = false; /* We let here the urlMaskPattern null : * final URL match checking will be made with the more accurate matchesURL function */ this.urlMaskPattern = null; } } this.tld = tld; this.inlink = inlink; try { this.prefer = Pattern.compile(prefer); } catch (final PatternSyntaxException ex) { throw new IllegalArgumentException("Not a valid regular expression: " + prefer, ex); } assert language != null; this.targetlang = language; this.metatags = metatags; this.domType = domType; this.zonecode = domainzone; this.constraint = constraint; this.allofconstraint = allofconstraint; this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? null: siteexcludes; this.snippetCacheStrategy = snippetCacheStrategy; this.clienthost = host; this.remotepeer = null; this.starttime = Long.valueOf(System.currentTimeMillis()); this.maxtime = 10000; this.indexSegment = indexSegment; this.userAgent = userAgent; this.transmitcount = 0; // we normalize here the location and radius because that should cause a better caching // and as surplus it will increase privacy this.lat = Math.floor(lat * this.kmNormal) / this.kmNormal; this.lon = Math.floor(lon * this.kmNormal) / this.kmNormal; this.radius = Math.floor(radius * this.kmNormal + 1) / this.kmNormal; this.facetfields = new LinkedHashSet(); this.solrSchema = indexSegment.fulltext().getDefaultConfiguration(); for (String navkey: search_navigation) { CollectionSchema f = defaultfacetfields.get(navkey); // handle special field, authors_sxt (add to facet w/o contains check, as authors_sxt is not enabled (is copyfield)) // dto. for coordinate_p_0_coordinate is not enabled but used for location facet (because coordinate_p not valid for facet field) if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt") || f.name().equals("coordinate_p_0_coordinate") )) this.facetfields.add(f.getSolrFieldName()); } if (LibraryProvider.autotagging != null) for (Tagging v: LibraryProvider.autotagging.getVocabularies()) { if (v.isFacet()) { this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX); } } for (String context: ProbabilisticClassifier.getContextNames()) { this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + context + CollectionSchema.VOCABULARY_TERMS_SUFFIX); } this.cachedQuery = null; this.standardFacetsMaxCount = FACETS_STANDARD_MAXCOUNT_DEFAULT; this.dateFacetMaxCount = FACETS_DATE_MAXCOUNT_DEFAULT; } /** * Generate an URL filter from the query modifier and eventual tld, usable as a * first approximation for filtering, and compatible with the yacy/search * API.
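 * For example (illustration only, not from the original sources): a modifier carrying only the
 * sitehost constraint "example.org" yields a filter that accepts URLs on "example.org" or
 * "www.example.org" under any protocol, while a call without any constraint at all simply
 * returns the catch-all pattern ".*".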
* For truly accurate filtering, checking constraints against parsed URLs in * MultiprotocolURL instances is easier and more reliable than building a complex regular * expression that must be both compatible with the JDK {@link Pattern} and with Lucene {@link RegExp}. * * @param modifier * query modifier with eventual protocol, sitehost and filetype * constraints. The modifier parameter itselft must not be null. * @param tld * an eventual Top Level Domain name * @return an URL filter regular expression from the provided modifier and tld * constraints, matching anything when there are no constraints at all. */ protected static String buildApproximateURLFilter(final QueryModifier modifier, final String tld) { final String protocolfilter = modifier.protocol == null ? ".*" : modifier.protocol; final String defaulthostprefix = "www"; final String hostfilter; if(modifier.sitehost == null && tld == null) { hostfilter = ".*"; } else if(modifier.sitehost == null) { hostfilter = ".*\\." + tld; } else if(modifier.sitehost.startsWith(defaulthostprefix + ".")){ hostfilter = "(" + defaulthostprefix + "\\.)?" + modifier.sitehost.substring(4); } else { hostfilter = "(" + defaulthostprefix + "\\.)?" + modifier.sitehost; } final String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*"; // TODO: should be ".ext" but while/comment above suggests not -> add filetype contrain pullOneFilteredFromRWI() String filter = protocolfilter + "..." + hostfilter + "." + filefilter; if (!filter.equals(".*....*..*")) { /* Remove redundant sequences of catch all expressions */ Pattern r = Pattern.compile("(\\.|(\\.\\*))\\.\\*"); Matcher m; while ((m = r.matcher(filter)).find()) { filter = m.replaceAll(".*"); } } else { filter = QueryParams.catchall_pattern.toString(); } return filter; } private double kmNormal = 100.d; // 100 =ca 40000.d / 360.d == 111.11 - if lat/lon is multiplied with this, rounded and diveded by this, the location is normalized to a 1km grid public Segment getSegment() { return this.indexSegment; } public int neededResults() { // the number of result lines that must be computed return this.offset + this.itemsPerPage; } public int itemsPerPage() { // the number of result lines that are displayed at once (size of result page) return this.itemsPerPage; } public void setOffset(final int newOffset) { this.offset = newOffset; } public boolean isLocal() { return this.domType == Searchdom.LOCAL; } /** * @return the max count of item lines in standard navigators */ public int getStandardFacetsMaxCount() { return this.standardFacetsMaxCount; } /** * @param standardFacetsMaxCount the max count of item lines in standard navigators */ public void setStandardFacetsMaxCount(final int standardFacetsMaxCount) { this.standardFacetsMaxCount = standardFacetsMaxCount; } /** * @return the maximum number of date elements in the date navigator */ public int getDateFacetMaxCount() { return this.dateFacetMaxCount; } /** * @param dateFacetMaxCount the maximum number of date elements in the date navigator */ public void setDateFacetMaxCount(final int dateFacetMaxCount) { this.dateFacetMaxCount = dateFacetMaxCount; } /** * @return false when results can be extended to documents including links to documents ot contentdom type. */ public boolean isStrictContentDom() { return this.strictContentDom; } /** * @param strictContentDom when false, results can be extended to documents including links to documents ot contentdom type. 
*/ public void setStrictContentDom(final boolean strictContentDom) { this.strictContentDom = strictContentDom; } public static HandleSet hashes2Set(final String query) { final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0); if (query != null) { for (int i = 0; i < (query.length() / Word.commonHashLength); i++) try { keyhashes.put(ASCII.getBytes(query.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength))); } catch (final SpaceExceededException e) { ConcurrentLog.logException(e); } } return keyhashes; } public static String hashSet2hashString(final HandleSet hashes) { final byte[] bb = new byte[hashes.size() * Word.commonHashLength]; int p = 0; for (final byte[] b : hashes) { assert b.length == Word.commonHashLength : "hash = " + ASCII.String(b); System.arraycopy(b, 0, bb, p, Word.commonHashLength); p += Word.commonHashLength; } return ASCII.String(bb); } public static String hashSet2hashString(final Set hashes) { final byte[] bb = new byte[hashes.size() * Word.commonHashLength]; int p = 0; for (final String s : hashes) { assert s.length() == Word.commonHashLength : "hash = " + s; System.arraycopy(ASCII.getBytes(s), 0, bb, p, Word.commonHashLength); p += Word.commonHashLength; } return ASCII.String(bb); } public static String anonymizedQueryHashes(final HandleSet hashes) { // create a more anonymized representation of a query hashes for logging final Iterator i = hashes.iterator(); final StringBuilder sb = new StringBuilder(hashes.size() * (Word.commonHashLength + 2) + 2); sb.append("["); byte[] hash; if (i.hasNext()) { hash = i.next(); sb.append(ASCII.String(hash).substring(0, 3)).append("........."); } while (i.hasNext()) { hash = i.next(); sb.append(", ").append(ASCII.String(hash).substring(0, 3)).append("........."); } sb.append("]"); return sb.toString(); } /** * Check wheter the given URL matches the eventual modifier and top-level domain * constraints. Should be preferred as more accurate than the url mask pattern generated with * {@link #buildApproximateURLFilter(QueryModifier, String)}. 
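 * For example (illustration only): a URL on host "docs.example.org" checked against a modifier
 * with sitehost "example.org" yields "sitehost", while a URL satisfying all constraints yields
 * the empty string.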
* * @param modifier * the query modifier with eventual constraints on protocoln, host * name or file extension * @param tld * an eventual top-level domain name to filter on * @param url * the url to check * @return the constraint that did not match ("url" when url is null, * "protocol", "sitehost", "tld", or "filetype"), or the empty string * when the url matches */ public static String matchesURL(final QueryModifier modifier, final String tld, final MultiProtocolURL url) { if (url == null) { return "url"; } if (modifier != null) { if (modifier.protocol != null) { if (!modifier.protocol.equalsIgnoreCase(url.getProtocol())) { return "protocol"; } } if (modifier.sitehost != null) { /* * consider to search for hosts with 'www'-prefix, if not already part of the * host name */ final String wwwPrefix = "www."; final String host; final String hostWithWwwPrefix; if (modifier.sitehost.startsWith(wwwPrefix)) { hostWithWwwPrefix = modifier.sitehost; host = modifier.sitehost.substring(wwwPrefix.length()); } else { hostWithWwwPrefix = wwwPrefix + modifier.sitehost; host = modifier.sitehost; } if (!host.equalsIgnoreCase(url.getHost()) && !hostWithWwwPrefix.equals(url.getHost())) { return "sitehost"; } } if (tld != null) { if (!tld.equalsIgnoreCase(url.getTLD())) { return "tld"; } } if (modifier.filetype != null) { if (!modifier.filetype.equalsIgnoreCase(MultiProtocolURL.getFileExtension(url.getFileName()))) { return "filetype"; } } } return ""; } /** * check if the given text matches with the query * this checks inclusion and exclusion words * @param text * @return true if the query matches with the given text */ private final boolean matchesText(final String text) { boolean ret = false; QueryGoal.NormalizedWords words = new QueryGoal.NormalizedWords(Tokenizer.getWords(text, null).keySet()); if (!SetTools.anymatchByTest(this.queryGoal.getExcludeWords(), words)) { ret = SetTools.totalInclusion(this.queryGoal.getIncludeWords(), words); } return ret; } protected static final boolean anymatch(final String text, final Iterator keywords) { if (keywords == null || !keywords.hasNext()) return false; final SortedSet textwords = (SortedSet) Tokenizer.getWords(text, null).keySet(); return SetTools.anymatchByTest(keywords, textwords); } public SolrQuery solrQuery(final ContentDomain cd, final boolean strictContentDom, final boolean getFacets, final boolean excludeintext_image) { if (cd == ContentDomain.IMAGE) { return solrImageQuery(getFacets, strictContentDom); } final List filterQueries; switch (cd) { case AUDIO: filterQueries = this.queryGoal.collectionAudioFilterQuery(strictContentDom); break; case VIDEO: filterQueries = this.queryGoal.collectionVideoFilterQuery(strictContentDom); break; case APP: filterQueries = this.queryGoal.collectionApplicationFilterQuery(strictContentDom); break; default: filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image); break; } return solrQuery(getFacets, filterQueries); } /** * @param getFacets when true, generate facets for fiels given in this.facetfields * @param filterQueries a mutable list of filter queries, initialized with filters related to content domain. Must not be null. 
* @return a Solr query instance ready to use */ private SolrQuery solrQuery(final boolean getFacets, final List filterQueries) { if (this.cachedQuery != null) { this.cachedQuery.setStart(this.offset); if (!getFacets) this.cachedQuery.setFacet(false); return this.cachedQuery; } // construct query final SolrQuery params = getBasicParams(getFacets, filterQueries); int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0; params.setQuery(this.queryGoal.collectionTextQuery().toString()); Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile String fq = actRanking.getFilterQuery(); String bq = actRanking.getBoostQuery(); String bf = actRanking.getBoostFunction(); final String qf = actRanking.getQueryFields(); if (!qf.isEmpty()) params.setParam(DisMaxParams.QF, qf); if (this.queryGoal.getIncludeSize() > 1) { // add boost on combined words if (bq.length() > 0) bq += "\n"; bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10"; } if (fq.length() > 0) { String[] oldfq = params.getFilterQueries(); ArrayList newfq = new ArrayList<>(oldfq.length + 1); for (String x: oldfq) newfq.add(x); newfq.add(fq); params.setFilterQueries(newfq.toArray(new String[newfq.size()])); } if (bq.length() > 0) params.setParam(DisMaxParams.BQ, bq.split("[\\r\\n]+")); // split on any sequence consisting of CR and/or LF if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 // prepare result ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); this.cachedQuery = params; return params; } private SolrQuery solrImageQuery(final boolean getFacets, final boolean strictContentDom) { if (this.cachedQuery != null) { this.cachedQuery.setStart(this.offset); if (!getFacets) this.cachedQuery.setFacet(false); return this.cachedQuery; } // construct query final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery(strictContentDom)); params.setQuery(this.queryGoal.collectionImageQuery(this.modifier).toString()); if(!strictContentDom) { // set boosts StringBuilder bq = new StringBuilder(); bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\""); params.setParam(DisMaxParams.BQ, bq.toString()); } // prepare result ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); this.cachedQuery = params; return params; } private SolrQuery getBasicParams(final boolean getFacets, final List fqs) { final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0"); params.setStart(this.offset); params.setRows(this.itemsPerPage); params.setFacet(false); if (this.ranking.coeff_date == RankingProfile.COEFF_MAX) { // set a most-recent ordering params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); //params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // 
// deprecated in Solr 4.2
        }

        // add site facets
        fqs.addAll(getFacetsFilterQueries());
        if (fqs.size() > 0) {
            params.setFilterQueries(fqs.toArray(new String[fqs.size()]));
        }

        // set facet query attributes
        if (getFacets && this.facetfields.size() > 0) {
            params.setFacet(true);
            params.setFacetMinCount(1);
            params.setFacetLimit(this.standardFacetsMaxCount);
            params.setFacetSort(FacetParams.FACET_SORT_COUNT);
            params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum); // fight the fieldcache
            for (String field: this.facetfields) params.addFacetField("{!ex=" + field + "}" + field);
            // params.addFacetField("{!ex=" + field + "}" + field);
            if (this.facetfields.contains(CollectionSchema.dates_in_content_dts.name())) {
                params.setParam(FacetParams.FACET_RANGE, CollectionSchema.dates_in_content_dts.name());
                String start = new Date(System.currentTimeMillis() - 1000L * 60L * 60L * 24L * 3).toInstant().toString();
                String end = new Date(System.currentTimeMillis() + 1000L * 60L * 60L * 24L * 3).toInstant().toString();
                params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.range.start", start);
                params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.range.end", end);
                params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.range.gap", "+1DAY");
                params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.sort", "index");
                params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.limit", Integer.toString(this.dateFacetMaxCount)); // the year constraint should cause that limitation already
            }
            //for (String k: params.getParameterNames()) {ArrayList al = new ArrayList<>(); for (String s: params.getParams(k)) al.add(s); System.out.println("Parameter: " + k + "=" + al.toString());}
            //http://localhost:8090/solr/collection1/select?q=*:*&rows=0&facet=true&facet.field=dates_in_content_dts&f.dates_in_content_dts.facet.limit=730&f.dates_in_content_dts.facet.sort=index
        } else {
            params.setFacet(false);
        }
        params.setFields("*", "score"); // we need the score for post-ranking
        return params;
    }

    long year = 1000L * 60L * 60L * 24L * 365L;

    private List<String> getFacetsFilterQueries() {
        ArrayList<String> fqs = new ArrayList<>();

        // add site facets
        if (this.modifier.sitehash == null && this.modifier.sitehost == null) {
            if (this.siteexcludes != null) {
                for (String ex: this.siteexcludes) {
                    fqs.add("-" + CollectionSchema.host_id_s.getSolrFieldName() + ':' + ex);
                }
            }
        } else {
            if (this.modifier.sitehost != null) {
                // consider to search for hosts with 'www'-prefix, if not already part of the host name
                if (this.modifier.sitehost.startsWith("www.")) {
                    fqs.add(CollectionSchema.host_s.getSolrFieldName() + ":\"" + this.modifier.sitehost.substring(4) + "\" OR " + CollectionSchema.host_s.getSolrFieldName() + ":\"" + this.modifier.sitehost + "\"");
                } else {
                    fqs.add(CollectionSchema.host_s.getSolrFieldName() + ":\"" + this.modifier.sitehost + "\" OR " + CollectionSchema.host_s.getSolrFieldName() + ":\"www."
+ this.modifier.sitehost + "\""); } } else fqs.add(CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + this.modifier.sitehash + '\"'); } // add vocabulary facets if (this.metatags != null) { for (Tagging.Metatag tag : this.metatags) { fqs.add(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX + ":\"" + tag.getObject() + '\"'); } } // add language facet if (this.modifier.language != null && this.modifier.language.length() > 0 && this.solrSchema.contains((CollectionSchema.language_s))) { fqs.add(CollectionSchema.language_s.getSolrFieldName() + ":\"" + this.modifier.language + '\"'); } // add author facets (check for contains(author) as author_sxt is omitted copyfield) if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author)) { fqs.add(CollectionSchema.author_sxt.getSolrFieldName() + ":\"" + this.modifier.author + '\"'); } // add keyword filter if (this.modifier.keyword != null && this.modifier.keyword.length() > 0 && this.solrSchema.contains(CollectionSchema.keywords)) { fqs.add(CollectionSchema.keywords.getSolrFieldName() + ":\"" + this.modifier.keyword + '\"'); } // add collection facets if (this.modifier.collection != null && this.modifier.collection.length() > 0 && this.solrSchema.contains(CollectionSchema.collection_sxt)) { fqs.add(QueryModifier.parseCollectionExpression(this.modifier.collection)); } if (this.solrSchema.contains(CollectionSchema.dates_in_content_dts)) { if (this.modifier.on != null && this.modifier.on.length() > 0) { fqs.add(QueryModifier.parseOnExpression(this.modifier.on, this.timezoneOffset)); } if (this.modifier.from != null && this.modifier.from.length() > 0 && (this.modifier.to == null || this.modifier.to.equals("*"))) { fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, null, this.timezoneOffset)); } if ((this.modifier.from == null || this.modifier.from.equals("*")) && this.modifier.to != null && this.modifier.to.length() > 0) { fqs.add(QueryModifier.parseFromToExpression(null, this.modifier.to, this.timezoneOffset)); } if (this.modifier.from != null && this.modifier.from.length() > 0 && this.modifier.to != null && this.modifier.to.length() > 0) { fqs.add(QueryModifier.parseFromToExpression(this.modifier.from, this.modifier.to, this.timezoneOffset)); } } if (this.modifier.protocol != null) { fqs.add("{!tag=" + CollectionSchema.url_protocol_s.getSolrFieldName() + "}" + CollectionSchema.url_protocol_s.getSolrFieldName() + ':' + this.modifier.protocol); } if (this.tld != null) { /* Use the host_s field which is mandatory, rather than the optional host_dnc_s field */ fqs.add(CollectionSchema.host_s.getSolrFieldName() + ":*." 
                    + this.tld);
        }
        if (this.modifier.filetype != null) {
            fqs.add(CollectionSchema.url_file_ext_s.getSolrFieldName() + ":\"" + this.modifier.filetype + '\"');
        }
        if (this.inlink != null) {
            fqs.add(CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName() + ":\"" + this.inlink + '\"');
        }
        if (!this.urlMask_isCatchall && this.urlMaskPattern != null) {
            // add a filter query on urls only if user-provided and not generated from other modifiers
            fqs.add(CollectionSchema.sku.getSolrFieldName() + ":/" + this.urlMaskString + "/");
        }
        if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
            // location search, no special ranking
            // try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1}
            //params.setQuery("!bbox " + q.toString());
            //params.set("sfield", YaCySchema.coordinate_p.name());
            //params.set("pt", Double.toString(this.lat) + "," + Double.toString(this.lon));
            //params.set("d", GeoLocation.degreeToKm(this.radius));
            fqs.add("{!bbox sfield=" + CollectionSchema.coordinate_p.getSolrFieldName() + " pt=" + Double.toString(this.lat) + "," + Double.toString(this.lon) + " d=" + GeoLocation.degreeToKm(this.radius) + "}");
            //params.setRows(Integer.MAX_VALUE);
        }
        return fqs;
    }

    public QueryGoal getQueryGoal() {
        return this.queryGoal;
    }

    public final Map<AnchorURL, String> separateMatches(final Map<AnchorURL, String> links) {
        final Map<AnchorURL, String> matcher = new HashMap<>();
        final Iterator<Map.Entry<AnchorURL, String>> i = links.entrySet().iterator();
        Map.Entry<AnchorURL, String> entry;
        AnchorURL url;
        String anchorText;
        while (i.hasNext()) {
            entry = i.next();
            url = entry.getKey();
            anchorText = entry.getValue();
            if (matchesText(anchorText)) {
                matcher.put(url, anchorText);
                i.remove();
            }
        }
        return matcher;
    }

    private volatile String idCacheAnon = null, idCache = null;
    final static private char asterisk = '*';

    public String id(final boolean anonymized) {
        if (anonymized) {
            if (this.idCacheAnon != null) return this.idCacheAnon;
        } else {
            if (this.idCache != null) return this.idCache;
        }
        synchronized (this) {
            // do a Double-Checked Locking
            if (anonymized) {
                if (this.idCacheAnon != null) return this.idCacheAnon;
            } else {
                if (this.idCache != null) return this.idCache;
            }
            // generate a string that identifies a search so results can be re-used in a cache
            final StringBuilder context = new StringBuilder(180);
            if (anonymized) {
                context.append(anonymizedQueryHashes(this.queryGoal.getIncludeHashes()));
                context.append('-');
                context.append(anonymizedQueryHashes(this.queryGoal.getExcludeHashes()));
            } else {
                context.append(hashSet2hashString(this.queryGoal.getIncludeHashes()));
                context.append('-');
                context.append(hashSet2hashString(this.queryGoal.getExcludeHashes()));
            }
            //context.append(asterisk);
            //context.append(this.domType);
            context.append(asterisk);
            context.append(this.contentdom).append(asterisk);
            context.append(this.strictContentDom).append(asterisk);
            context.append(this.zonecode).append(asterisk);
            context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk);
            context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk);
            context.append(Base64Order.enhancedCoder.encodeString(this.urlMaskString)).append(asterisk);
            context.append(this.modifier.sitehash).append(asterisk);
            context.append(this.modifier.author).append(asterisk);
            context.append(this.modifier.protocol).append(asterisk);
            context.append(this.modifier.filetype).append(asterisk);
            context.append(this.modifier.collection).append(asterisk);
            context.append(this.modifier.toString()).append(asterisk);
            context.append(this.siteexcludes).append(asterisk);
context.append(this.targetlang).append(asterisk); context.append(this.domType).append(asterisk); context.append(this.constraint).append(asterisk); context.append(this.maxDistance).append(asterisk); context.append(this.tld).append(asterisk); context.append(this.inlink).append(asterisk); context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk); context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name()); String result = context.toString(); if (anonymized) { this.idCacheAnon = result; } else { this.idCache = result; } return result; } } /** * Build a search query URL from the given parameters. * * @param ext extension of the servlet to request (e.g. "html", "rss", "json"...) * @param page index of the wanted page (first page is zero) * @param theQuery holds the main query parameters. Must not be null. * @param newModifier a eventual new modifier to append to the eventual ones already defined in theQuery QueryParams. Can be null. * @param newModifierReplacesOld when newModifier is not null, it is appended in addition * to existing modifier(s) - if it is empty it overwrites (clears) existing * modifier(s) * @param authenticatedFeatures * when true, access to authentication protected search features is * wanted * @return a StringBuilder instance with the URL to the new search result page */ public static StringBuilder navurl(final RequestHeader.FileType ext, final int page, final QueryParams theQuery, final String newModifier, boolean newModifierReplacesOld, final boolean authenticatedFeatures) { final StringBuilder sb = navurlBase(ext, theQuery, newModifier, newModifierReplacesOld, authenticatedFeatures); sb.append("&startRecord="); sb.append(page * theQuery.itemsPerPage()); return sb; } /** * Build a search query URL from the given parameters, removing only the given single query modifier. * * @param ext extension of the servlet to request (e.g. "html", "rss", "json"...) * @param page index of the wanted page (first page is zero) * @param theQuery holds the main query parameters. Must not be null. * @param modifierToRemove the query modifier to remove (e.g. "keyword:word", "/language/en", "site:example.org"...) * @param authenticatedFeatures * when true, access to authentication protected search features is * wanted * @return the URL to the new search result page */ public static String navUrlWithSingleModifierRemoved(final RequestHeader.FileType ext, final int page, final QueryParams theQuery, final String modifierToRemove, final boolean authenticatedFeatures) { final StringBuilder sb = new StringBuilder(120); sb.append("yacysearch."); sb.append(ext.name().toLowerCase(Locale.ROOT)); sb.append("?query="); sb.append(theQuery.getQueryGoal().getQueryString(true)); if (!theQuery.modifier.isEmpty()) { String modifierString = theQuery.modifier.toString(); if(StringUtils.isNotBlank(modifierToRemove)) { if(modifierString.startsWith(modifierToRemove)) { modifierString = modifierString.substring(modifierToRemove.length()); } else { modifierString = modifierString.replace(" " + modifierToRemove, ""); } } if(StringUtils.isNotBlank(modifierString)) { sb.append("+" + modifierString.trim()); } } appendNavUrlQueryParams(sb, theQuery, authenticatedFeatures); return sb.toString(); } /** * Build a search query URL with a new search query string, but keeping any already defined eventual modifiers. * * @param ext extension of the servlet to request (e.g. "html", "rss", "json"...) 
* @param page index of the wanted page (first page is zero) * @param theQuery holds the main query parameters. Must not be null. * @param authenticatedFeatures * when true, access to authentication protected search features is * wanted * @return the URL to the new search result page */ public static String navUrlWithNewQueryString(final RequestHeader.FileType ext, final int page, final QueryParams theQuery, final String newQueryString, final boolean authenticatedFeatures) { final StringBuilder sb = new StringBuilder(120); sb.append("yacysearch."); sb.append(ext.name().toLowerCase(Locale.ROOT)); sb.append("?query="); sb.append(new QueryGoal(newQueryString).getQueryString(true)); if (!theQuery.modifier.isEmpty()) { sb.append("+" + theQuery.modifier.toString()); } appendNavUrlQueryParams(sb, theQuery, authenticatedFeatures); return sb.toString(); } /** * construct navigator url * * @param ext * extension of servlet (e.g. html, rss) * @param theQuery * search query * @param newModifier optional new modifier. - if null existing modifier(s) of theQuery are * appended - if not null this new modifier is appended in addition * to eventually existing modifier(s) - if isEmpty overwrites (clears) any eventual existing * modifier(s) * @param newModifierReplacesOld considered only when newModifier is not null and not empty. When true, any existing modifiers with the same name are replaced with the new one. * @param authenticatedFeatures * when true, access to authentication protected search features is * wanted * @return url to new search result page */ public static StringBuilder navurlBase(final RequestHeader.FileType ext, final QueryParams theQuery, final String newModifier, final boolean newModifierReplacesOld, final boolean authenticatedFeatures) { final StringBuilder sb = new StringBuilder(120); sb.append("yacysearch."); sb.append(ext.name().toLowerCase(Locale.ROOT)); sb.append("?query="); sb.append(theQuery.getQueryGoal().getQueryString(true)); if (newModifier == null) { if (!theQuery.modifier.isEmpty()) { sb.append("+" + theQuery.modifier.toString()); } } else { if (!newModifier.isEmpty()) { if (!theQuery.modifier.isEmpty()) { sb.append("+" + theQuery.modifier.toString()); } if (newModifierReplacesOld) { removeOldModifiersFromNavUrl(sb, newModifier); } try { sb.append("+" + URLEncoder.encode(newModifier, StandardCharsets.UTF_8.name())); } catch (final UnsupportedEncodingException e) { sb.append("+" + newModifier); } } } appendNavUrlQueryParams(sb, theQuery, authenticatedFeatures); return sb; } /** * Append search query parameters to the URL builder already filled with the beginning of the URL. * * @param sb the URL string builder to fill. Must not be null. * @param theQuery holds the main query parameters. Must not be null. * @param authenticatedFeatures * when true, access to authentication protected search features is * wanted */ protected static void appendNavUrlQueryParams(final StringBuilder sb, final QueryParams theQuery, final boolean authenticatedFeatures) { sb.append("&maximumRecords="); sb.append(theQuery.itemsPerPage()); sb.append("&resource="); sb.append((theQuery.isLocal()) ? "local" : "global"); sb.append("&verify="); sb.append(theQuery.snippetCacheStrategy == null ? "false" : theQuery.snippetCacheStrategy.toName()); sb.append("&prefermaskfilter="); sb.append(theQuery.prefer); sb.append("&cat=href"); sb.append("&constraint="); sb.append((theQuery.constraint == null) ? 
"" : theQuery.constraint.exportB64()); sb.append("&contentdom="); sb.append(theQuery.contentdom.toString()); sb.append("&strictContentDom="); sb.append(String.valueOf(theQuery.isStrictContentDom())); sb.append("&former="); sb.append(theQuery.getQueryGoal().getQueryString(true)); if(authenticatedFeatures) { sb.append("&auth"); } } /** * Remove from the URL builder any query modifiers with the same name that the new modifier * @param sb * a StringBuilder holding the search URL navigation being built. * Must not be null and contain the URL base and the query string * with its eventual modifiers * @param newModifier * a new modifier of form key:value. Must not be null. */ protected static void removeOldModifiersFromNavUrl(final StringBuilder sb, final String newModifier) { int nmpi = newModifier.indexOf(":"); if (nmpi > 0) { final String newModifierKey = newModifier.substring(0, nmpi) + ":"; int sameModifierIndex = sb.indexOf(newModifierKey); while (sameModifierIndex > 0) { final int spaceModifierIndex = sb.indexOf(" ", sameModifierIndex); if(spaceModifierIndex > sameModifierIndex) { /* There are other modifiers after the matching one : we only remove the old matching modifier */ sb.delete(sameModifierIndex, spaceModifierIndex + 1); } else { /* The matching modifier is the last : we truncate the builder */ sb.setLength(sameModifierIndex); } sameModifierIndex = sb.indexOf(newModifierKey); } if (sb.charAt(sb.length() - 1) == '+') { sb.setLength(sb.length() - 1); } if (sb.charAt(sb.length() - 1) == ' ') { sb.setLength(sb.length() - 1); } } } }