diff --git a/defaults/yacy.init b/defaults/yacy.init index 1d8c67449..a13b1a055 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -843,6 +843,17 @@ search.audio = false search.video = false search.app = false +# Strict content domain filtering : when false, results can be extended to documents including links to documents +# of contentdom type, without being themselves of that type. +# Examples : +# - contentdom search param == image, strictContentDom == true +# - jpeg image : acceptable result +# - html page embedding images : rejected +# - contentdom search param == image, strictContentDom == false +# - jpeg image : acceptable result +# - html page embedding images : acceptable result +search.strictContentDom = false + # number of search results per page displayed by default search.items = 10 diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 46bfef3a4..ed810d475 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -116,6 +116,7 @@ public final class search { final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE); final String prefer = post.get("prefer", ""); final String contentdom = post.get("contentdom", "all"); + final boolean strictContentDom = post.getBoolean("strictContentDom"); final String filter = post.get("filter", ".*"); // a filter on the url final int timezoneOffset = post.getInt("timezoneOffset", 0); QueryModifier modifier = new QueryModifier(timezoneOffset); @@ -255,6 +256,7 @@ public final class search { 0.0d, new String[0] ); + theQuery.setStrictContentDom(strictContentDom); Network.log.info("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links"); final long timer = System.currentTimeMillis(); @@ -319,6 +321,7 @@ public final class search { 0.0d, new String[0] ); + theQuery.setStrictContentDom(strictContentDom); Network.log.info("INIT HASH SEARCH (query-" + abstracts + "): " + 
QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links"); EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), "")); if (sb.getConfigBool(SwitchboardConstants.DECORATION_AUDIO, false)) Audio.Soundclip.remotesearch.play(-10.0f); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 37415d878..df72f3fb7 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -290,6 +290,11 @@ public class yacysearch { // find search domain final Classification.ContentDomain contentdom = post == null || !post.containsKey("contentdom") ? ContentDomain.ALL : ContentDomain.contentdomParser(post.get("contentdom", "all")); + + // Strict/extended content domain constraint : configured setting may be overriden by request param + final boolean strictContentDom = !Boolean.FALSE.toString().equalsIgnoreCase(post.get("strictContentDom", + sb.getConfig(SwitchboardConstants.SEARCH_STRICT_CONTENT_DOM, + String.valueOf(SwitchboardConstants.SEARCH_STRICT_CONTENT_DOM_DEFAULT)))); // check the search tracker TreeSet trackerHandles = sb.localSearchTracker.get(client); @@ -692,6 +697,7 @@ public class yacysearch { header.get(HeaderFramework.USER_AGENT, ""), lat, lon, rad, sb.getConfigArray("search.navigation", "")); + theQuery.setStrictContentDom(strictContentDom); theQuery.setStandardFacetsMaxCount(sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, QueryParams.FACETS_STANDARD_MAXCOUNT_DEFAULT)); theQuery.setDateFacetMaxCount(sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_DATES_MAXCOUNT, diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index d88d14751..62922428b 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -605,7 +605,7 @@ public class yacysearchitem { 
final SearchEvent theSearch, final String target_special_pattern, long timeout, boolean fullViewingRights, final boolean noreferrer) { prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content try { - SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout); + SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout, theSearch.query.isStrictContentDom()); final String imageUrlstring = image.imageUrl.toNormalform(true); final String imageUrlExt = MultiProtocolURL.getFileExtension(image.imageUrl.getFileName()); final String target = sb.getConfig(imageUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self"); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 92bbd9e96..4ec2677e3 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -488,6 +488,7 @@ public final class Protocol { final String excludehashes, final String language, final ContentDomain contentdom, + final boolean strictContentDom, final int count, final long time, final int maxDistance, @@ -533,6 +534,7 @@ public final class Protocol { "", language, contentdom, + strictContentDom, count, time, maxDistance, @@ -600,6 +602,7 @@ public final class Protocol { final String wordhashes, final String urlhashes, final ContentDomain contentdom, + final boolean strictContentDom, final int count, final long time, final int maxDistance, @@ -624,6 +627,7 @@ public final class Protocol { urlhashes, "", contentdom, + strictContentDom, count, time, maxDistance, @@ -889,6 +893,7 @@ public final class Protocol { final String urlhashes, final String language, final ContentDomain contentdom, + final boolean strictContentDom, final int count, final long time, final int maxDistance, @@ -941,6 +946,9 @@ public final class Protocol { //parts.put("sitehost", 
UTF8.StringBody(event.query.modifier.sitehost)); parts.put("author", UTF8.StringBody(event.query.modifier.author)); parts.put("contentdom", UTF8.StringBody(contentdom == null ? ContentDomain.ALL.toString() : contentdom.toString())); + if(strictContentDom) { + parts.put("strictContentDom", UTF8.StringBody("true")); + } parts.put("maxdist", UTF8.StringBody(Integer.toString(maxDistance))); parts.put("profile", UTF8.StringBody(crypt.simpleEncode(event.query.ranking.toExternalString()))); parts.put("constraint", UTF8.StringBody((event.query.constraint == null) ? "" : event.query.constraint.exportB64())); diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 830d8ab30..36dd69150 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -61,6 +61,7 @@ public class RemoteSearch extends Thread { final private SearchEvent event; final private String wordhashes, excludehashes; final private ContentDomain contentdom; + final private boolean strictContentDom; final private int partitions; final private SecondarySearchSuperviser secondarySearchSuperviser; final private Blacklist blacklist; @@ -78,6 +79,7 @@ public class RemoteSearch extends Thread { final String excludehashes, final String language, final ContentDomain contentdom, + final boolean strictContentDom, final int count, final long time, final int maxDistance, @@ -91,6 +93,7 @@ public class RemoteSearch extends Thread { this.excludehashes = excludehashes; this.language = language; this.contentdom = contentdom; + this.strictContentDom = strictContentDom; this.partitions = partitions; this.secondarySearchSuperviser = secondarySearchSuperviser; this.blacklist = blacklist; @@ -114,6 +117,7 @@ public class RemoteSearch extends Thread { this.excludehashes, this.language, this.contentdom, + this.strictContentDom, this.count, this.time, this.maxDistance, @@ -264,7 +268,8 @@ public class RemoteSearch extends Thread { // start solr 
searches final int targets = dhtPeers.size() + robinsonPeers.size(); if (!sb.getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_OFF, false)) { - final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, useFacets, event.excludeintext_image); + final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, + event.query.isStrictContentDom(), useFacets, event.excludeintext_image); for (Seed s: robinsonPeers) { if (MemoryControl.shortStatus() || Memory.load() > sb.getConfigFloat(SwitchboardConstants.REMOTESEARCH_MAXLOAD_SOLR, @@ -292,6 +297,7 @@ public class RemoteSearch extends Thread { QueryParams.hashSet2hashString(event.query.getQueryGoal().getExcludeHashes()), event.query.targetlang == null ? "" : event.query.targetlang, event.query.contentdom == null ? ContentDomain.ALL : event.query.contentdom, + event.query.isStrictContentDom(), count, time, event.query.maxDistance, @@ -336,6 +342,7 @@ public class RemoteSearch extends Thread { QueryParams.hashSet2hashString(wordhashes), urlhashes, ContentDomain.ALL, + false, 20, time, 999, diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index 71b91cab9..38e86e435 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -560,6 +560,16 @@ public final class SwitchboardConstants { public static final String SEARCH_VERIFY = "search.verify"; public static final String SEARCH_VERIFY_DELETE = "search.verify.delete"; + /** + * Key of the setting controlling whether content domain filtering is strict : + * when false, results can be extended to documents including links to documents + * of contentdom type, without being themselves of that type. + */ + public static final String SEARCH_STRICT_CONTENT_DOM = "search.strictContentDom"; + + /** Default setting value controlling whether content domain filtering is strict. 
*/ + public static final boolean SEARCH_STRICT_CONTENT_DOM_DEFAULT = false; + /** Key of the setting controlling whether search results resorting by browser JavaScript is enabled */ public static final String SEARCH_JS_RESORT = "search.jsresort"; diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index b1ec4a5ec..9c94540fa 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -368,41 +368,49 @@ public class QueryGoal { /** * Generate a Solr filter query to receive valid image results. * - * This filters error-urls out and includes urls with mime image/* as well - * as urls with links to images. + * This filters error-urls out and includes urls with mime image/*, as well + * as urls with links to images when strict is false. * We use the mime (image/*) only to find images as the parser assigned the * best mime to index documents. This applies also to parsed file systems. * This ensures that no text urls with image-fileextension is returned * (as some large internet sites like to use such urls) * + * @param strict when true, do not include non-image urls with links to images * @return Solr filter query for image urls */ - public List collectionImageFilterQuery() { + public List collectionImageFilterQuery(final boolean strict) { final ArrayList fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK); - fqs.add( - CollectionSchema.content_type.getSolrFieldName() + ":(image/*) OR " + - CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); + StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*)"); + if (!strict) { + filter.append(" OR ").append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()) + .append(AbstractSolrConnector.CATCHALL_DTERM); + } + 
fqs.add(filter.toString()); return fqs; } /** - * Generate Solr filter queries to receive valid video content results. + * Generate Solr filter queries to receive valid audio content results. * - * This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix video/* as well - * docuemnts with links to video content. + * This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix audio/* as well + * documents with links to audio content when strict is false. * - * @return Solr filter queries for video content URLs + * @param strict when true, do not include non-audio urls with links to audio + * @return Solr filter queries for audio content URLs */ - public List collectionAudioFilterQuery() { + public List collectionAudioFilterQuery(final boolean strict) { final ArrayList fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK); - fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(audio/*) OR " - + CollectionSchema.audiolinkscount_i.getSolrFieldName() + ":[1 TO *]"); + StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(audio/*)"); + if (!strict) { + filter.append(" OR ").append(CollectionSchema.audiolinkscount_i.getSolrFieldName()).append(":[1 TO *]"); + } + fqs.add(filter.toString()); return fqs; } @@ -410,17 +418,21 @@ public class QueryGoal { * Generate Solr filter queries to receive valid video content results. * * This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix video/* as well - * docuemnts with links to video content. + * documents with links to video content when strict is false. 
* + * @param strict when true, do not include non-video urls with links to video * @return Solr filter queries for video content URLs */ - public List collectionVideoFilterQuery() { + public List collectionVideoFilterQuery(final boolean strict) { final ArrayList fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK); - fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(video/*) OR " - + CollectionSchema.videolinkscount_i.getSolrFieldName() + ":[1 TO *]"); + StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(video/*)"); + if (!strict) { + filter.append(" OR ").append(CollectionSchema.videolinkscount_i.getSolrFieldName()).append(":[1 TO *]"); + } + fqs.add(filter.toString()); return fqs; } @@ -428,17 +440,22 @@ public class QueryGoal { * Generate Solr filter queries to receive valid application specific content results. * * This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix application/* as well - * docuemnts with links to application specific content. + * docuemnts with links to application specific content when strict is false. 
* + * @param strict when true, do not include non-application urls with links to application specific content * @return Solr filter queries for application specific content URLs */ - public List collectionApplicationFilterQuery() { + public List collectionApplicationFilterQuery(final boolean strict) { final ArrayList fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK); - fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(application/*) OR " - + CollectionSchema.applinkscount_i.getSolrFieldName() + ":[1 TO *]"); + StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()) + .append(":(application/*)"); + if (!strict) { + filter.append(" OR ").append(CollectionSchema.applinkscount_i.getSolrFieldName()).append(":[1 TO *]"); + } + fqs.add(filter.toString()); return fqs; } diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 6a6572096..c757e2bff 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -132,7 +132,31 @@ public final class QueryParams { /** true when the urlMasString is just a catch all pattern such as ".*" */ boolean urlMask_isCatchall; + + /** Content-Type classification of expected results */ public final Classification.ContentDomain contentdom; + + /** + *

When false, results can be extended to documents including links to documents + * of {@link #contentdom} type, without being themselves of that type.

+ * Examples : + *
    + *
  • contentdom == IMAGE, strictContentDom == true + *
      + *
    • jpeg image : acceptable result
    • + *
    • html page embedding images : rejected
    • + *
    + *
  • + *
  • contentdom == IMAGE, strictContentDom == false + *
      + *
    • jpeg image : acceptable result
    • + *
    • html page embedding images : acceptable result
    • + *
    + *
  • + *
+ */ + private boolean strictContentDom = false; + public final String targetlang; protected final Collection metatags; public final Searchdom domType; @@ -380,6 +404,20 @@ public final class QueryParams { public void setDateFacetMaxCount(final int dateFacetMaxCount) { this.dateFacetMaxCount = dateFacetMaxCount; } + + /** + * @return false when results can be extended to documents including links to documents of contentdom type. + */ + public boolean isStrictContentDom() { + return this.strictContentDom; + } + + /** + * @param strictContentDom when false, results can be extended to documents including links to documents of contentdom type. + */ + public void setStrictContentDom(final boolean strictContentDom) { + this.strictContentDom = strictContentDom; + } public static HandleSet hashes2Set(final String query) { final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0); @@ -513,20 +551,20 @@ public final class QueryParams { return SetTools.anymatchByTest(keywords, textwords); } - public SolrQuery solrQuery(final ContentDomain cd, final boolean getFacets, final boolean excludeintext_image) { + public SolrQuery solrQuery(final ContentDomain cd, final boolean strictContentDom, final boolean getFacets, final boolean excludeintext_image) { if (cd == ContentDomain.IMAGE) { - return solrImageQuery(getFacets); + return solrImageQuery(getFacets, strictContentDom); } final List filterQueries; switch (cd) { case AUDIO: - filterQueries = this.queryGoal.collectionAudioFilterQuery(); + filterQueries = this.queryGoal.collectionAudioFilterQuery(strictContentDom); break; case VIDEO: - filterQueries = this.queryGoal.collectionVideoFilterQuery(); + filterQueries = this.queryGoal.collectionVideoFilterQuery(strictContentDom); break; case APP: - filterQueries = this.queryGoal.collectionApplicationFilterQuery(); + filterQueries = this.queryGoal.collectionApplicationFilterQuery(strictContentDom); break; 
default: filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image); @@ -579,7 +617,7 @@ public final class QueryParams { return params; } - private SolrQuery solrImageQuery(boolean getFacets) { + private SolrQuery solrImageQuery(final boolean getFacets, final boolean strictContentDom) { if (this.cachedQuery != null) { this.cachedQuery.setStart(this.offset); if (!getFacets) this.cachedQuery.setFacet(false); @@ -587,16 +625,18 @@ public final class QueryParams { } // construct query - final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery()); + final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery(strictContentDom)); params.setQuery(this.queryGoal.collectionImageQuery(this.modifier).toString()); - // set boosts - StringBuilder bq = new StringBuilder(); - bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); - bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); - bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); - bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\""); - params.setParam(DisMaxParams.BQ, bq.toString()); + if(!strictContentDom) { + // set boosts + StringBuilder bq = new StringBuilder(); + bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\""); + bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\""); + params.setParam(DisMaxParams.BQ, bq.toString()); + } // prepare result ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString()); @@ -810,6 +850,7 @@ public final class QueryParams { //context.append(this.domType); 
context.append(asterisk); context.append(this.contentdom).append(asterisk); + context.append(this.strictContentDom).append(asterisk); context.append(this.zonecode).append(asterisk); context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk); context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 7fd642956..a46b5b290 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -428,7 +428,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener { if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { final boolean useSolrFacets = true; this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, - this.query.solrQuery(this.query.contentdom, useSolrFacets, this.excludeintext_image), this.query.offset, + this.query.solrQuery(this.query.contentdom, this.query.isStrictContentDom(), useSolrFacets, this.excludeintext_image), this.query.offset, this.query.itemsPerPage, null /* this peer */, 0, Switchboard.urlBlacklist, useSolrFacets, true); } this.localsolroffset = this.query.offset + this.query.itemsPerPage; @@ -734,13 +734,27 @@ public final class SearchEvent implements ScoreMapUpdatesListener { } // check document domain - if (this.query.contentdom.getCode() > 0 && - ((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) || - (this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) || - (this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) || - (this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp))))) { - if (log.isFine()) log.fine("dropped RWI: contentdom fail"); - continue pollloop; + if (this.query.contentdom.getCode() > 0) 
{ + boolean domainMatch = true; + if(this.query.isStrictContentDom()) { + if((this.query.contentdom == ContentDomain.AUDIO && iEntry.getType() != Response.DT_AUDIO) || + (this.query.contentdom == ContentDomain.VIDEO && iEntry.getType() != Response.DT_MOVIE) || + (this.query.contentdom == ContentDomain.IMAGE && iEntry.getType() != Response.DT_IMAGE) || + (this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) { + domainMatch = false; + } + } else if((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) || + (this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) || + (this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) || + (this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) { + domainMatch = false; + } + if(!domainMatch) { + if (log.isFine()) { + log.fine("dropped RWI: contentdom fail"); + } + continue pollloop; + } } // check language @@ -1003,14 +1017,25 @@ public final class SearchEvent implements ScoreMapUpdatesListener { } // check document domain - if (this.query.contentdom.getCode() > 0 && - ((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) || + if (this.query.contentdom.getCode() > 0) { + boolean domainMatch = true; + if(this.query.isStrictContentDom()) { + if(this.query.contentdom != iEntry.getContentDomain()) { + domainMatch = false; + } + } else if((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) || (this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) || (this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) || - (this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp))))) { - if (log.isFine()) log.fine("dropped Node: content domain does not match"); - 
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators); - continue pollloop; + (this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) { + domainMatch = false; + } + if(!domainMatch) { + if (log.isFine()) { + log.fine("dropped Node: content domain does not match"); + } + updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators); + continue pollloop; + } } // filter out media links in text search, if wanted @@ -2113,7 +2138,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener { final boolean useSolrFacets = (this.localsolrsearch == null); final boolean incrementNavigators = false; this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, - this.query.solrQuery(this.query.contentdom, useSolrFacets, this.excludeintext_image), + this.query.solrQuery(this.query.contentdom, this.query.isStrictContentDom(), useSolrFacets, this.excludeintext_image), this.localsolroffset, nextitems, null /* this peer */, 0, Switchboard.urlBlacklist, useSolrFacets, incrementNavigators); } this.localsolroffset += nextitems; @@ -2204,7 +2229,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener { return null; } - public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException { + public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException { if (item < imageViewed.size()) return nthImage(item); if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare @@ -2233,7 +2258,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener { if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), 
doc.title(), w, h, 0)); } } - } else { + } else if(!strictContentDom) { Collection altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName()); Collection imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName()); if (imgO != null && imgO.size() > 0 && imgO instanceof List) {