Added optional search parameter/setting to control content domain filter

This allows choosing, in the configuration or per search request, whether
or not to extend results beyond the strict content domain filter (image,
video, audio or application).

Related graphical controls still have to be added to the user interface.
pull/154/head
luccioman 7 years ago
parent f52217c939
commit e6907fdab3

@ -843,6 +843,17 @@ search.audio = false
search.video = false
search.app = false
# Strict content domain filtering : when false, results can be extended to documents including links to documents
# of contentdom type, without being themselves of that type.
# Examples :
# - contentdom search param == image, strictContentDom == true
# - jpeg image : acceptable result
# - html page embedding images : rejected
# - contentdom search param == image, strictContentDom == false
# - jpeg image : acceptable result
# - html page embedding images : acceptable result
search.strictContentDom = false
# number of search results per page displayed by default
search.items = 10

@ -116,6 +116,7 @@ public final class search {
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "all");
final boolean strictContentDom = post.getBoolean("strictContentDom");
final String filter = post.get("filter", ".*"); // a filter on the url
final int timezoneOffset = post.getInt("timezoneOffset", 0);
QueryModifier modifier = new QueryModifier(timezoneOffset);
@ -255,6 +256,7 @@ public final class search {
0.0d,
new String[0]
);
theQuery.setStrictContentDom(strictContentDom);
Network.log.info("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
final long timer = System.currentTimeMillis();
@ -319,6 +321,7 @@ public final class search {
0.0d,
new String[0]
);
theQuery.setStrictContentDom(strictContentDom);
Network.log.info("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), ""));
if (sb.getConfigBool(SwitchboardConstants.DECORATION_AUDIO, false)) Audio.Soundclip.remotesearch.play(-10.0f);

@ -290,6 +290,11 @@ public class yacysearch {
// find search domain
final Classification.ContentDomain contentdom = post == null || !post.containsKey("contentdom") ? ContentDomain.ALL : ContentDomain.contentdomParser(post.get("contentdom", "all"));
// Strict/extended content domain constraint : configured setting may be overridden by request param
final boolean strictContentDom = !Boolean.FALSE.toString().equalsIgnoreCase(post.get("strictContentDom",
sb.getConfig(SwitchboardConstants.SEARCH_STRICT_CONTENT_DOM,
String.valueOf(SwitchboardConstants.SEARCH_STRICT_CONTENT_DOM_DEFAULT))));
// check the search tracker
TreeSet<Long> trackerHandles = sb.localSearchTracker.get(client);
@ -692,6 +697,7 @@ public class yacysearch {
header.get(HeaderFramework.USER_AGENT, ""),
lat, lon, rad,
sb.getConfigArray("search.navigation", ""));
theQuery.setStrictContentDom(strictContentDom);
theQuery.setStandardFacetsMaxCount(sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT,
QueryParams.FACETS_STANDARD_MAXCOUNT_DEFAULT));
theQuery.setDateFacetMaxCount(sb.getConfigInt(SwitchboardConstants.SEARCH_NAVIGATION_DATES_MAXCOUNT,

@ -605,7 +605,7 @@ public class yacysearchitem {
final SearchEvent theSearch, final String target_special_pattern, long timeout, boolean fullViewingRights, final boolean noreferrer) {
prop.put("content", theSearch.query.contentdom.getCode() + 1); // switch on specific content
try {
SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout);
SearchEvent.ImageResult image = theSearch.oneImageResult(item, timeout, theSearch.query.isStrictContentDom());
final String imageUrlstring = image.imageUrl.toNormalform(true);
final String imageUrlExt = MultiProtocolURL.getFileExtension(image.imageUrl.getFileName());
final String target = sb.getConfig(imageUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");

@ -488,6 +488,7 @@ public final class Protocol {
final String excludehashes,
final String language,
final ContentDomain contentdom,
final boolean strictContentDom,
final int count,
final long time,
final int maxDistance,
@ -533,6 +534,7 @@ public final class Protocol {
"",
language,
contentdom,
strictContentDom,
count,
time,
maxDistance,
@ -600,6 +602,7 @@ public final class Protocol {
final String wordhashes,
final String urlhashes,
final ContentDomain contentdom,
final boolean strictContentDom,
final int count,
final long time,
final int maxDistance,
@ -624,6 +627,7 @@ public final class Protocol {
urlhashes,
"",
contentdom,
strictContentDom,
count,
time,
maxDistance,
@ -889,6 +893,7 @@ public final class Protocol {
final String urlhashes,
final String language,
final ContentDomain contentdom,
final boolean strictContentDom,
final int count,
final long time,
final int maxDistance,
@ -941,6 +946,9 @@ public final class Protocol {
//parts.put("sitehost", UTF8.StringBody(event.query.modifier.sitehost));
parts.put("author", UTF8.StringBody(event.query.modifier.author));
parts.put("contentdom", UTF8.StringBody(contentdom == null ? ContentDomain.ALL.toString() : contentdom.toString()));
if(strictContentDom) {
parts.put("strictContentDom", UTF8.StringBody("true"));
}
parts.put("maxdist", UTF8.StringBody(Integer.toString(maxDistance)));
parts.put("profile", UTF8.StringBody(crypt.simpleEncode(event.query.ranking.toExternalString())));
parts.put("constraint", UTF8.StringBody((event.query.constraint == null) ? "" : event.query.constraint.exportB64()));

@ -61,6 +61,7 @@ public class RemoteSearch extends Thread {
final private SearchEvent event;
final private String wordhashes, excludehashes;
final private ContentDomain contentdom;
final private boolean strictContentDom;
final private int partitions;
final private SecondarySearchSuperviser secondarySearchSuperviser;
final private Blacklist blacklist;
@ -78,6 +79,7 @@ public class RemoteSearch extends Thread {
final String excludehashes,
final String language,
final ContentDomain contentdom,
final boolean strictContentDom,
final int count,
final long time,
final int maxDistance,
@ -91,6 +93,7 @@ public class RemoteSearch extends Thread {
this.excludehashes = excludehashes;
this.language = language;
this.contentdom = contentdom;
this.strictContentDom = strictContentDom;
this.partitions = partitions;
this.secondarySearchSuperviser = secondarySearchSuperviser;
this.blacklist = blacklist;
@ -114,6 +117,7 @@ public class RemoteSearch extends Thread {
this.excludehashes,
this.language,
this.contentdom,
this.strictContentDom,
this.count,
this.time,
this.maxDistance,
@ -264,7 +268,8 @@ public class RemoteSearch extends Thread {
// start solr searches
final int targets = dhtPeers.size() + robinsonPeers.size();
if (!sb.getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_OFF, false)) {
final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, useFacets, event.excludeintext_image);
final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom,
event.query.isStrictContentDom(), useFacets, event.excludeintext_image);
for (Seed s: robinsonPeers) {
if (MemoryControl.shortStatus()
|| Memory.load() > sb.getConfigFloat(SwitchboardConstants.REMOTESEARCH_MAXLOAD_SOLR,
@ -292,6 +297,7 @@ public class RemoteSearch extends Thread {
QueryParams.hashSet2hashString(event.query.getQueryGoal().getExcludeHashes()),
event.query.targetlang == null ? "" : event.query.targetlang,
event.query.contentdom == null ? ContentDomain.ALL : event.query.contentdom,
event.query.isStrictContentDom(),
count,
time,
event.query.maxDistance,
@ -336,6 +342,7 @@ public class RemoteSearch extends Thread {
QueryParams.hashSet2hashString(wordhashes),
urlhashes,
ContentDomain.ALL,
false,
20,
time,
999,

@ -560,6 +560,16 @@ public final class SwitchboardConstants {
public static final String SEARCH_VERIFY = "search.verify";
public static final String SEARCH_VERIFY_DELETE = "search.verify.delete";
/**
* Key of the setting controlling whether content domain filtering is strict :
* when false, results can be extended to documents including links to documents
* of contentdom type, without being themselves of that type.
*/
public static final String SEARCH_STRICT_CONTENT_DOM = "search.strictContentDom";
/** Default setting value controlling whether content domain filtering is strict. */
public static final boolean SEARCH_STRICT_CONTENT_DOM_DEFAULT = false;
/** Key of the setting controlling whether search results resorting by browser JavaScript is enabled */
public static final String SEARCH_JS_RESORT = "search.jsresort";

@ -368,41 +368,49 @@ public class QueryGoal {
/**
* Generate a Solr filter query to receive valid image results.
*
* This filters error-urls out and includes urls with mime image/* as well
* as urls with links to images.
* This filters error-urls out and includes urls with mime image/*, as well
* as urls with links to images when strict is false.
* We use the mime (image/*) only to find images as the parser assigned the
* best mime to index documents. This applies also to parsed file systems.
* This ensures that no text urls with image-fileextension is returned
* (as some large internet sites like to use such urls)
*
* @param strict when true, do not include non-image urls with links to images
* @return Solr filter query for image urls
*/
public List<String> collectionImageFilterQuery() {
public List<String> collectionImageFilterQuery(final boolean strict) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(
CollectionSchema.content_type.getSolrFieldName() + ":(image/*) OR " +
CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*)");
if (!strict) {
filter.append(" OR ").append(CollectionSchema.images_urlstub_sxt.getSolrFieldName())
.append(AbstractSolrConnector.CATCHALL_DTERM);
}
fqs.add(filter.toString());
return fqs;
}
/**
* Generate Solr filter queries to receive valid video content results.
* Generate Solr filter queries to receive valid audio content results.
*
* This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix video/* as well
* docuemnts with links to video content.
* This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix audio/* as well as
* documents with links to audio content when strict is false.
*
* @return Solr filter queries for video content URLs
* @param strict when true, do not include non-audio urls with links to audio
* @return Solr filter queries for audio content URLs
*/
public List<String> collectionAudioFilterQuery() {
public List<String> collectionAudioFilterQuery(final boolean strict) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(audio/*) OR "
+ CollectionSchema.audiolinkscount_i.getSolrFieldName() + ":[1 TO *]");
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(audio/*)");
if (!strict) {
filter.append(" OR ").append(CollectionSchema.audiolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
}
fqs.add(filter.toString());
return fqs;
}
@ -410,17 +418,21 @@ public class QueryGoal {
* Generate Solr filter queries to receive valid video content results.
*
* This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix video/* as well as
* docuemnts with links to video content.
* documents with links to video content when strict is false.
*
* @param strict when true, do not include non-video urls with links to video
* @return Solr filter queries for video content URLs
*/
public List<String> collectionVideoFilterQuery() {
public List<String> collectionVideoFilterQuery(final boolean strict) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(video/*) OR "
+ CollectionSchema.videolinkscount_i.getSolrFieldName() + ":[1 TO *]");
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(video/*)");
if (!strict) {
filter.append(" OR ").append(CollectionSchema.videolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
}
fqs.add(filter.toString());
return fqs;
}
@ -428,17 +440,22 @@ public class QueryGoal {
* Generate Solr filter queries to receive valid application specific content results.
*
* This filters out documents with bad HTTP status and includes documents with MIME type matching the prefix application/* as well as
* docuemnts with links to application specific content.
* documents with links to application specific content when strict is false.
*
* @param strict when true, do not include non-application urls with links to application specific content
* @return Solr filter queries for application specific content URLs
*/
public List<String> collectionApplicationFilterQuery() {
public List<String> collectionApplicationFilterQuery(final boolean strict) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(application/*) OR "
+ CollectionSchema.applinkscount_i.getSolrFieldName() + ":[1 TO *]");
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName())
.append(":(application/*)");
if (!strict) {
filter.append(" OR ").append(CollectionSchema.applinkscount_i.getSolrFieldName()).append(":[1 TO *]");
}
fqs.add(filter.toString());
return fqs;
}

@ -132,7 +132,31 @@ public final class QueryParams {
/** true when the urlMaskString is just a catch all pattern such as ".*" */
boolean urlMask_isCatchall;
/** Content-Type classification of expected results */
public final Classification.ContentDomain contentdom;
/**
* <p>When false, results can be extended to documents including links to documents
* of {@link #contentdom} type, without being themselves of that type.</p>
* Examples :
* <ul>
* <li>contentdom == IMAGE, strictContentDom == true
* <ul>
* <li>jpeg image : acceptable result</li>
* <li>html page embedding images : rejected</li>
* </ul>
* </li>
* <li>contentdom == IMAGE, strictContentDom == false
* <ul>
* <li>jpeg image : acceptable result</li>
* <li>html page embedding images : acceptable result</li>
* </ul>
* </li>
* </ul>
*/
private boolean strictContentDom = false;
public final String targetlang;
protected final Collection<Tagging.Metatag> metatags;
public final Searchdom domType;
@ -380,6 +404,20 @@ public final class QueryParams {
public void setDateFacetMaxCount(final int dateFacetMaxCount) {
this.dateFacetMaxCount = dateFacetMaxCount;
}
/**
* Tells whether content domain filtering is strict for this query.
*
* @return true when filtering is strict; false when results can be extended to
*         documents including links to documents of contentdom type, without
*         being themselves of that type.
*/
public boolean isStrictContentDom() {
return this.strictContentDom;
}
/**
* Sets whether content domain filtering is strict for this query.
*
* @param strictContentDom when false, results can be extended to documents
*            including links to documents of contentdom type, without being
*            themselves of that type; when true, such documents are rejected.
*/
public void setStrictContentDom(final boolean strictContentDom) {
this.strictContentDom = strictContentDom;
}
public static HandleSet hashes2Set(final String query) {
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
@ -513,20 +551,20 @@ public final class QueryParams {
return SetTools.anymatchByTest(keywords, textwords);
}
public SolrQuery solrQuery(final ContentDomain cd, final boolean getFacets, final boolean excludeintext_image) {
public SolrQuery solrQuery(final ContentDomain cd, final boolean strictContentDom, final boolean getFacets, final boolean excludeintext_image) {
if (cd == ContentDomain.IMAGE) {
return solrImageQuery(getFacets);
return solrImageQuery(getFacets, strictContentDom);
}
final List<String> filterQueries;
switch (cd) {
case AUDIO:
filterQueries = this.queryGoal.collectionAudioFilterQuery();
filterQueries = this.queryGoal.collectionAudioFilterQuery(strictContentDom);
break;
case VIDEO:
filterQueries = this.queryGoal.collectionVideoFilterQuery();
filterQueries = this.queryGoal.collectionVideoFilterQuery(strictContentDom);
break;
case APP:
filterQueries = this.queryGoal.collectionApplicationFilterQuery();
filterQueries = this.queryGoal.collectionApplicationFilterQuery(strictContentDom);
break;
default:
filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image);
@ -579,7 +617,7 @@ public final class QueryParams {
return params;
}
private SolrQuery solrImageQuery(boolean getFacets) {
private SolrQuery solrImageQuery(final boolean getFacets, final boolean strictContentDom) {
if (this.cachedQuery != null) {
this.cachedQuery.setStart(this.offset);
if (!getFacets) this.cachedQuery.setFacet(false);
@ -587,16 +625,18 @@ public final class QueryParams {
}
// construct query
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery());
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery(strictContentDom));
params.setQuery(this.queryGoal.collectionImageQuery(this.modifier).toString());
// set boosts
StringBuilder bq = new StringBuilder();
bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\"");
params.setParam(DisMaxParams.BQ, bq.toString());
if(!strictContentDom) {
// set boosts
StringBuilder bq = new StringBuilder();
bq.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"jpg\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tif\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"tiff\"");
bq.append(" OR ").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":\"png\"");
params.setParam(DisMaxParams.BQ, bq.toString());
}
// prepare result
ConcurrentLog.info("Protocol", "SOLR QUERY: " + params.toString());
@ -810,6 +850,7 @@ public final class QueryParams {
//context.append(this.domType);
context.append(asterisk);
context.append(this.contentdom).append(asterisk);
context.append(this.strictContentDom).append(asterisk);
context.append(this.zonecode).append(asterisk);
context.append(ASCII.String(Word.word2hash(this.ranking.toExternalString()))).append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk);

@ -428,7 +428,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
final boolean useSolrFacets = true;
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this,
this.query.solrQuery(this.query.contentdom, useSolrFacets, this.excludeintext_image), this.query.offset,
this.query.solrQuery(this.query.contentdom, this.query.isStrictContentDom(), useSolrFacets, this.excludeintext_image), this.query.offset,
this.query.itemsPerPage, null /* this peer */, 0, Switchboard.urlBlacklist, useSolrFacets, true);
}
this.localsolroffset = this.query.offset + this.query.itemsPerPage;
@ -734,13 +734,27 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check document domain
if (this.query.contentdom.getCode() > 0 &&
((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) ||
(this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) ||
(this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp))))) {
if (log.isFine()) log.fine("dropped RWI: contentdom fail");
continue pollloop;
if (this.query.contentdom.getCode() > 0) {
boolean domainMatch = true;
if(this.query.isStrictContentDom()) {
if((this.query.contentdom == ContentDomain.AUDIO && iEntry.getType() != Response.DT_AUDIO) ||
(this.query.contentdom == ContentDomain.VIDEO && iEntry.getType() != Response.DT_MOVIE) ||
(this.query.contentdom == ContentDomain.IMAGE && iEntry.getType() != Response.DT_IMAGE) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) {
domainMatch = false;
}
} else if((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) ||
(this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) ||
(this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) {
domainMatch = false;
}
if(!domainMatch) {
if (log.isFine()) {
log.fine("dropped RWI: contentdom fail");
}
continue pollloop;
}
}
// check language
@ -1003,14 +1017,25 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check document domain
if (this.query.contentdom.getCode() > 0 &&
((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) ||
if (this.query.contentdom.getCode() > 0) {
boolean domainMatch = true;
if(this.query.isStrictContentDom()) {
if(this.query.contentdom != iEntry.getContentDomain()) {
domainMatch = false;
}
} else if((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Tokenizer.flag_cat_hasaudio))) ||
(this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Tokenizer.flag_cat_hasvideo))) ||
(this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Tokenizer.flag_cat_hasimage))) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp))))) {
if (log.isFine()) log.fine("dropped Node: content domain does not match");
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
continue pollloop;
(this.query.contentdom == ContentDomain.APP && !(flags.get(Tokenizer.flag_cat_hasapp)))) {
domainMatch = false;
}
if(!domainMatch) {
if (log.isFine()) {
log.fine("dropped Node: content domain does not match");
}
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
continue pollloop;
}
}
// filter out media links in text search, if wanted
@ -2113,7 +2138,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final boolean useSolrFacets = (this.localsolrsearch == null);
final boolean incrementNavigators = false;
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this,
this.query.solrQuery(this.query.contentdom, useSolrFacets, this.excludeintext_image),
this.query.solrQuery(this.query.contentdom, this.query.isStrictContentDom(), useSolrFacets, this.excludeintext_image),
this.localsolroffset, nextitems, null /* this peer */, 0, Switchboard.urlBlacklist, useSolrFacets, incrementNavigators);
}
this.localsolroffset += nextitems;
@ -2204,7 +2229,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
return null;
}
public ImageResult oneImageResult(final int item, final long timeout) throws MalformedURLException {
public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException {
if (item < imageViewed.size()) return nthImage(item);
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
@ -2233,7 +2258,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
}
}
} else {
} else if(!strictContentDom) {
Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) {

Loading…
Cancel
Save