@ -1494,10 +1496,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
try{
try{
Stringdoccountquery=
Stringdoccountquery=
CollectionSchema.host_id_s.getSolrFieldName()+":\""+hostid+"\" AND "+
CollectionSchema.host_id_s.getSolrFieldName()+":\""+hostid+"\" AND "+
"-"+CollectionSchema.robots_i.getSolrFieldName()+":8 AND "+// bit 3
"-"+CollectionSchema.robots_i.getSolrFieldName()+":8 AND "+// bit 3 (noindex)
"-"+CollectionSchema.robots_i.getSolrFieldName()+":24 AND "+// bit 3 + 4
"-"+CollectionSchema.robots_i.getSolrFieldName()+":24 AND "+// bit 3 + 4 (noindex + nofollow)
"-"+CollectionSchema.robots_i.getSolrFieldName()+":512 AND "+// bit 9
"-"+CollectionSchema.robots_i.getSolrFieldName()+":512 AND "+// bit 9 (noindex)
"-"+CollectionSchema.robots_i.getSolrFieldName()+":1536 AND "+// bit 9 + 10
"-"+CollectionSchema.robots_i.getSolrFieldName()+":1536 AND "+// bit 9 + 10 (noindex + nofollow)
"((-"+CollectionSchema.canonical_equal_sku_b.getSolrFieldName()+":"+AbstractSolrConnector.CATCHALL_TERM+") OR ("+CollectionSchema.canonical_equal_sku_b.getSolrFieldName()+":true)) AND "+
"((-"+CollectionSchema.canonical_equal_sku_b.getSolrFieldName()+":"+AbstractSolrConnector.CATCHALL_TERM+") OR ("+CollectionSchema.canonical_equal_sku_b.getSolrFieldName()+":true)) AND "+
CollectionSchema.httpstatus_i.getSolrFieldName()+":200 AND "+
CollectionSchema.httpstatus_i.getSolrFieldName()+":200 AND "+
"-"+CollectionSchema.id.getSolrFieldName()+":\""+urlhash+"\" AND "+
"-"+CollectionSchema.id.getSolrFieldName()+":\""+urlhash+"\" AND "+
@ -111,15 +111,21 @@ public enum CollectionSchema implements SchemaDeclaration {
scripts_sxt(SolrType.string,true,true,true,false,false,"normalized urls within a scripts tag"),
scripts_sxt(SolrType.string,true,true,true,false,false,"normalized urls within a scripts tag"),
scriptscount_i(SolrType.num_integer,true,true,false,false,false,"number of entries in scripts_sxt"),
scriptscount_i(SolrType.num_integer,true,true,false,false,false,"number of entries in scripts_sxt"),
// encoded as binary value into an integer:
// encoded as binary value into an integer:
// bit 0: "all" contained in html header meta
// bit 0: "all" contained in html header meta
// bit 1: "index" contained in html header meta
// bit 1: "index" contained in html header meta
// bit 2: "noindex" contained in html header meta
// bit 2: "follow" contained in html header meta
// bit 3: "nofollow" contained in html header meta
// bit 3: "noindex" contained in html header meta
// bit 8: "noarchive" contained in http header properties
// bit 4: "nofollow" contained in html header meta
// bit 9: "nosnippet" contained in http header properties
// bit 5: "noarchive" contained in html header meta
// bit 10: "noindex" contained in http header properties
// bit 8: "all" contained in http header X-Robots-Tag
// bit 11: "nofollow" contained in http header properties
// bit 9: "noindex" contained in http header X-Robots-Tag
// bit 12: "unavailable_after" contained in http header properties
// bit 10: "nofollow" contained in http header X-Robots-Tag
// bit 11: "noarchive" contained in http header X-Robots-Tag
// bit 12: "nosnippet" contained in http header X-Robots-Tag
// bit 13: "noodp" contained in http header X-Robots-Tag
// bit 14: "notranslate" contained in http header X-Robots-Tag
// bit 15: "noimageindex" contained in http header X-Robots-Tag
// bit 16: "unavailable_after" contained in http header X-Robots-Tag
robots_i(SolrType.num_integer,true,true,false,false,false,"content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
robots_i(SolrType.num_integer,true,true,false,false,false,"content of <meta name=\"robots\" content=#content#> tag and the \"X-Robots-Tag\" HTTP property"),
metagenerator_t(SolrType.text_general,true,true,false,false,false,"content of <meta name=\"generator\" content=#content#> tag"),
metagenerator_t(SolrType.text_general,true,true,false,false,false,"content of <meta name=\"generator\" content=#content#> tag"),
inboundlinks_protocol_sxt(SolrType.string,true,true,true,false,false,"internal links, only the protocol"),
inboundlinks_protocol_sxt(SolrType.string,true,true,true,false,false,"internal links, only the protocol"),