Added a collection attribute to crawls and searches:

- a Solr field collection_sxt can be used to store a set of crawl tags
- when this field is activated, a crawl tag can be assigned when crawls are started
- the content of the collection field can be comma-separated; all of the given tags are assigned to the documents that are indexed as a result of such a crawl start
- a search result can be drilled down to a specific collection; this is currently only available in the Solr interface and in the GSA interface using the 'site' option
- this adds a mandatory field for GSA queries (the Google API demands that field at all times)
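For illustration (the collection name 'intranet' is made up; the real tag is whatever was entered at crawl start), the drill-down works like this: a GSA-style request such as

    /gsa/search?q=www&site=intranet

is rewritten internally into the Solr query

    www AND collection_sxt:intranet

and the same restriction can be expressed directly against the embedded Solr interface with a standard filter query such as fq=collection_sxt:intranet. The 'site' value is concatenated into the query string verbatim (see the searchresult.java hunk below), so collection names are best kept to plain alphanumeric tokens.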
pull/1/head
Michael Peter Christen 13 years ago
parent 174530a9e0
commit b2b516cc3e

@ -140,6 +140,9 @@ h6_txt
### optional values, not part of standard YaCy handling (but useful for external applications)
## tags that are attached to crawls/index generation to separate the search result into user-defined subsets
#collection_sxt
## tags of css entries, normalized with absolute URL
#css_tag_txt

@ -564,6 +564,13 @@ crawlingIPMustNotMatch=
# the default country codes are all codes for countries in Europe
crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
# collections for index data separation
# These collections can be used to produce search tenants.
# The collection is used as the 'site' parameter in the GSA interface.
# Collections are assigned at crawl time and defined in the crawl start.
# The YaCySchema field collection_sxt must be switched on to use this feature.
collection=user
# performance-settings
# delay-times for permanent loops (milliseconds)
# the idlesleep is the pause that a process sleeps if the last call to the

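As an illustration of the comma-separated form mentioned in the commit message (tag names are made up), the collection default in yacy.conf above can carry several tags at once; every document indexed by a crawl that falls back to this default then receives all of them in collection_sxt:

    collection=user,intranet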
@ -296,6 +296,15 @@
check this box.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td><label for="collection">Add Crawl result to collection(s)</label>:</td>
<td>
<input name="collection" id="collection" type="text" size="60" maxlength="100" value="#[collection]#" #(collectionEnabled)#disabled="disabled"::#(/collectionEnabled)# />
</td>
<td>
A crawl result can be tagged with names which are candidates for a collection request. These tags can be selected with the <a href="/gsa/search?q=www&site=#[collection]#">GSA interface</a> using the 'site' operator. To use this option, the 'collection_sxt' field must be switched on in the <a href="/IndexFederated_p.html">Solr Schema</a>.
</td>
</tr>
<!--
<tr valign="top" class="TableCellDark">
<td>Exclude <em>dynamic</em> Stop-Words</td>
@ -314,8 +323,8 @@
</td>
</tr>
-->
<tr valign="top" class="TableCellLight">
<td colspan="5"><input type="submit" name="crawlingstart" value="Start New Crawl" /></td>
<tr valign="top" class="TableCellSummary">
<td colspan="5"><input type="submit" name="crawlingstart" value="Start New Crawl" class="submitready"/></td>
</tr>
</table>
</form>
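A note on the #(collectionEnabled)#...::...#(/collectionEnabled)# conditional inside the collection input above, assuming YaCy's usual template convention where the alternative before '::' is emitted for property value 0 and the one after it for value 1: the servlet sets collectionEnabled=0 when collection_sxt is not switched on in the Solr schema, so the field is then rendered disabled and stays editable otherwise:

    <input name="collection" id="collection" type="text" ... disabled="disabled" />   (collectionEnabled = 0, schema field off)
    <input name="collection" id="collection" type="text" ... />                       (collectionEnabled = 1, schema field on)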

@ -27,6 +27,7 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.YaCySchema;
import de.anomic.crawler.CrawlProfile;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -79,6 +80,10 @@ public class CrawlStartExpert_p {
prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0");
prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0");
boolean collectionEnabled = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt);
prop.put("collectionEnabled", collectionEnabled ? 1 : 0);
prop.put("collection", collectionEnabled ? sb.getConfig("collection", "user") : "");
// return rewrite properties
return prop;
}

@ -104,10 +104,10 @@
<input type="hidden" name="xsstopw" id="xsstopw" value="on" />
<input type="hidden" name="xdstopw" id="xdstopw" value="off" />
<input type="hidden" name="xpstopw" id="xpstopw" value="off" />
<input type="hidden" name="collection" id="collection" value="" />
</dd>
<!-- <dt>&nbsp;</dt><dd>&nbsp;</dd><dt>&nbsp;</dt><dd>&nbsp;</dd> -->
<dt><label>Start</label></dt>
<dd><input type="submit" name="crawlingstart" value="Start New Crawl" />
<dd><input type="submit" name="crawlingstart" value="Start New Crawl" class="submitready"/>
</dd>
</dl>

@ -53,7 +53,6 @@ import net.yacy.peers.NewsPool;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlQueues;
import de.anomic.crawler.SitemapImporter;
@ -95,9 +94,6 @@ public class Crawler_p {
prop.put("list-remote", 0);
prop.put("forwardToCrawlStart", "0");
// get segment
Segment indexSegment = sb.index;
prop.put("info", "0");
if (post != null && post.containsKey("continue")) {
@ -198,6 +194,9 @@ public class Crawler_p {
final boolean directDocByURL = "on".equals(post.get("directDocByURL", "on")); // catch also all linked media documents without loading them
env.setConfig("crawlingDirectDocByURL", directDocByURL);
final String collection = post.get("collection", sb.getConfig("collection", "user"));
env.setConfig("collection", collection);
// recrawl
final String recrawl = post.get("recrawl", "nodoubles"); // nodoubles, reload, scheduler
boolean crawlingIfOlderCheck = "on".equals(post.get("crawlingIfOlderCheck", "off"));
@ -284,7 +283,8 @@ public class Crawler_p {
xsstopw,
xdstopw,
xpstopw,
cachePolicy);
cachePolicy,
collection);
sb.crawler.putActive(profile.handle().getBytes(), profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
final DigestURI url = crawlingStartURL;
@ -319,7 +319,7 @@ public class Crawler_p {
// first delete old entry, if exists
final DigestURI url = new DigestURI(crawlingStart);
final byte[] urlhash = url.hash();
indexSegment.fulltext().remove(urlhash);
sb.index.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);
@ -349,7 +349,8 @@ public class Crawler_p {
xsstopw,
xdstopw,
xpstopw,
cachePolicy);
cachePolicy,
collection);
sb.crawler.putActive(pe.handle().getBytes(), pe);
final String reasonString = sb.crawlStacker.stackCrawl(new Request(
sb.peers.mySeed().hash.getBytes(),
@ -496,7 +497,8 @@ public class Crawler_p {
xsstopw,
xdstopw,
xpstopw,
cachePolicy);
cachePolicy,
collection);
sb.crawler.putActive(profile.handle().getBytes(), profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
sb.crawlStacker.enqueueEntriesAsynchronous(sb.peers.mySeed().hash.getBytes(), profile.handle(), hyperlinks);
@ -537,7 +539,8 @@ public class Crawler_p {
xsstopw,
xdstopw,
xpstopw,
cachePolicy);
cachePolicy,
collection);
sb.crawler.putActive(pe.handle().getBytes(), pe);
final SitemapImporter importer = new SitemapImporter(sb, sitemapURL, pe);
importer.start();
@ -581,7 +584,8 @@ public class Crawler_p {
xsstopw,
xdstopw,
xpstopw,
cachePolicy);
cachePolicy,
collection);
sb.crawler.putActive(profile.handle().getBytes(), profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
final Iterator<Map.Entry<MultiProtocolURI, Properties>> linkiterator = hyperlinks.entrySet().iterator();
@ -592,7 +596,7 @@ public class Crawler_p {
nexturl = new DigestURI(e.getKey());
// remove the url from the database to be prepared to crawl them again
final byte[] urlhash = nexturl.hash();
indexSegment.fulltext().remove(urlhash);
sb.index.fulltext().remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);
sb.crawlStacker.enqueueEntry(new Request(
@ -631,7 +635,6 @@ public class Crawler_p {
prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? "1" : "0");
prop.put("customPPMdefault", Integer.toString(LCppm));
// generate crawl profile table
int count = 0;
boolean dark = true;
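A short worked trace of the Crawler_p.java changes above (made-up input): if the crawl-start form posts collection=wiki,user then the profile stores "wiki,user" (blanks stripped), every document of that crawl receives the collection_sxt values wiki and user, and because env.setConfig("collection", collection) writes the value back, "wiki,user" is also the prefilled default offered at the next crawl start.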

@ -105,6 +105,7 @@ public class QuickCrawlLink_p {
final boolean xsstopw = post.get("xsstopw", "").equals("on");
final boolean xdstopw = post.get("xdstopw", "").equals("on");
final boolean xpstopw = post.get("xpstopw", "").equals("on");
final String collection = post.get("collection", "user");
prop.put("mode_url", (crawlingStart == null) ? "unknown" : crawlingStart);
prop.putHTML("mode_title", (title == null) ? "unknown" : title);
@ -151,7 +152,8 @@ public class QuickCrawlLink_p {
xsstopw,
xdstopw,
xpstopw,
CacheStrategy.IFFRESH);
CacheStrategy.IFFRESH,
collection);
sb.crawler.putActive(pe.handle().getBytes(), pe);
} catch (final Exception e) {
// mist

@ -268,7 +268,8 @@ public class import_ymark {
"", depth, medialink,
CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, crawlingQ,
true, true, true, false, true, true, true,
CacheStrategy.IFFRESH);
CacheStrategy.IFFRESH,
"robot_import");
sb.crawler.putActive(pe.handle().getBytes(), pe);
return sb.crawlStacker.stackCrawl(new Request(
sb.peers.mySeed().hash.getBytes(),

@ -121,6 +121,11 @@ public class searchresult {
String access = post.remove("access");
String entqr = post.remove("entqr");
if (site != null && site.length() > 0) {
q = q + " AND " + YaCySchema.collection_sxt.name() + ":" + site;
post.put(CommonParams.Q, q);
}
// get the embedded connector
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();
if (connector == null) return null;

@ -75,6 +75,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
public static final String FILTER_IP_MUSTMATCH = "crawlingIPMustMatch";
public static final String FILTER_IP_MUSTNOTMATCH = "crawlingIPMustNotMatch";
public static final String FILTER_COUNTRY_MUSTMATCH = "crawlingCountryMustMatch";
public static final String COLLECTIONS = "collections";
private Pattern urlmustmatch = null, urlmustnotmatch = null, ipmustmatch = null, ipmustnotmatch = null;
@ -120,6 +121,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
* @param xdstopw true if dynamic stop words shall be ignored
* @param xpstopw true if parent stop words shall be ignored
* @param cacheStrategy determines if and how cache is used loading content
* @param collections a comma-separated list of tags which are attached to index entries
*/
public CrawlProfile(
final String name,
@ -141,7 +143,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
final boolean xsstopw,
final boolean xdstopw,
final boolean xpstopw,
final CacheStrategy cacheStrategy) {
final CacheStrategy cacheStrategy,
final String collections) {
super(40);
if (name == null || name.isEmpty()) {
throw new NullPointerException("name must not be null or empty");
@ -172,6 +175,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
put(XDSTOPW, xdstopw); // exclude dynamic stop-word
put(XPSTOPW, xpstopw); // exclude parent stop-words
put(CACHE_STRAGEGY, cacheStrategy.toString());
put(COLLECTIONS, collections.trim().replaceAll(" ", ""));
}
/**
@ -184,7 +188,6 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
this.doms = new ConcurrentHashMap<String, DomProfile>();
}
public void domInc(final String domain, final String referrer, final int depth) {
final DomProfile dp = this.doms.get(domain);
if (dp == null) {
@ -259,6 +262,16 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
return r;
}
/**
* get the collections for this crawl
* @return a list of collection names
*/
public String[] collections() {
final String r = get(COLLECTIONS);
if (r == null) return new String[0];
return r.split(",");
}
/**
* Gets the name of the CrawlProfile.
* @return name of the profile
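A small caveat on the accessor above (plain java.lang.String semantics, nothing YaCy-specific): the constructor stores the tags with blanks stripped, and collections() returns an empty array only when the COLLECTIONS key is missing entirely; an empty stored value still yields a one-element array containing the empty string, because that is how String.split behaves:

    "user, wiki,intranet".trim().replaceAll(" ", "")   // stored as "user,wiki,intranet"
    "user,wiki,intranet".split(",")                    // {"user", "wiki", "intranet"}
    "".split(",")                                      // {""}  (length 1, not 0)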

@ -291,7 +291,7 @@ public final class CrawlSwitchboard
// generate new default entry for proxy crawling
this.defaultProxyProfile =
new CrawlProfile(
"proxy",
CRAWL_PROFILE_PROXY,
null,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
@ -310,7 +310,8 @@ public final class CrawlSwitchboard
true,
true,
true,
CacheStrategy.IFFRESH);
CacheStrategy.IFFRESH,
"robot_" + CRAWL_PROFILE_PROXY);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultProxyProfile.handle()),
this.defaultProxyProfile);
@ -338,7 +339,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.IFFRESH);
CacheStrategy.IFFRESH,
"robot_" + CRAWL_PROFILE_REMOTE);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultRemoteProfile.handle()),
this.defaultRemoteProfile);
@ -366,7 +368,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.IFEXIST);
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()),
this.defaultTextSnippetLocalProfile);
@ -394,7 +397,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.IFEXIST);
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
this.defaultTextSnippetGlobalProfile);
@ -423,7 +427,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.IFEXIST);
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()),
this.defaultMediaSnippetLocalProfile);
@ -451,7 +456,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.IFEXIST);
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()),
this.defaultMediaSnippetGlobalProfile);
@ -479,7 +485,8 @@ public final class CrawlSwitchboard
true,
true,
false,
CacheStrategy.NOCACHE);
CacheStrategy.NOCACHE,
"robot_" + CRAWL_PROFILE_SURROGATE);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultSurrogateProfile.handle()),
this.defaultSurrogateProfile);
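A side effect of the default-profile changes above: documents fetched by YaCy's internal crawls (proxy, remote, snippet and media fetching, surrogates, and import_ymark's "robot_import") are now tagged with a 'robot_' prefixed collection. Assuming collection_sxt is switched on, they can therefore be told apart from user-started crawls in Solr, for example excluded with a negated prefix query:

    -collection_sxt:robot_*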

@ -162,6 +162,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
// write header
writer.write(XML_START);
String query = request.getParams().get("q");
String site = (String) context.get("site");
OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - start));
OpensearchResponseWriter.solitaireTag(writer, "Q", query);
paramTag(writer, "sort", (String) context.get("sort"));
@ -170,7 +171,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
paramTag(writer, "oe", "UTF-8");
paramTag(writer, "client", (String) context.get("client"));
paramTag(writer, "q", request.getParams().get("q"));
paramTag(writer, "site", (String) context.get("site"));
paramTag(writer, "site", site);
paramTag(writer, "start", Integer.toString(resHead.offset));
paramTag(writer, "num", Integer.toString(resHead.rows));
paramTag(writer, "ip", (String) context.get("ip"));

@ -2560,6 +2560,7 @@ public final class Switchboard extends serverSwitch
queueEntry.lastModified(),
new Date(),
queueEntry.size(),
queueEntry.profile(),
queueEntry.getResponseHeader(),
document,
condenser,

@ -173,6 +173,7 @@ public class DocumentIndex extends Segment {
new Date(),
url.length(),
null,
null,
document,
condenser,
null,

@ -68,6 +68,7 @@ import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlQueues;
import de.anomic.crawler.retrieval.Response;
@ -345,6 +346,7 @@ public class Segment {
Date modDate,
final Date loadDate,
final long sourcesize,
final CrawlProfile profile,
final ResponseHeader responseHeader,
final Document document,
final Condenser condenser,
@ -394,7 +396,7 @@ public class Segment {
// we do not store the data in metadatadb any more if a solr is connected
if (this.fulltext.connectedSolr()) {
try {
this.fulltext.putDocument(this.fulltext.getSolrScheme().yacy2solr(id, responseHeader, document, metadata));
this.fulltext.putDocument(this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, metadata));
} catch ( final IOException e ) {
Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage());
}

@ -59,6 +59,7 @@ import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
public class SolrConfiguration extends ConfigurationSet implements Serializable {
@ -105,7 +106,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
this.lazy = lazy;
}
private boolean contains(YaCySchema field) {
public boolean contains(YaCySchema field) {
return this.contains(field.name());
}
@ -332,7 +333,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.');
}
public SolrInputDocument yacy2solr(final String id, final ResponseHeader header, final Document yacydoc, final URIMetadata metadata) {
public SolrInputDocument yacy2solr(final String id, final CrawlProfile profile, final ResponseHeader header, final Document yacydoc, final URIMetadata metadata) {
// we use the SolrCell design as index scheme
final SolrInputDocument doc = new SolrInputDocument();
final DigestURI digestURI = new DigestURI(yacydoc.dc_source());
@ -345,6 +346,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final InetAddress address = digestURI.getInetAddress();
if (address != null) add(doc, YaCySchema.ip_s, address.getHostAddress());
}
if ((allAttr || contains(YaCySchema.collection_sxt)) && profile != null) add(doc, YaCySchema.collection_sxt, profile.collections());
if (allAttr || contains(YaCySchema.url_protocol_s)) add(doc, YaCySchema.url_protocol_s, digestURI.getProtocol());
Map<String, String> searchpart = digestURI.getSearchpartMap();
if (searchpart == null) {

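For reference, a minimal sketch in plain SolrJ (hypothetical tag values; presumably what YaCy's add() helper boils down to for a String[] argument) of how the multi-valued collection_sxt field ends up in the Solr document once the profile's tags are split:

    import org.apache.solr.common.SolrInputDocument;

    final SolrInputDocument doc = new SolrInputDocument();
    for (final String tag : new String[] {"user", "wiki"}) {  // stand-in for profile.collections()
        doc.addField("collection_sxt", tag);                  // each call adds one value to the multi-valued field
    }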
@ -80,7 +80,8 @@ public enum YaCySchema implements Schema {
h5_txt(SolrType.text_general, true, true, true, "h5 header"),
h6_txt(SolrType.text_general, true, true, true, "h6 header"),
// optional values
// optional values, not part of standard YaCy handling (but useful for external applications)
collection_sxt(SolrType.string, true, true, true, "tags that are attached to crawls/index generation to separate the search result into user-defined subsets"),
csscount_i(SolrType.integer, true, true, false, "number of entries in css_tag_txt and css_url_txt"),
css_tag_txt(SolrType.text_general, true, true, true, "full css tag with normalized url"),
css_url_txt(SolrType.text_general, true, true, true, "normalized urls within a css tag"),
