@ -185,7 +185,6 @@
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
<dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
@ -348,21 +347,6 @@
<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
<fieldType name="binary" class="solr.BinaryField"/>
<!--
  Legacy plain numeric field types.

  Note: keep these only for compatibility with existing indexes (created with
  Lucene or older Solr versions) and use Trie based fields instead. As of
  Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last.

  These types store and index the text value verbatim, and hence don't
  correctly support range queries, since the lexicographic ordering isn't
  equal to the numeric ordering.
-->
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
<!-- The "RandomSortField" is not used to store or search any
data. You can declare fields of this type in your schema
to generate pseudo-random orderings of your docs for sorting
@ -411,7 +395,7 @@
<fieldType name= "text_general" class= "solr.TextField" positionIncrementGap= "100" >
<analyzer type= "index" >
<tokenizer class= "solr.StandardTokenizerFactory" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" enablePositionIncrements= "true" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class= "solr.SynonymFilterFactory" synonyms= "index_synonyms.txt" ignoreCase= "true" expand= "false" />
-->
@ -419,7 +403,7 @@
</analyzer>
<analyzer type= "query" >
<tokenizer class= "solr.StandardTokenizerFactory" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" enablePositionIncrements= "true" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" />
<filter class= "solr.SynonymFilterFactory" synonyms= "synonyms.txt" ignoreCase= "true" expand= "true" />
<filter class= "solr.LowerCaseFilterFactory" />
</analyzer>
@ -437,13 +421,10 @@
<filter class= "solr.SynonymFilterFactory" synonyms= "index_synonyms.txt" ignoreCase= "true" expand= "false" />
-->
<!-- Case insensitive stop word removal.
add enablePositionIncrements=true in both the index and query
analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class= "solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
enablePositionIncrements="true"
/>
<filter class= "solr.LowerCaseFilterFactory" />
<filter class= "solr.EnglishPossessiveFilterFactory" />
@ -459,7 +440,6 @@
<filter class= "solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
enablePositionIncrements="true"
/>
<filter class= "solr.LowerCaseFilterFactory" />
<filter class= "solr.EnglishPossessiveFilterFactory" />
@ -487,13 +467,10 @@
<filter class= "solr.SynonymFilterFactory" synonyms= "index_synonyms.txt" ignoreCase= "true" expand= "false" />
-->
<!-- Case insensitive stop word removal.
add enablePositionIncrements=true in both the index and query
analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class= "solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
enablePositionIncrements="true"
/>
<filter class= "solr.WordDelimiterFilterFactory" generateWordParts= "1" generateNumberParts= "1" catenateWords= "1" catenateNumbers= "1" catenateAll= "0" splitOnCaseChange= "1" />
<filter class= "solr.LowerCaseFilterFactory" />
@ -506,7 +483,6 @@
<filter class= "solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
enablePositionIncrements="true"
/>
<filter class= "solr.WordDelimiterFilterFactory" generateWordParts= "1" generateNumberParts= "1" catenateWords= "0" catenateNumbers= "0" catenateAll= "0" splitOnCaseChange= "1" />
<filter class= "solr.LowerCaseFilterFactory" />
@ -537,7 +513,7 @@
<fieldType name= "text_general_rev" class= "solr.TextField" positionIncrementGap= "100" >
<analyzer type= "index" >
<tokenizer class= "solr.StandardTokenizerFactory" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" enablePositionIncrements= "true" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" />
<filter class= "solr.LowerCaseFilterFactory" />
<filter class= "solr.ReversedWildcardFilterFactory" withOriginal= "true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
@ -545,7 +521,7 @@
<analyzer type= "query" >
<tokenizer class= "solr.StandardTokenizerFactory" />
<filter class= "solr.SynonymFilterFactory" synonyms= "synonyms.txt" ignoreCase= "true" expand= "true" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" enablePositionIncrements= "true" />
<filter class= "solr.StopFilterFactory" ignoreCase= "true" words= "stopwords.txt" />
<filter class= "solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
@ -672,341 +648,6 @@
-->
<fieldType name="currency"
           class="solr.CurrencyField"
           precisionStep="8"
           defaultCurrency="USD"
           currencyConfig="currency.xml"/>
<!-- some examples for different languages (generally ordered by ISO code) -->
<!-- Arabic: tokenize, lowercase, drop stop words, normalize, then stem -->
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- lowercasing is for any non-Arabic text -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ar.txt"
            enablePositionIncrements="true"/>
    <!-- normalizes ﻯ to ﻱ, etc -->
    <filter class="solr.ArabicNormalizationFilterFactory"/>
    <filter class="solr.ArabicStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Bulgarian: lowercase, drop stop words, then apply the Bulgarian stemmer -->
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_bg.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.BulgarianStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Catalan: strip elided articles, lowercase, drop stop words, Snowball stemming -->
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- removes l', etc -->
    <filter class="solr.ElisionFilterFactory"
            ignoreCase="true"
            articles="lang/contractions_ca.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ca.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
  </analyzer>
</fieldType>
<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
    <filter class="solr.CJKWidthFilterFactory"/>
    <!-- lowercasing is for any non-CJK text -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Czech: lowercase, drop stop words, then apply the Czech stemmer -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_cz.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.CzechStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Danish: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_da.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
  </analyzer>
</fieldType>
<!-- German: lowercase, drop stop words (snowball-format list), normalize, light stemming -->
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_de.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.GermanNormalizationFilterFactory"/>
    <filter class="solr.GermanLightStemFilterFactory"/>
    <!-- Alternative stemmers: -->
    <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
    <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
  </analyzer>
</fieldType>
<!-- Greek: Greek-aware lowercasing, case-sensitive stop word removal, Greek stemmer -->
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- greek specific lowercase for sigma -->
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="false"
            words="lang/stopwords_el.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Spanish: lowercase, drop stop words (snowball-format list), light stemming -->
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_es.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SpanishLightStemFilterFactory"/>
    <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
  </analyzer>
</fieldType>
<!-- Basque: lowercase, drop stop words, Snowball stemming -->
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_eu.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
  </analyzer>
</fieldType>
<!-- Persian: char filter, tokenize, lowercase, Arabic + Persian normalization, drop stop words -->
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <!-- char filter runs ahead of the tokenizer; it is here for ZWNJ -->
    <charFilter class="solr.PersianCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ArabicNormalizationFilterFactory"/>
    <filter class="solr.PersianNormalizationFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_fa.txt"
            enablePositionIncrements="true"/>
  </analyzer>
</fieldType>
<!-- Finnish: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_fi.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
    <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- French: strip elided articles, lowercase, drop stop words (snowball-format list), light stemming -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- removes l', etc -->
    <filter class="solr.ElisionFilterFactory"
            ignoreCase="true"
            articles="lang/contractions_fr.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_fr.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.FrenchLightStemFilterFactory"/>
    <!-- Alternative stemmers: -->
    <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
    <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
  </analyzer>
</fieldType>
<!-- Irish: strip elisions and hyphenated prefixes, Irish lowercasing, drop stop words, Snowball stemming -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- removes d', etc -->
    <filter class="solr.ElisionFilterFactory"
            ignoreCase="true"
            articles="lang/contractions_ga.txt"/>
    <!-- removes n-, etc. Position increments are intentionally disabled on this filter! -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/hyphenations_ga.txt"
            enablePositionIncrements="false"/>
    <filter class="solr.IrishLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ga.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
  </analyzer>
</fieldType>
<!-- Galician: lowercase, drop stop words, then apply the Galician stemmer -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_gl.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.GalicianStemFilterFactory"/>
    <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Hindi: lowercase, Indic + Hindi normalization, drop stop words, Hindi stemmer -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- normalizes unicode representation -->
    <filter class="solr.IndicNormalizationFilterFactory"/>
    <!-- normalizes variation in spelling -->
    <filter class="solr.HindiNormalizationFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_hi.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.HindiStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Hungarian: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_hu.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
    <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Armenian: lowercase, drop stop words, Snowball stemming -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_hy.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
  </analyzer>
</fieldType>
<!-- Indonesian: lowercase, drop stop words, then apply the Indonesian stemmer -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_id.txt"
            enablePositionIncrements="true"/>
    <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
    <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  </analyzer>
</fieldType>
<!-- Italian: strip elided articles, lowercase, drop stop words (snowball-format list), light stemming -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- removes l', etc -->
    <filter class="solr.ElisionFilterFactory"
            ignoreCase="true"
            articles="lang/contractions_it.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_it.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.ItalianLightStemFilterFactory"/>
    <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
  </analyzer>
</fieldType>
<!-- Latvian: lowercase, drop stop words, then apply the Latvian stemmer -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_lv.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.LatvianStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Dutch: lowercase, drop stop words (snowball-format list), stem-override dictionary, Snowball stemming -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_nl.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.StemmerOverrideFilterFactory"
            dictionary="lang/stemdict_nl.txt"
            ignoreCase="false"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
  </analyzer>
</fieldType>
<!-- Norwegian: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_no.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
    <!-- Alternative stemmers: -->
    <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
    <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Portuguese: lowercase, drop stop words (snowball-format list), light stemming -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_pt.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.PortugueseLightStemFilterFactory"/>
    <!-- Alternative stemmers: -->
    <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
    <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
    <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Romanian: lowercase, drop stop words, Snowball stemming -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ro.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
  </analyzer>
</fieldType>
<!-- Russian: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_ru.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
    <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Swedish: lowercase, drop stop words (snowball-format list), Snowball stemming -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_sv.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
    <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Thai: lowercase, Thai word filter, then drop stop words -->
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ThaiWordFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_th.txt"
            enablePositionIncrements="true"/>
  </analyzer>
</fieldType>
<!-- Turkish: Turkish-aware lowercasing, case-sensitive stop word removal, Snowball stemming -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.TurkishLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="false"
            words="lang/stopwords_tr.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
  </analyzer>
</fieldType>
</types>