added automatic generation of a solr schema.xml file

pull/1/head
Michael Peter Christen 13 years ago
parent 987b412491
commit 7053f8ab46

@ -0,0 +1,77 @@
/**
* schema_p
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 13.01.2012 at http://yacy.net
*
* $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $
* $LastChangedRevision: 7653 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.File;
import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.solr.SolrScheme;
import net.yacy.cora.services.federated.solr.SolrScheme.Field;
import net.yacy.cora.storage.ConfigurationSet;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class schema_p {

    /**
     * Collects the template replacements that describe every enabled field of the Solr scheme.
     * <p>
     * The scheme is taken from the Solr instance of the LOCALCRAWLING segment when
     * {@code getSolr()} returns one and it provides a scheme; otherwise the scheme is loaded
     * from the configured scheme file below {@code DATA/SETTINGS/} in the data path.
     *
     * @param header the request header; not used by this method
     * @param post the request arguments; not used by this method
     * @param env the server environment; it is cast to {@link Switchboard}
     * @return the replacements: {@code fields} holds the number of listed fields and the
     *         {@code fields_<n>_...} entries hold name, type, comment and flags of field n
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // replacement values handed back to the template
        final serverObjects replacements = new serverObjects();
        final Switchboard switchboard = (Switchboard) env;

        // first choice: the scheme of the Solr instance attached to the local crawling segment
        SolrScheme solrScheme;
        if (switchboard.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null) {
            solrScheme = null;
        } else {
            solrScheme = switchboard.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().getScheme();
        }

        // fallback: read the scheme from the configured scheme file
        final String schemeFileName = switchboard.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list");
        if (solrScheme == null) {
            final File schemeFile = new File(env.getDataPath(), "DATA/SETTINGS/" + schemeFileName);
            solrScheme = new SolrScheme(schemeFile);
        }

        // one numbered group of replacements per enabled entry that maps to a Field value
        int fieldCount = 0;
        for (final Iterator<ConfigurationSet.Entry> entries = solrScheme.allIterator(); entries.hasNext();) {
            final ConfigurationSet.Entry entry = entries.next();
            if (!entry.enabled()) {
                continue;
            }

            Field field;
            try {
                field = Field.valueOf(entry.key());
            } catch (IllegalArgumentException e) {
                // the key has no matching Field value: leave the entry out of the listing
                continue;
            }

            final String prefix = "fields_" + fieldCount + "_";
            replacements.put(prefix + "name", field.name());
            replacements.put(prefix + "type", field.getType().printName());
            replacements.put(prefix + "comment", solrScheme.commentHeadline(entry.key()));
            replacements.put(prefix + "indexedChecked", field.isIndexed() ? 1 : 0);
            replacements.put(prefix + "storedChecked", field.isStored() ? 1 : 0);
            replacements.put(prefix + "multiValuedChecked", field.isMultiValued() ? 1 : 0);
            replacements.put(prefix + "omitNormsChecked", field.isOmitNorms() ? 1 : 0);
            fieldCount++;
        }

        // number of groups written above
        replacements.put("fields", fieldCount);
        return replacements;
    }
}

@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="YaCy2Solr" version="1.3">
<!-- Field type definitions; the type attribute of each generated field element below refers to one of these names. -->
<types>
<!-- Primitive types without an analyzer chain; all of them omit norms. -->
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true" />
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" precisionStep="8" positionIncrementGap="0" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<!-- Whitespace-tokenized, lower-cased text; the synonym filter is part of the query analyzer only. -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- Standard-tokenized text with stop word removal and lower-casing; the query analyzer additionally applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- English text with one shared analyzer: synonyms (not expanded), English stop words, word delimiter that catenates words and numbers without generating parts, lower-casing, protected words, minimal stemming, duplicate removal. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/><filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
</types>
<!-- Template section: the two lines between the fields markers are written once per listed field, giving the field's comment followed by its field element. The values are presumably supplied by the schema_p servlet under the keys fields_N_name, fields_N_type, fields_N_comment and the four fields_N_...Checked flags (TODO confirm). Each optional attribute appears to be written only when its flag is 1; confirm against the template engine. -->
<fields>
#{fields}#
<!-- #[comment]# -->
<field name="#[name]#" type="#[type]#"#(indexedChecked)#:: indexed="true"#(/indexedChecked)##(storedChecked)#:: stored="true"#(/storedChecked)##(multiValuedChecked)#:: multiValued="true"#(/multiValuedChecked)##(omitNormsChecked)#:: omitNorms="true"#(/omitNormsChecked)#/>
#{/fields}#
</fields>
<!-- Document key is the id field; the default search field is description and the query parser's default operator is AND. Both fields must be among the generated field elements above. -->
<uniqueKey>id</uniqueKey>
<defaultSearchField>description</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
</schema>
Loading…
Cancel
Save