fixes for solr 8.8.1 migration

- replace the newer guava 30 with the older guava 25 because that is the
correct dependency for solr 8.8.1. The newer one actually did not work!
- index will be created in a DATA/INDEX/freeworld/SEGMENTS/solr_8_8_1
subfolder. The older solr_6_6 index is not touched but also not
migrated. The index starts with fresh (empty) content.
- Older indexes must be migrated by hand (export/import) for now, until a
better solution is found.
- Large schema adaptations for lucene 8.8.1
pull/404/head
Michael Peter Christen 4 years ago
parent 3befaaf4f1
commit 8b4394a6c5

@ -18,7 +18,6 @@
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
<classpathentry kind="lib" path="lib/commons-collections-3.2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.20.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
@ -27,7 +26,6 @@
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/guava-30.1-jre.jar"/>
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
@ -109,6 +107,8 @@
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
<classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>

@ -184,7 +184,7 @@
<pathelement location="${lib}/common-io-3.3.2.jar" />
<pathelement location="${lib}/common-lang-3.3.2.jar" />
<pathelement location="${lib}/commons-codec-1.14.jar" />
<pathelement location="${lib}/commons-collections-3.2.2.jar" />
<pathelement location="${lib}/commons-collections4-4.4.jar" />
<pathelement location="${lib}/commons-compress-1.20.jar" />
<pathelement location="${lib}/commons-fileupload-1.4.jar" />
<pathelement location="${lib}/commons-io-2.7.jar" />
@ -193,7 +193,7 @@
<pathelement location="${lib}/commons-logging-1.2.jar" />
<pathelement location="${lib}/commons-math3-3.4.1.jar" />
<pathelement location="${lib}/fontbox-2.0.15.jar" />
<pathelement location="${lib}/guava-30.1-jre.jar" />
<pathelement location="${lib}/guava-25.1-jre.jar" />
<pathelement location="${lib}/http2-client-9.4.34.v20201102.jar" />
<pathelement location="${lib}/http2-common-9.4.34.v20201102.jar" />
<pathelement location="${lib}/http2-http-client-transport-9.4.34.v20201102.jar" />

@ -4,16 +4,16 @@
<types>
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" docValues="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true" />
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<fieldType name="int" class="solr.IntPointField" docValues="true" omitNorms="true" />
<fieldType name="tint" class="solr.IntPointField" docValues="true" />
<fieldType name="long" class="solr.LongPointField" docValues="true" />
<fieldType name="tlong" class="solr.LongPointField" docValues="true" />
<fieldType name="double" class="solr.DoublePointField" docValues="true" omitNorms="true" />
<fieldType name="tdouble" class="solr.DoublePointField" docValues="true" omitNorms="true" />
<fieldType name="date" class="solr.DatePointField" docValues="true" omitNorms="true" />
<fieldType name="float" class="solr.FloatPointField" docValues="true" />
<fieldType name="tfloat" class="solr.FloatPointField" docValues="true" />
<fieldType name="location" class="solr.LatLonPointSpatialField" />
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
@ -22,7 +22,7 @@
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@ -36,7 +36,7 @@
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@ -44,9 +44,9 @@
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/><filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>

@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
<luceneMatchVersion>6.6.0</luceneMatchVersion>
<luceneMatchVersion>8.8.1</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -90,8 +90,8 @@ import org.apache.solr.schema.IndexSchema;
public final class Fulltext {
private static final String SOLR_PATH = "solr_6_6"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45", "solr_46", "solr_47", "solr_4_9", "solr_4_10", "solr_5_2", "solr_5_5"};
private static final String SOLR_PATH = "solr_8_8_1"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45", "solr_46", "solr_47", "solr_4_9", "solr_4_10", "solr_5_2", "solr_5_5", "solr_6_6"};
// class objects
private final File segmentPath;
@ -140,7 +140,9 @@ public final class Fulltext {
public void connectLocalSolr() throws IOException {
File solrLocation = new File(this.segmentPath, SOLR_PATH);
// migrate old solr to new
/*
for (String oldVersion: SOLR_OLD_PATH) {
File oldLocation = new File(this.segmentPath, oldVersion);
if (oldLocation.exists()) {
@ -150,14 +152,16 @@ public final class Fulltext {
}
}
}
*/
EmbeddedInstance localCollectionInstance = new EmbeddedInstance(new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"), solrLocation, CollectionSchema.CORE_NAME, new String[]{CollectionSchema.CORE_NAME, WebgraphSchema.CORE_NAME});
SolrConfig config = localCollectionInstance.getDefaultCore().getSolrConfig();
String versionValue = config.getVal(IndexSchema.LUCENE_MATCH_VERSION_PARAM, true);
Version luceneVersion = SolrConfig.parseLuceneVersionString(versionValue);
String lvn = luceneVersion.major + "_" + luceneVersion.minor;
ConcurrentLog.info("Fulltext", "using lucene version " + lvn);
String lvn = luceneVersion.major + "_" + luceneVersion.minor + "_" + luceneVersion.bugfix;
assert SOLR_PATH.endsWith(lvn) : "luceneVersion = " + lvn + ", solrPath = " + SOLR_PATH + ", check defaults/solr/solrconfig.xml";
ConcurrentLog.info("Fulltext", "using lucene version " + lvn);
ConcurrentLog.info("Fulltext", "connected solr in " + solrLocation.toString() + ", lucene version " + lvn);
this.solrInstances.connectEmbedded(localCollectionInstance);
}
@ -493,7 +497,6 @@ public final class Fulltext {
}
}
/**
* remove a full subpath from the index
* @param basepath the left path of the url; at least until the end of the host
@ -772,7 +775,6 @@ public final class Fulltext {
}
}
String s = new File(path, yacy_dump_prefix +
"f" + GenericFormatter.SHORT_MINUTE_FORMATTER.format(firstdate) + "_" +
"l" + GenericFormatter.SHORT_MINUTE_FORMATTER.format(lastdate) + "_" +

Loading…
Cancel
Save