fixes for solr 8.8.1 migration

- replace the newer guava 30 with the older guava 25 because that is the
correct dependency for solr 8.8.1. The newer one did not actually work!
- the index will be created in a DATA/INDEX/freeworld/SEGMENTS/solr_8_8_1
subfolder. The older solr_6_6 index is not touched, but it is also not
migrated. The index starts with fresh (empty) content.
- Older indexes must be migrated by hand (export/import) for now, until a
better solution is found.
- Large schema adaptations for lucene 8.8.1
pull/404/head
Michael Peter Christen 4 years ago
parent 3befaaf4f1
commit 8b4394a6c5

@ -18,7 +18,6 @@
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
<classpathentry kind="lib" path="lib/commons-collections-3.2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.20.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
@ -27,7 +26,6 @@
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/guava-30.1-jre.jar"/>
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
@ -109,6 +107,8 @@
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
<classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>

@ -184,7 +184,7 @@
<pathelement location="${lib}/common-io-3.3.2.jar" />
<pathelement location="${lib}/common-lang-3.3.2.jar" />
<pathelement location="${lib}/commons-codec-1.14.jar" />
<pathelement location="${lib}/commons-collections-3.2.2.jar" />
<pathelement location="${lib}/commons-collections4-4.4.jar" />
<pathelement location="${lib}/commons-compress-1.20.jar" />
<pathelement location="${lib}/commons-fileupload-1.4.jar" />
<pathelement location="${lib}/commons-io-2.7.jar" />
@ -193,7 +193,7 @@
<pathelement location="${lib}/commons-logging-1.2.jar" />
<pathelement location="${lib}/commons-math3-3.4.1.jar" />
<pathelement location="${lib}/fontbox-2.0.15.jar" />
<pathelement location="${lib}/guava-30.1-jre.jar" />
<pathelement location="${lib}/guava-25.1-jre.jar" />
<pathelement location="${lib}/http2-client-9.4.34.v20201102.jar" />
<pathelement location="${lib}/http2-common-9.4.34.v20201102.jar" />
<pathelement location="${lib}/http2-http-client-transport-9.4.34.v20201102.jar" />

@ -2,18 +2,18 @@
<schema name="YaCy2Solr" version="1.4">
<types>
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" docValues="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true" />
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" docValues="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true" />
<fieldType name="int" class="solr.IntPointField" docValues="true" omitNorms="true" />
<fieldType name="tint" class="solr.IntPointField" docValues="true" />
<fieldType name="long" class="solr.LongPointField" docValues="true" />
<fieldType name="tlong" class="solr.LongPointField" docValues="true" />
<fieldType name="double" class="solr.DoublePointField" docValues="true" omitNorms="true" />
<fieldType name="tdouble" class="solr.DoublePointField" docValues="true" omitNorms="true" />
<fieldType name="date" class="solr.DatePointField" docValues="true" omitNorms="true" />
<fieldType name="float" class="solr.FloatPointField" docValues="true" />
<fieldType name="tfloat" class="solr.FloatPointField" docValues="true" />
<fieldType name="location" class="solr.LatLonPointSpatialField" />
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
@ -22,7 +22,7 @@
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@ -36,7 +36,7 @@
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@ -44,9 +44,9 @@
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/><filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>

@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
<luceneMatchVersion>6.6.0</luceneMatchVersion>
<luceneMatchVersion>8.8.1</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -39,7 +39,7 @@ import org.apache.solr.core.SolrCore;
import com.google.common.io.Files;
public class EmbeddedInstance implements SolrInstance {
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
// additionally, an optional solrcore.properties (or solrcore.x86.properties for 32bit systems) is copied
private CoreContainer coreContainer;
@ -54,16 +54,16 @@ public class EmbeddedInstance implements SolrInstance {
super();
// copy the solrconfig.xml to the storage path
this.containerPath = containerPath;
// ensure that default core path exists
File defaultCorePath = new File(containerPath, givenDefaultCoreName);
if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
// migrate old conf directory
File oldConf = new File(containerPath, "conf");
File confDir = new File(defaultCorePath, "conf");
if (oldConf.exists()) oldConf.renameTo(confDir);
// migrate old data directory
File oldData = new File(containerPath, "data");
File dataDir = new File(defaultCorePath, "data");
@ -72,12 +72,12 @@ public class EmbeddedInstance implements SolrInstance {
// create index subdirectory in data if it does not exist
File indexDir = new File(dataDir, "index");
if (!indexDir.exists()) indexDir.mkdirs();
// initialize the cores' configuration
for (String coreName: initializeCoreNames) {
initializeCoreConf(solr_config, containerPath, coreName);
}
// initialize the coreContainer
File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
@ -104,12 +104,12 @@ public class EmbeddedInstance implements SolrInstance {
public int hashCode() {
    // the hash is derived from the container path only, matching equals()
    final int pathHash = this.containerPath.hashCode();
    return pathHash;
}
/**
 * Two embedded instances are equal when they use an equal container path.
 *
 * @param o the object to compare with
 * @return true if o is an EmbeddedInstance with an equal container path
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof EmbeddedInstance)) {
        return false;
    }
    final EmbeddedInstance other = (EmbeddedInstance) o;
    return this.containerPath.equals(other.containerPath);
}
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
// ensure that default core path exists
@ -121,7 +121,7 @@ public class EmbeddedInstance implements SolrInstance {
if (!core_properties.exists()) {
// create the file
try (
/* Automatically closed by this try-with-resources statement */
/* Automatically closed by this try-with-resources statement */
FileOutputStream fos = new FileOutputStream(core_properties);
) {
fos.write(ASCII.getBytes("name=" + coreName + "\n"));
@ -140,7 +140,7 @@ public class EmbeddedInstance implements SolrInstance {
conf.mkdirs();
File data = new File(corePath, "data");
data.mkdirs();
// (over-)write configuration into conf path
File source, target;
for (String cf: confFiles) {
@ -158,14 +158,14 @@ public class EmbeddedInstance implements SolrInstance {
} else {
target = new File(conf, cf);
target.getParentFile().mkdirs();
try {
try {
Files.copy(source, target);
} catch (final IOException e) {
e.printStackTrace();
}
}
}
// copy the solrcore.properties
// for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists
String os = System.getProperty("os.arch");
@ -188,17 +188,17 @@ public class EmbeddedInstance implements SolrInstance {
ex.printStackTrace();
}
}
}
/**
 * @return the container path this instance was created with
 */
public File getContainerPath() {
    final File path = this.containerPath;
    return path;
}
/**
 * @return the core container held by this instance; the field is set to null
 *         elsewhere in this class, so callers should be prepared for null
 */
public CoreContainer getCoreContainer() {
    final CoreContainer container = this.coreContainer;
    return container;
}
@Override
public String getDefaultCoreName() {
return this.defaultCoreName;
@ -234,7 +234,7 @@ public class EmbeddedInstance implements SolrInstance {
this.cores.put(name, c);
return c;
}
@Override
protected void finalize() throws Throwable {
this.close();
@ -248,5 +248,5 @@ public class EmbeddedInstance implements SolrInstance {
this.coreContainer = null;
} catch (final Throwable e) {ConcurrentLog.logException(e);}
}
}

@ -90,18 +90,18 @@ import org.apache.solr.schema.IndexSchema;
public final class Fulltext {
private static final String SOLR_PATH = "solr_6_6"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45", "solr_46", "solr_47", "solr_4_9", "solr_4_10", "solr_5_2", "solr_5_5"};
private static final String SOLR_PATH = "solr_8_8_1"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36", "solr_40", "solr_44", "solr_45", "solr_46", "solr_47", "solr_4_9", "solr_4_10", "solr_5_2", "solr_5_5", "solr_6_6"};
// class objects
private final File segmentPath;
private final File archivePath;
private Export exportthread; // will have a export thread assigned if exporter is running
private InstanceMirror solrInstances;
/** Synchronization lock for solrInstances property */
private ReentrantLock solrInstancesLock;
private final CollectionConfiguration collectionConfiguration;
private final WebgraphConfiguration webgraphConfiguration;
private boolean writeWebgraph;
@ -117,11 +117,11 @@ public final class Fulltext {
this.webgraphConfiguration = webgraphConfiguration;
this.writeWebgraph = false;
}
/**
 * Switch writing to the webgraph index on or off.
 *
 * @param check the new value of the writeWebgraph flag
 */
public void setUseWebgraph(boolean check) {
    writeWebgraph = check;
}
/**
 * @return the current value of the writeWebgraph flag
 */
public boolean useWebgraph() {
    final boolean enabled = this.writeWebgraph;
    return enabled;
}
@ -140,24 +140,28 @@ public final class Fulltext {
/**
 * Connect the embedded (local) Solr instance located in the SOLR_PATH
 * sub-folder of the segment path.
 * <p>
 * The renaming of index folders of older Solr versions (SOLR_OLD_PATH) into
 * the current location is disabled: the loop is kept below as a comment only.
 *
 * @throws IOException declared for callers; the visible body does not throw it directly
 */
public void connectLocalSolr() throws IOException {
    File solrLocation = new File(this.segmentPath, SOLR_PATH);

    // migrate old solr to new
    // (disabled: old index folders are left untouched and are not renamed)
    /*
    for (String oldVersion: SOLR_OLD_PATH) {
        File oldLocation = new File(this.segmentPath, oldVersion);
        if (oldLocation.exists()) {
            if(!oldLocation.renameTo(solrLocation)) {
                ConcurrentLog.severe("Fulltext", "Failed renaming old Solr location ("
                        + oldLocation.getAbsolutePath() + ") to new location : " + solrLocation.getAbsolutePath());
            }
        }
    }
    */

    EmbeddedInstance localCollectionInstance = new EmbeddedInstance(new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"), solrLocation, CollectionSchema.CORE_NAME, new String[]{CollectionSchema.CORE_NAME, WebgraphSchema.CORE_NAME});

    // read the lucene match version from the loaded solr configuration
    SolrConfig config = localCollectionInstance.getDefaultCore().getSolrConfig();
    String versionValue = config.getVal(IndexSchema.LUCENE_MATCH_VERSION_PARAM, true);
    Version luceneVersion = SolrConfig.parseLuceneVersionString(versionValue);

    // the version string has three parts (major_minor_bugfix) so that it can match
    // the suffix of SOLR_PATH; it is declared exactly once
    String lvn = luceneVersion.major + "_" + luceneVersion.minor + "_" + luceneVersion.bugfix;
    assert SOLR_PATH.endsWith(lvn) : "luceneVersion = " + lvn + ", solrPath = " + SOLR_PATH + ", check defaults/solr/solrconfig.xml";

    ConcurrentLog.info("Fulltext", "using lucene version " + lvn);
    ConcurrentLog.info("Fulltext", "connected solr in " + solrLocation.toString() + ", lucene version " + lvn);
    this.solrInstances.connectEmbedded(localCollectionInstance);
}
@ -194,64 +198,64 @@ public final class Fulltext {
/**
 * Get the default remote Solr connector.
 * <p>
 * Whether the binary response writer is used is read from the Switchboard
 * configuration when a Switchboard is available, otherwise the default
 * constant applies.
 *
 * @return the default remote connector, or null when retrieving it raised an IOException
 */
public RemoteSolrConnector getDefaultRemoteSolrConnector() {
    try {
        // start with the default and override it from the configuration if possible
        boolean useBinaryResponseWriter = SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT;
        if (Switchboard.getSwitchboard() != null) {
            useBinaryResponseWriter = Switchboard.getSwitchboard().getConfigBool(
                    SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED,
                    SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT);
        }
        return this.solrInstances.getDefaultRemoteConnector(useBinaryResponseWriter);
    } catch (IOException e) {
        // best effort: callers receive null when no remote connector can be provided
        return null;
    }
}
/**
 * Get the embedded Solr instance while holding the solrInstances lock.
 *
 * @return the embedded instance, or null when no embedded instance is connected
 */
public EmbeddedInstance getEmbeddedInstance() {
    // acquire exactly once; the matching single unlock is in the finally block
    this.solrInstancesLock.lock();
    try {
        if (this.solrInstances.isConnectedEmbedded()) {
            return this.solrInstances.getEmbedded();
        }
        return null;
    } finally {
        this.solrInstancesLock.unlock();
    }
}
/**
 * Get the default mirror connector while holding the solrInstances lock.
 *
 * @return whatever solrInstances.getDefaultMirrorConnector() returns
 */
public SolrConnector getDefaultConnector() {
    // acquire exactly once; the matching single unlock is in the finally block
    this.solrInstancesLock.lock();
    try {
        return this.solrInstances.getDefaultMirrorConnector();
    } finally {
        this.solrInstancesLock.unlock();
    }
}
/**
 * Get the connector for the webgraph core while holding the solrInstances lock.
 *
 * @return the webgraph connector, or null when writing the webgraph is disabled
 */
public SolrConnector getWebgraphConnector() {
    // no webgraph connector is handed out while the webgraph is switched off
    if (!this.writeWebgraph) {
        return null;
    }
    // acquire exactly once; the matching single unlock is in the finally block
    this.solrInstancesLock.lock();
    try {
        return this.solrInstances.getGenericMirrorConnector(WebgraphSchema.CORE_NAME);
    } finally {
        this.solrInstancesLock.unlock();
    }
}
/**
 * @return the buffer size reported by solrInstances
 */
public int bufferSize() {
    final int size = this.solrInstances.bufferSize();
    return size;
}
/**
 * Delegate to solrInstances to clear its caches.
 */
public void clearCaches() {
    solrInstances.clearCaches();
}
public void clearLocalSolr() throws IOException {
if (this.exportthread != null) {
this.exportthread.interrupt();
this.exportthread.interrupt();
}
this.solrInstancesLock.lock();
this.solrInstancesLock.lock();
try {
EmbeddedInstance instance = this.solrInstances.getEmbedded();
if (instance != null) {
@ -262,12 +266,12 @@ public final class Fulltext {
}
this.solrInstances.clearCaches();
} finally {
this.solrInstancesLock.unlock();
this.solrInstancesLock.unlock();
}
}
public void clearRemoteSolr() throws IOException {
this.solrInstancesLock.lock();
this.solrInstancesLock.lock();
try {
ShardInstance instance = this.solrInstances.getRemote();
if (instance != null) {
@ -277,7 +281,7 @@ public final class Fulltext {
}
this.solrInstances.clearCaches();
} finally {
this.solrInstancesLock.unlock();
this.solrInstancesLock.unlock();
}
}
@ -297,7 +301,7 @@ public final class Fulltext {
this.collectionSizeLastValue = size;
return size;
}
/**
* @return the size of the webgraph index
*/
@ -309,10 +313,10 @@ public final class Fulltext {
try {
this.solrInstances.close();
} catch (Throwable e) {
ConcurrentLog.logException(e);
ConcurrentLog.logException(e);
}
}
private long lastCommit = 0;
public void commit(boolean softCommit) {
long t = System.currentTimeMillis();
@ -346,15 +350,15 @@ public final class Fulltext {
if (urlHash == null) return null;
return getMetadata(urlHash, null, 0L);
}
private URIMetadataNode getMetadata(final byte[] urlHash, final WordReferenceVars wre, final long score) {
String u = ASCII.String(urlHash);
// get the metadata from Solr
try {
SolrDocument doc = this.getDefaultConnector().getDocumentById(u);
if (doc != null) {
return new URIMetadataNode(doc, wre, score);
return new URIMetadataNode(doc, wre, score);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
@ -411,7 +415,7 @@ public final class Fulltext {
Collection<Object> collections = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName());
// collection dht is used to identify metadata from full crawled documents (if "dht" exists don't overwrite rich crawldata with metadata
if (!collections.contains("dht")) return;
// passed all checks, overwrite document
putDocument(getDefaultConfiguration().metadata2solr(entry));
}
@ -451,7 +455,7 @@ public final class Fulltext {
(freshdate == null || freshdate.after(now)) ? null :
(WebgraphSchema.load_date_dt.getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(freshdate) + "]"));
}
/**
* delete all documents within a domain that are registered as error document
* @param hosthashes
@ -459,7 +463,7 @@ public final class Fulltext {
public void deleteDomainErrors(final Set<String> hosthashes) {
    // operate on the default connector, selecting documents by their host id field
    final SolrConnector connector = this.getDefaultConnector();
    final String hostField = CollectionSchema.host_id_s.getSolrFieldName();
    // restrict the deletion to documents matching the failreason field constraint
    final String errorConstraint = CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
    deleteDomainWithConstraint(connector, hostField, hosthashes, errorConstraint);
}
private static void deleteDomainWithConstraint(SolrConnector connector, String fieldname, final Set<String> hosthashes, String constraintQuery) {
if (hosthashes == null || hosthashes.size() == 0) return;
int subsetscount = 1 + (hosthashes.size() / 255); // if the list is too large, we get a "too many boolean clauses" exception
@ -492,8 +496,7 @@ public final class Fulltext {
} catch (final IOException e) {
}
}
/**
* remove a full subpath from the index
* @param basepath the left path of the url; at least until the end of the host
@ -522,7 +525,7 @@ public final class Fulltext {
} catch (final InterruptedException e) {}
return count.get();
}
/**
* remove a list of id's from the index (matching fulltext.id and webgraph.source_id_s
* @param deleteIDs a list of urlhashes; each denoting a document
@ -533,7 +536,7 @@ public final class Fulltext {
this.getDefaultConnector().deleteByIds(deleteIDs);
if (this.writeWebgraph) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
for (String id : deleteIDs) {
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash start with a '-' character */
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash start with a '-' character */
this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s.name() + ":\"" + id + "\"");
}
}
@ -554,7 +557,7 @@ public final class Fulltext {
String id = ASCII.String(urlHash);
this.getDefaultConnector().deleteById(id);
if (this.writeWebgraph) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash start with a '-' character */
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash start with a '-' character */
this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s + ":\"" + id + "\"");
}
} catch (final Throwable e) {
@ -565,12 +568,12 @@ public final class Fulltext {
/**
 * Look up the URL that belongs to a url hash using the default connector.
 *
 * @param urlHash the url hash as string; may be null
 * @return the URL, or null if urlHash is null, no default connector is
 *         available, or the connector returns no entry for the hash
 * @throws IOException declared by the connector lookup
 */
public DigestURL getURL(final String urlHash) throws IOException {
    if (urlHash == null) {
        return null;
    }
    if (this.getDefaultConnector() == null) {
        return null;
    }

    final SolrConnector.LoadTimeURL loadTimeURL = this.getDefaultConnector().getLoadTimeURL(urlHash);
    return loadTimeURL == null ? null : new DigestURL(loadTimeURL.url, ASCII.getBytes(urlHash));
}
/**
* get the load time of a resource.
* @param urlHash
@ -582,7 +585,7 @@ public final class Fulltext {
if (md == null) return -1l;
return md.date;
}
public List<File> dumpFiles() {
EmbeddedInstance esc = this.solrInstances.getEmbedded();
ArrayList<File> zips = new ArrayList<File>();
@ -604,7 +607,7 @@ public final class Fulltext {
}
return zips;
}
/**
* Create a dump file from the current embedded solr directory
* @return file reference to the dump
@ -613,11 +616,11 @@ public final class Fulltext {
public File dumpEmbeddedSolr() throws SolrException {
final EmbeddedInstance esc = this.solrInstances.getEmbedded();
if(esc == null) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No embedded Solr available.");
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No embedded Solr available.");
}
final File storagePath = esc.getContainerPath();
final File zipOut = new File(this.archivePath, storagePath.getName() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
this.solrInstancesLock.lock();
this.solrInstancesLock.lock();
try {
this.disconnectLocalSolr();
try {
@ -632,11 +635,11 @@ public final class Fulltext {
}
}
} finally {
this.solrInstancesLock.unlock();
this.solrInstancesLock.unlock();
}
return zipOut;
}
/**
* Restore a solr dump to the current embedded solr directory
* @param solrDumpZipFile the dump file to use
@ -645,10 +648,10 @@ public final class Fulltext {
public void restoreEmbeddedSolr(final File solrDumpZipFile) {
final EmbeddedInstance esc = this.solrInstances.getEmbedded();
if(esc == null) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No embedded Solr available.");
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No embedded Solr available.");
}
final File storagePath = esc.getContainerPath();
this.solrInstancesLock.lock();
this.solrInstancesLock.lock();
try {
// this.disconnectLocalSolr(); // moved to (InstanceMirror) solrInstances.close()
this.solrInstances.close();
@ -665,7 +668,7 @@ public final class Fulltext {
}
}
} finally {
this.solrInstancesLock.unlock();
this.solrInstancesLock.unlock();
}
}
@ -678,13 +681,13 @@ public final class Fulltext {
getDefaultConnector().optimize(size);
if (this.writeWebgraph) getWebgraphConnector().optimize(size);
}
/**
* Reboot the local embedded Solr (experimental to check resource management).
* Please check before that the local embedded Solr is enabled and no external remote Solr is attached.
*/
public void rebootEmbeddedLocalSolr() {
this.solrInstancesLock.lock();
this.solrInstancesLock.lock();
try {
this.disconnectLocalSolr();
// this.solrInstances.close(); // moved to (InstanceMirror) solrInstances.close()
@ -695,7 +698,7 @@ public final class Fulltext {
ConcurrentLog.logException(e);
}
} finally {
this.solrInstancesLock.unlock();
this.solrInstancesLock.unlock();
}
}
@ -705,10 +708,10 @@ public final class Fulltext {
private ExportFormat(String ext) {this.ext = ext;}
public String getExt() {return this.ext;}
}
public final static String yacy_dump_prefix = "yacy_dump_";
public Export export(Fulltext.ExportFormat format, String filter, String query, final int maxseconds, File path, boolean dom, boolean text) throws IOException {
// modify query according to maxseconds
long now = System.currentTimeMillis();
if (maxseconds > 0) {
@ -720,7 +723,7 @@ public final class Fulltext {
} else {
query = query == null? AbstractSolrConnector.CATCHALL_QUERY : query;
}
// check the oldest and latest entry in the index for this query
SolrDocumentList firstdoclist, lastdoclist;
Object firstdateobject, lastdateobject;
@ -732,61 +735,60 @@ public final class Fulltext {
final long doccount;
final Date firstdate, lastdate;
if (firstdoclist.size() == 0 || lastdoclist.size() == 0) {
/* Now check again the number of documents without sorting, for compatibility with old fields indexed without DocValues fields (prior to YaCy 1.90)
* When the local Solr index contains such old documents, requests with sort query return nothing and trace in logs
* "java.lang.IllegalStateException: unexpected docvalues type NONE for field..." */
doccount = this.getDefaultConnector().getCountByQuery(query);
if(doccount == 0) {
/* Finally no document to export was found */
throw new IOException("number of exported documents == 0");
}
/* we use default date values just to generate a proper dump file path */
firstdate = new Date(0);
lastdate = new Date(0);
/* Now check again the number of documents without sorting, for compatibility with old fields indexed without DocValues fields (prior to YaCy 1.90)
* When the local Solr index contains such old documents, requests with sort query return nothing and trace in logs
* "java.lang.IllegalStateException: unexpected docvalues type NONE for field..." */
doccount = this.getDefaultConnector().getCountByQuery(query);
if(doccount == 0) {
/* Finally no document to export was found */
throw new IOException("number of exported documents == 0");
}
/* we use default date values just to generate a proper dump file path */
firstdate = new Date(0);
lastdate = new Date(0);
} else {
doccount = firstdoclist.getNumFound();
// create the export name
SolrDocument firstdoc = firstdoclist.get(0);
SolrDocument lastdoc = lastdoclist.get(0);
firstdateobject = firstdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
lastdateobject = lastdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
/* When firstdate or lastdate is null, we use a default one just to generate a proper dump file path
* This should not happen because load_date_dt field is mandatory in the main Solr schema,
* but for some reason some documents might end up here with an empty load_date_dt field value */
doccount = firstdoclist.getNumFound();
// create the export name
SolrDocument firstdoc = firstdoclist.get(0);
SolrDocument lastdoc = lastdoclist.get(0);
firstdateobject = firstdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
lastdateobject = lastdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
/* When firstdate or lastdate is null, we use a default one just to generate a proper dump file path
* This should not happen because load_date_dt field is mandatory in the main Solr schema,
* but for some reason some documents might end up here with an empty load_date_dt field value */
if(firstdateobject instanceof Date) {
firstdate = (Date) firstdateobject;
firstdate = (Date) firstdateobject;
} else {
ConcurrentLog.warn("Fulltext", "The required field " + CollectionSchema.load_date_dt.getSolrFieldName() + " is empty on document with id : "
+ firstdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
firstdate = new Date(0);
ConcurrentLog.warn("Fulltext", "The required field " + CollectionSchema.load_date_dt.getSolrFieldName() + " is empty on document with id : "
+ firstdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
firstdate = new Date(0);
}
if(lastdateobject instanceof Date) {
lastdate = (Date) lastdateobject;
lastdate = (Date) lastdateobject;
} else {
ConcurrentLog.warn("Fulltext", "The required field " + CollectionSchema.load_date_dt.getSolrFieldName() + " is empty on document with id : "
+ lastdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
lastdate = new Date(0);
ConcurrentLog.warn("Fulltext", "The required field " + CollectionSchema.load_date_dt.getSolrFieldName() + " is empty on document with id : "
+ lastdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
lastdate = new Date(0);
}
}
String s = new File(path, yacy_dump_prefix +
"f" + GenericFormatter.SHORT_MINUTE_FORMATTER.format(firstdate) + "_" +
"l" + GenericFormatter.SHORT_MINUTE_FORMATTER.format(lastdate) + "_" +
"n" + GenericFormatter.SHORT_MINUTE_FORMATTER.format(new Date(now)) + "_" +
"c" + String.format("%1$012d", doccount)).getAbsolutePath() + "_tc"; // the name ends with the transaction token ('c' = 'created')
// create export file name
if (s.indexOf('.',0) < 0) s += "." + format.getExt();
final File f = new File(s);
f.getParentFile().mkdirs();
return export(f, filter, query, format, dom, text);
}
// export methods
public Export export(final File f, final String filter, final String query, final ExportFormat format, final boolean dom, final boolean text) {
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
@ -797,10 +799,10 @@ public final class Fulltext {
this.exportthread.start();
return this.exportthread;
}
/**
 * Manual check: prints what SHORT_MINUTE_FORMATTER produces for a null date.
 *
 * @param args unused
 */
public static void main(String args[]) {
    // declared and printed exactly once
    Date firstdate = null;
    System.out.println(GenericFormatter.SHORT_MINUTE_FORMATTER.format(firstdate));
}
public Export export() {
@ -816,7 +818,7 @@ public final class Fulltext {
private final boolean dom, text;
private Export(final File f, final String filter, final String query, final ExportFormat format, final boolean dom, final boolean text) {
super("Fulltext.Export");
super("Fulltext.Export");
// format: 0=text, 1=html, 2=rss/xml
this.f = f;
this.pattern = filter == null ? null : Pattern.compile(filter);
@ -831,20 +833,20 @@ public final class Fulltext {
@Override
public void run() {
try {
try {
final File parentf = this.f.getParentFile();
if (parentf != null) {
parentf.mkdirs();
parentf.mkdirs();
}
} catch(Exception e) {
} catch(Exception e) {
ConcurrentLog.logException(e);
this.failure = e.getMessage();
return;
}
}
try (/* Resources automatically closed by this try-with-resources statement */
final OutputStream os = new FileOutputStream(this.format == ExportFormat.solr ? new File(this.f.getAbsolutePath() + ".gz") : this.f);
final OutputStream wrappedStream = ((this.format == ExportFormat.solr)) ? new GZIPOutputStream(os, 65536){{def.setLevel(Deflater.BEST_COMPRESSION);}} : os;
final OutputStream wrappedStream = ((this.format == ExportFormat.solr)) ? new GZIPOutputStream(os, 65536){{def.setLevel(Deflater.BEST_COMPRESSION);}} : os;
final PrintWriter pw = new PrintWriter(new BufferedOutputStream(wrappedStream));
) {
if (this.format == ExportFormat.html) {
@ -946,7 +948,7 @@ public final class Fulltext {
pw.println("</response>");
}
} catch (final Exception e) {
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
ConcurrentLog.logException(e);
this.failure = e.getMessage();
}
@ -964,12 +966,12 @@ public final class Fulltext {
/**
 * @return the current value of the count field of this export
 */
public int count() {
    final int current = this.count;
    return current;
}
@SuppressWarnings("unchecked")
private String getStringFrom(final Object o) {
if (o == null) return "";
if (o instanceof ArrayList) return ((ArrayList<String>) o).get(0);
return (String) o;
private String getStringFrom(final Object o) {
if (o == null) return "";
if (o instanceof ArrayList) return ((ArrayList<String>) o).get(0);
return (String) o;
}
}

Loading…
Cancel
Save