@ -90,18 +90,18 @@ import org.apache.solr.schema.IndexSchema;
public final class Fulltext {
public final class Fulltext {
private static final String SOLR_PATH = "solr_ 6_6 "; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_PATH = "solr_ 8_8_1 "; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
private static final String SOLR_OLD_PATH [ ] = new String [ ] { "solr_36" , "solr_40" , "solr_44" , "solr_45" , "solr_46" , "solr_47" , "solr_4_9" , "solr_4_10" , "solr_5_2" , "solr_5_5" };
private static final String SOLR_OLD_PATH [ ] = new String [ ] { "solr_36" , "solr_40" , "solr_44" , "solr_45" , "solr_46" , "solr_47" , "solr_4_9" , "solr_4_10" , "solr_5_2" , "solr_5_5" , "solr_6_6" };
// class objects
// class objects
private final File segmentPath ;
private final File segmentPath ;
private final File archivePath ;
private final File archivePath ;
private Export exportthread ; // will have a export thread assigned if exporter is running
private Export exportthread ; // will have a export thread assigned if exporter is running
private InstanceMirror solrInstances ;
private InstanceMirror solrInstances ;
/** Synchronization lock for solrInstances property */
/** Synchronization lock for solrInstances property */
private ReentrantLock solrInstancesLock ;
private ReentrantLock solrInstancesLock ;
private final CollectionConfiguration collectionConfiguration ;
private final CollectionConfiguration collectionConfiguration ;
private final WebgraphConfiguration webgraphConfiguration ;
private final WebgraphConfiguration webgraphConfiguration ;
private boolean writeWebgraph ;
private boolean writeWebgraph ;
@ -117,11 +117,11 @@ public final class Fulltext {
this . webgraphConfiguration = webgraphConfiguration ;
this . webgraphConfiguration = webgraphConfiguration ;
this . writeWebgraph = false ;
this . writeWebgraph = false ;
}
}
public void setUseWebgraph ( boolean check ) {
public void setUseWebgraph ( boolean check ) {
this . writeWebgraph = check ;
this . writeWebgraph = check ;
}
}
public boolean useWebgraph ( ) {
public boolean useWebgraph ( ) {
return this . writeWebgraph ;
return this . writeWebgraph ;
}
}
@ -140,24 +140,28 @@ public final class Fulltext {
public void connectLocalSolr ( ) throws IOException {
public void connectLocalSolr ( ) throws IOException {
File solrLocation = new File ( this . segmentPath , SOLR_PATH ) ;
File solrLocation = new File ( this . segmentPath , SOLR_PATH ) ;
// migrate old solr to new
// migrate old solr to new
/ *
for ( String oldVersion : SOLR_OLD_PATH ) {
for ( String oldVersion : SOLR_OLD_PATH ) {
File oldLocation = new File ( this . segmentPath , oldVersion ) ;
File oldLocation = new File ( this . segmentPath , oldVersion ) ;
if ( oldLocation . exists ( ) ) {
if ( oldLocation . exists ( ) ) {
if ( ! oldLocation . renameTo ( solrLocation ) ) {
if ( ! oldLocation . renameTo ( solrLocation ) ) {
ConcurrentLog . severe ( "Fulltext" , "Failed renaming old Solr location ("
ConcurrentLog . severe ( "Fulltext" , "Failed renaming old Solr location ("
+ oldLocation . getAbsolutePath ( ) + ") to new location : " + solrLocation . getAbsolutePath ( ) ) ;
+ oldLocation . getAbsolutePath ( ) + ") to new location : " + solrLocation . getAbsolutePath ( ) ) ;
}
}
}
}
}
}
* /
EmbeddedInstance localCollectionInstance = new EmbeddedInstance ( new File ( new File ( Switchboard . getSwitchboard ( ) . appPath , "defaults" ) , "solr" ) , solrLocation , CollectionSchema . CORE_NAME , new String [ ] { CollectionSchema . CORE_NAME , WebgraphSchema . CORE_NAME } ) ;
EmbeddedInstance localCollectionInstance = new EmbeddedInstance ( new File ( new File ( Switchboard . getSwitchboard ( ) . appPath , "defaults" ) , "solr" ) , solrLocation , CollectionSchema . CORE_NAME , new String [ ] { CollectionSchema . CORE_NAME , WebgraphSchema . CORE_NAME } ) ;
SolrConfig config = localCollectionInstance . getDefaultCore ( ) . getSolrConfig ( ) ;
SolrConfig config = localCollectionInstance . getDefaultCore ( ) . getSolrConfig ( ) ;
String versionValue = config . getVal ( IndexSchema . LUCENE_MATCH_VERSION_PARAM , true ) ;
String versionValue = config . getVal ( IndexSchema . LUCENE_MATCH_VERSION_PARAM , true ) ;
Version luceneVersion = SolrConfig . parseLuceneVersionString ( versionValue ) ;
Version luceneVersion = SolrConfig . parseLuceneVersionString ( versionValue ) ;
String lvn = luceneVersion . major + "_" + luceneVersion . minor ;
String lvn = luceneVersion . major + "_" + luceneVersion . minor + "_" + luceneVersion . bugfix ;
ConcurrentLog . info ( "Fulltext" , "using lucene version " + lvn ) ;
assert SOLR_PATH . endsWith ( lvn ) : "luceneVersion = " + lvn + ", solrPath = " + SOLR_PATH + ", check defaults/solr/solrconfig.xml" ;
assert SOLR_PATH . endsWith ( lvn ) : "luceneVersion = " + lvn + ", solrPath = " + SOLR_PATH + ", check defaults/solr/solrconfig.xml" ;
ConcurrentLog . info ( "Fulltext" , "using lucene version " + lvn ) ;
ConcurrentLog . info ( "Fulltext" , "connected solr in " + solrLocation . toString ( ) + ", lucene version " + lvn ) ;
ConcurrentLog . info ( "Fulltext" , "connected solr in " + solrLocation . toString ( ) + ", lucene version " + lvn ) ;
this . solrInstances . connectEmbedded ( localCollectionInstance ) ;
this . solrInstances . connectEmbedded ( localCollectionInstance ) ;
}
}
@ -194,64 +198,64 @@ public final class Fulltext {
public RemoteSolrConnector getDefaultRemoteSolrConnector ( ) {
public RemoteSolrConnector getDefaultRemoteSolrConnector ( ) {
try {
try {
boolean useBinaryResponseWriter = SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT ;
boolean useBinaryResponseWriter = SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT ;
if ( Switchboard . getSwitchboard ( ) ! = null ) {
if ( Switchboard . getSwitchboard ( ) ! = null ) {
useBinaryResponseWriter = Switchboard . getSwitchboard ( ) . getConfigBool (
useBinaryResponseWriter = Switchboard . getSwitchboard ( ) . getConfigBool (
SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED ,
SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED ,
SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT ) ;
SwitchboardConstants . REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT ) ;
}
}
return this . solrInstances . getDefaultRemoteConnector ( useBinaryResponseWriter ) ;
return this . solrInstances . getDefaultRemoteConnector ( useBinaryResponseWriter ) ;
} catch ( IOException e ) {
} catch ( IOException e ) {
return null ;
return null ;
}
}
}
}
public EmbeddedInstance getEmbeddedInstance ( ) {
public EmbeddedInstance getEmbeddedInstance ( ) {
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
if ( this . solrInstances . isConnectedEmbedded ( ) ) {
if ( this . solrInstances . isConnectedEmbedded ( ) ) {
return this . solrInstances . getEmbedded ( ) ;
return this . solrInstances . getEmbedded ( ) ;
}
}
return null ;
return null ;
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
public SolrConnector getDefaultConnector ( ) {
public SolrConnector getDefaultConnector ( ) {
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
return this . solrInstances . getDefaultMirrorConnector ( ) ;
return this . solrInstances . getDefaultMirrorConnector ( ) ;
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
public SolrConnector getWebgraphConnector ( ) {
public SolrConnector getWebgraphConnector ( ) {
if ( ! this . writeWebgraph ) {
if ( ! this . writeWebgraph ) {
return null ;
return null ;
}
}
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
return this . solrInstances . getGenericMirrorConnector ( WebgraphSchema . CORE_NAME ) ;
return this . solrInstances . getGenericMirrorConnector ( WebgraphSchema . CORE_NAME ) ;
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
public int bufferSize ( ) {
public int bufferSize ( ) {
return this . solrInstances . bufferSize ( ) ;
return this . solrInstances . bufferSize ( ) ;
}
}
public void clearCaches ( ) {
public void clearCaches ( ) {
this . solrInstances . clearCaches ( ) ;
this . solrInstances . clearCaches ( ) ;
}
}
public void clearLocalSolr ( ) throws IOException {
public void clearLocalSolr ( ) throws IOException {
if ( this . exportthread ! = null ) {
if ( this . exportthread ! = null ) {
this . exportthread . interrupt ( ) ;
this . exportthread . interrupt ( ) ;
}
}
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
EmbeddedInstance instance = this . solrInstances . getEmbedded ( ) ;
EmbeddedInstance instance = this . solrInstances . getEmbedded ( ) ;
if ( instance ! = null ) {
if ( instance ! = null ) {
@ -262,12 +266,12 @@ public final class Fulltext {
}
}
this . solrInstances . clearCaches ( ) ;
this . solrInstances . clearCaches ( ) ;
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
public void clearRemoteSolr ( ) throws IOException {
public void clearRemoteSolr ( ) throws IOException {
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
ShardInstance instance = this . solrInstances . getRemote ( ) ;
ShardInstance instance = this . solrInstances . getRemote ( ) ;
if ( instance ! = null ) {
if ( instance ! = null ) {
@ -277,7 +281,7 @@ public final class Fulltext {
}
}
this . solrInstances . clearCaches ( ) ;
this . solrInstances . clearCaches ( ) ;
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
@ -297,7 +301,7 @@ public final class Fulltext {
this . collectionSizeLastValue = size ;
this . collectionSizeLastValue = size ;
return size ;
return size ;
}
}
/ * *
/ * *
* @return the size of the webgraph index
* @return the size of the webgraph index
* /
* /
@ -309,10 +313,10 @@ public final class Fulltext {
try {
try {
this . solrInstances . close ( ) ;
this . solrInstances . close ( ) ;
} catch ( Throwable e ) {
} catch ( Throwable e ) {
ConcurrentLog . logException ( e ) ;
ConcurrentLog . logException ( e ) ;
}
}
}
}
private long lastCommit = 0 ;
private long lastCommit = 0 ;
public void commit ( boolean softCommit ) {
public void commit ( boolean softCommit ) {
long t = System . currentTimeMillis ( ) ;
long t = System . currentTimeMillis ( ) ;
@ -346,15 +350,15 @@ public final class Fulltext {
if ( urlHash = = null ) return null ;
if ( urlHash = = null ) return null ;
return getMetadata ( urlHash , null , 0 L ) ;
return getMetadata ( urlHash , null , 0 L ) ;
}
}
private URIMetadataNode getMetadata ( final byte [ ] urlHash , final WordReferenceVars wre , final long score ) {
private URIMetadataNode getMetadata ( final byte [ ] urlHash , final WordReferenceVars wre , final long score ) {
String u = ASCII . String ( urlHash ) ;
String u = ASCII . String ( urlHash ) ;
// get the metadata from Solr
// get the metadata from Solr
try {
try {
SolrDocument doc = this . getDefaultConnector ( ) . getDocumentById ( u ) ;
SolrDocument doc = this . getDefaultConnector ( ) . getDocumentById ( u ) ;
if ( doc ! = null ) {
if ( doc ! = null ) {
return new URIMetadataNode ( doc , wre , score ) ;
return new URIMetadataNode ( doc , wre , score ) ;
}
}
} catch ( final IOException e ) {
} catch ( final IOException e ) {
ConcurrentLog . logException ( e ) ;
ConcurrentLog . logException ( e ) ;
@ -411,7 +415,7 @@ public final class Fulltext {
Collection < Object > collections = doc . getFieldValues ( CollectionSchema . collection_sxt . getSolrFieldName ( ) ) ;
Collection < Object > collections = doc . getFieldValues ( CollectionSchema . collection_sxt . getSolrFieldName ( ) ) ;
// collection dht is used to identify metadata from full crawled documents (if "dht" exists don't overwrite rich crawldata with metadata
// collection dht is used to identify metadata from full crawled documents (if "dht" exists don't overwrite rich crawldata with metadata
if ( ! collections . contains ( "dht" ) ) return ;
if ( ! collections . contains ( "dht" ) ) return ;
// passed all checks, overwrite document
// passed all checks, overwrite document
putDocument ( getDefaultConfiguration ( ) . metadata2solr ( entry ) ) ;
putDocument ( getDefaultConfiguration ( ) . metadata2solr ( entry ) ) ;
}
}
@ -451,7 +455,7 @@ public final class Fulltext {
( freshdate = = null | | freshdate . after ( now ) ) ? null :
( freshdate = = null | | freshdate . after ( now ) ) ? null :
( WebgraphSchema . load_date_dt . getSolrFieldName ( ) + ":[* TO " + ISO8601Formatter . FORMATTER . format ( freshdate ) + "]" ) ) ;
( WebgraphSchema . load_date_dt . getSolrFieldName ( ) + ":[* TO " + ISO8601Formatter . FORMATTER . format ( freshdate ) + "]" ) ) ;
}
}
/ * *
/ * *
* delete all documents within a domain that are registered as error document
* delete all documents within a domain that are registered as error document
* @param hosthashes
* @param hosthashes
@ -459,7 +463,7 @@ public final class Fulltext {
public void deleteDomainErrors ( final Set < String > hosthashes ) {
public void deleteDomainErrors ( final Set < String > hosthashes ) {
deleteDomainWithConstraint ( this . getDefaultConnector ( ) , CollectionSchema . host_id_s . getSolrFieldName ( ) , hosthashes , CollectionSchema . failreason_s . getSolrFieldName ( ) + AbstractSolrConnector . CATCHALL_DTERM ) ;
deleteDomainWithConstraint ( this . getDefaultConnector ( ) , CollectionSchema . host_id_s . getSolrFieldName ( ) , hosthashes , CollectionSchema . failreason_s . getSolrFieldName ( ) + AbstractSolrConnector . CATCHALL_DTERM ) ;
}
}
private static void deleteDomainWithConstraint ( SolrConnector connector , String fieldname , final Set < String > hosthashes , String constraintQuery ) {
private static void deleteDomainWithConstraint ( SolrConnector connector , String fieldname , final Set < String > hosthashes , String constraintQuery ) {
if ( hosthashes = = null | | hosthashes . size ( ) = = 0 ) return ;
if ( hosthashes = = null | | hosthashes . size ( ) = = 0 ) return ;
int subsetscount = 1 + ( hosthashes . size ( ) / 255 ) ; // if the list is too large, we get a "too many boolean clauses" exception
int subsetscount = 1 + ( hosthashes . size ( ) / 255 ) ; // if the list is too large, we get a "too many boolean clauses" exception
@ -492,8 +496,7 @@ public final class Fulltext {
} catch ( final IOException e ) {
} catch ( final IOException e ) {
}
}
}
}
/ * *
/ * *
* remove a full subpath from the index
* remove a full subpath from the index
* @param basepath the left path of the url ; at least until the end of the host
* @param basepath the left path of the url ; at least until the end of the host
@ -522,7 +525,7 @@ public final class Fulltext {
} catch ( final InterruptedException e ) { }
} catch ( final InterruptedException e ) { }
return count . get ( ) ;
return count . get ( ) ;
}
}
/ * *
/ * *
* remove a list of id ' s from the index ( matching fulltext . id and webgraph . source_id_s
* remove a list of id ' s from the index ( matching fulltext . id and webgraph . source_id_s
* @param deleteIDs a list of urlhashes ; each denoting a document
* @param deleteIDs a list of urlhashes ; each denoting a document
@ -533,7 +536,7 @@ public final class Fulltext {
this . getDefaultConnector ( ) . deleteByIds ( deleteIDs ) ;
this . getDefaultConnector ( ) . deleteByIds ( deleteIDs ) ;
if ( this . writeWebgraph ) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
if ( this . writeWebgraph ) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
for ( String id : deleteIDs ) {
for ( String id : deleteIDs ) {
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash starts with a '-' character */
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash starts with a '-' character */
this . getWebgraphConnector ( ) . deleteByQuery ( WebgraphSchema . source_id_s . name ( ) + ":\"" + id + "\"" ) ;
this . getWebgraphConnector ( ) . deleteByQuery ( WebgraphSchema . source_id_s . name ( ) + ":\"" + id + "\"" ) ;
}
}
}
}
@ -554,7 +557,7 @@ public final class Fulltext {
String id = ASCII . String ( urlHash ) ;
String id = ASCII . String ( urlHash ) ;
this . getDefaultConnector ( ) . deleteById ( id ) ;
this . getDefaultConnector ( ) . deleteById ( id ) ;
if ( this . writeWebgraph ) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
if ( this . writeWebgraph ) { // Webgraph.id is combination of sourceHash+targetHash+hexCounter, to be successful use source_id_s and/or target_id_s
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash starts with a '-' character */
/* Add quotes around the url hash to prevent Solr logging a ParseException stack trace when the hash starts with a '-' character */
this . getWebgraphConnector ( ) . deleteByQuery ( WebgraphSchema . source_id_s + ":\"" + id + "\"" ) ;
this . getWebgraphConnector ( ) . deleteByQuery ( WebgraphSchema . source_id_s + ":\"" + id + "\"" ) ;
}
}
} catch ( final Throwable e ) {
} catch ( final Throwable e ) {
@ -565,12 +568,12 @@ public final class Fulltext {
public DigestURL getURL ( final String urlHash ) throws IOException {
public DigestURL getURL ( final String urlHash ) throws IOException {
if ( urlHash = = null | | this . getDefaultConnector ( ) = = null ) return null ;
if ( urlHash = = null | | this . getDefaultConnector ( ) = = null ) return null ;
SolrConnector . LoadTimeURL md = this . getDefaultConnector ( ) . getLoadTimeURL ( urlHash ) ;
SolrConnector . LoadTimeURL md = this . getDefaultConnector ( ) . getLoadTimeURL ( urlHash ) ;
if ( md = = null ) return null ;
if ( md = = null ) return null ;
return new DigestURL ( md . url , ASCII . getBytes ( urlHash ) ) ;
return new DigestURL ( md . url , ASCII . getBytes ( urlHash ) ) ;
}
}
/ * *
/ * *
* get the load time of a resource .
* get the load time of a resource .
* @param urlHash
* @param urlHash
@ -582,7 +585,7 @@ public final class Fulltext {
if ( md = = null ) return - 1l ;
if ( md = = null ) return - 1l ;
return md . date ;
return md . date ;
}
}
public List < File > dumpFiles ( ) {
public List < File > dumpFiles ( ) {
EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
ArrayList < File > zips = new ArrayList < File > ( ) ;
ArrayList < File > zips = new ArrayList < File > ( ) ;
@ -604,7 +607,7 @@ public final class Fulltext {
}
}
return zips ;
return zips ;
}
}
/ * *
/ * *
* Create a dump file from the current embedded solr directory
* Create a dump file from the current embedded solr directory
* @return file reference to the dump
* @return file reference to the dump
@ -613,11 +616,11 @@ public final class Fulltext {
public File dumpEmbeddedSolr ( ) throws SolrException {
public File dumpEmbeddedSolr ( ) throws SolrException {
final EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
final EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
if ( esc = = null ) {
if ( esc = = null ) {
throw new SolrException ( ErrorCode . SERVICE_UNAVAILABLE , "No embedded Solr available." ) ;
throw new SolrException ( ErrorCode . SERVICE_UNAVAILABLE , "No embedded Solr available." ) ;
}
}
final File storagePath = esc . getContainerPath ( ) ;
final File storagePath = esc . getContainerPath ( ) ;
final File zipOut = new File ( this . archivePath , storagePath . getName ( ) + "_" + GenericFormatter . SHORT_DAY_FORMATTER . format ( ) + ".zip" ) ;
final File zipOut = new File ( this . archivePath , storagePath . getName ( ) + "_" + GenericFormatter . SHORT_DAY_FORMATTER . format ( ) + ".zip" ) ;
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
this . disconnectLocalSolr ( ) ;
this . disconnectLocalSolr ( ) ;
try {
try {
@ -632,11 +635,11 @@ public final class Fulltext {
}
}
}
}
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
return zipOut ;
return zipOut ;
}
}
/ * *
/ * *
* Restore a solr dump to the current embedded solr directory
* Restore a solr dump to the current embedded solr directory
* @param solrDumpZipFile the dump file to use
* @param solrDumpZipFile the dump file to use
@ -645,10 +648,10 @@ public final class Fulltext {
public void restoreEmbeddedSolr ( final File solrDumpZipFile ) {
public void restoreEmbeddedSolr ( final File solrDumpZipFile ) {
final EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
final EmbeddedInstance esc = this . solrInstances . getEmbedded ( ) ;
if ( esc = = null ) {
if ( esc = = null ) {
throw new SolrException ( ErrorCode . SERVICE_UNAVAILABLE , "No embedded Solr available." ) ;
throw new SolrException ( ErrorCode . SERVICE_UNAVAILABLE , "No embedded Solr available." ) ;
}
}
final File storagePath = esc . getContainerPath ( ) ;
final File storagePath = esc . getContainerPath ( ) ;
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
// this.disconnectLocalSolr(); // moved to (InstanceMirror) solrInstances.close()
// this.disconnectLocalSolr(); // moved to (InstanceMirror) solrInstances.close()
this . solrInstances . close ( ) ;
this . solrInstances . close ( ) ;
@ -665,7 +668,7 @@ public final class Fulltext {
}
}
}
}
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
@ -678,13 +681,13 @@ public final class Fulltext {
getDefaultConnector ( ) . optimize ( size ) ;
getDefaultConnector ( ) . optimize ( size ) ;
if ( this . writeWebgraph ) getWebgraphConnector ( ) . optimize ( size ) ;
if ( this . writeWebgraph ) getWebgraphConnector ( ) . optimize ( size ) ;
}
}
/ * *
/ * *
* Reboot the local embedded Solr ( experimental to check resource management ) .
* Reboot the local embedded Solr ( experimental to check resource management ) .
* Please check before that the local embedded Solr is enabled and no external remote Solr is attached .
* Please check before that the local embedded Solr is enabled and no external remote Solr is attached .
* /
* /
public void rebootEmbeddedLocalSolr ( ) {
public void rebootEmbeddedLocalSolr ( ) {
this . solrInstancesLock . lock ( ) ;
this . solrInstancesLock . lock ( ) ;
try {
try {
this . disconnectLocalSolr ( ) ;
this . disconnectLocalSolr ( ) ;
// this.solrInstances.close(); // moved to (InstanceMirror) solrInstances.close()
// this.solrInstances.close(); // moved to (InstanceMirror) solrInstances.close()
@ -695,7 +698,7 @@ public final class Fulltext {
ConcurrentLog . logException ( e ) ;
ConcurrentLog . logException ( e ) ;
}
}
} finally {
} finally {
this . solrInstancesLock . unlock ( ) ;
this . solrInstancesLock . unlock ( ) ;
}
}
}
}
@ -705,10 +708,10 @@ public final class Fulltext {
private ExportFormat ( String ext ) { this . ext = ext ; }
private ExportFormat ( String ext ) { this . ext = ext ; }
public String getExt ( ) { return this . ext ; }
public String getExt ( ) { return this . ext ; }
}
}
public final static String yacy_dump_prefix = "yacy_dump_" ;
public final static String yacy_dump_prefix = "yacy_dump_" ;
public Export export ( Fulltext . ExportFormat format , String filter , String query , final int maxseconds , File path , boolean dom , boolean text ) throws IOException {
public Export export ( Fulltext . ExportFormat format , String filter , String query , final int maxseconds , File path , boolean dom , boolean text ) throws IOException {
// modify query according to maxseconds
// modify query according to maxseconds
long now = System . currentTimeMillis ( ) ;
long now = System . currentTimeMillis ( ) ;
if ( maxseconds > 0 ) {
if ( maxseconds > 0 ) {
@ -720,7 +723,7 @@ public final class Fulltext {
} else {
} else {
query = query = = null ? AbstractSolrConnector . CATCHALL_QUERY : query ;
query = query = = null ? AbstractSolrConnector . CATCHALL_QUERY : query ;
}
}
// check the oldest and latest entry in the index for this query
// check the oldest and latest entry in the index for this query
SolrDocumentList firstdoclist , lastdoclist ;
SolrDocumentList firstdoclist , lastdoclist ;
Object firstdateobject , lastdateobject ;
Object firstdateobject , lastdateobject ;
@ -732,61 +735,60 @@ public final class Fulltext {
final long doccount ;
final long doccount ;
final Date firstdate , lastdate ;
final Date firstdate , lastdate ;
if ( firstdoclist . size ( ) = = 0 | | lastdoclist . size ( ) = = 0 ) {
if ( firstdoclist . size ( ) = = 0 | | lastdoclist . size ( ) = = 0 ) {
/ * Now check again the number of documents without sorting , for compatibility with old fields indexed without DocValues fields ( prior to YaCy 1.90 )
/ * Now check again the number of documents without sorting , for compatibility with old fields indexed without DocValues fields ( prior to YaCy 1.90 )
* When the local Solr index contains such old documents , requests with sort query return nothing and trace in logs
* When the local Solr index contains such old documents , requests with sort query return nothing and trace in logs
* "java.lang.IllegalStateException: unexpected docvalues type NONE for field..." * /
* "java.lang.IllegalStateException: unexpected docvalues type NONE for field..." * /
doccount = this . getDefaultConnector ( ) . getCountByQuery ( query ) ;
doccount = this . getDefaultConnector ( ) . getCountByQuery ( query ) ;
if ( doccount = = 0 ) {
if ( doccount = = 0 ) {
/* Finally no document to export was found */
/* Finally no document to export was found */
throw new IOException ( "number of exported documents == 0" ) ;
throw new IOException ( "number of exported documents == 0" ) ;
}
}
/* we use default date values just to generate a proper dump file path */
/* we use default date values just to generate a proper dump file path */
firstdate = new Date ( 0 ) ;
firstdate = new Date ( 0 ) ;
lastdate = new Date ( 0 ) ;
lastdate = new Date ( 0 ) ;
} else {
} else {
doccount = firstdoclist . getNumFound ( ) ;
doccount = firstdoclist . getNumFound ( ) ;
// create the export name
// create the export name
SolrDocument firstdoc = firstdoclist . get ( 0 ) ;
SolrDocument firstdoc = firstdoclist . get ( 0 ) ;
SolrDocument lastdoc = lastdoclist . get ( 0 ) ;
SolrDocument lastdoc = lastdoclist . get ( 0 ) ;
firstdateobject = firstdoc . getFieldValue ( CollectionSchema . load_date_dt . getSolrFieldName ( ) ) ;
firstdateobject = firstdoc . getFieldValue ( CollectionSchema . load_date_dt . getSolrFieldName ( ) ) ;
lastdateobject = lastdoc . getFieldValue ( CollectionSchema . load_date_dt . getSolrFieldName ( ) ) ;
lastdateobject = lastdoc . getFieldValue ( CollectionSchema . load_date_dt . getSolrFieldName ( ) ) ;
/ * When firstdate or lastdate is null , we use a default one just to generate a proper dump file path
/ * When firstdate or lastdate is null , we use a default one just to generate a proper dump file path
* This should not happen because load_date_dt field is mandatory in the main Solr schema ,
* This should not happen because load_date_dt field is mandatory in the main Solr schema ,
* but for some reason some documents might end up here with an empty load_date_dt field value * /
* but for some reason some documents might end up here with an empty load_date_dt field value * /
if ( firstdateobject instanceof Date ) {
if ( firstdateobject instanceof Date ) {
firstdate = ( Date ) firstdateobject ;
firstdate = ( Date ) firstdateobject ;
} else {
} else {
ConcurrentLog . warn ( "Fulltext" , "The required field " + CollectionSchema . load_date_dt . getSolrFieldName ( ) + " is empty on document with id : "
ConcurrentLog . warn ( "Fulltext" , "The required field " + CollectionSchema . load_date_dt . getSolrFieldName ( ) + " is empty on document with id : "
+ firstdoc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ) ;
+ firstdoc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ) ;
firstdate = new Date ( 0 ) ;
firstdate = new Date ( 0 ) ;
}
}
if ( lastdateobject instanceof Date ) {
if ( lastdateobject instanceof Date ) {
lastdate = ( Date ) lastdateobject ;
lastdate = ( Date ) lastdateobject ;
} else {
} else {
ConcurrentLog . warn ( "Fulltext" , "The required field " + CollectionSchema . load_date_dt . getSolrFieldName ( ) + " is empty on document with id : "
ConcurrentLog . warn ( "Fulltext" , "The required field " + CollectionSchema . load_date_dt . getSolrFieldName ( ) + " is empty on document with id : "
+ lastdoc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ) ;
+ lastdoc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ) ;
lastdate = new Date ( 0 ) ;
lastdate = new Date ( 0 ) ;
}
}
}
}
String s = new File ( path , yacy_dump_prefix +
String s = new File ( path , yacy_dump_prefix +
"f" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( firstdate ) + "_" +
"f" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( firstdate ) + "_" +
"l" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( lastdate ) + "_" +
"l" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( lastdate ) + "_" +
"n" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( new Date ( now ) ) + "_" +
"n" + GenericFormatter . SHORT_MINUTE_FORMATTER . format ( new Date ( now ) ) + "_" +
"c" + String . format ( "%1$012d" , doccount ) ) . getAbsolutePath ( ) + "_tc" ; // the name ends with the transaction token ('c' = 'created')
"c" + String . format ( "%1$012d" , doccount ) ) . getAbsolutePath ( ) + "_tc" ; // the name ends with the transaction token ('c' = 'created')
// create export file name
// create export file name
if ( s . indexOf ( '.' , 0 ) < 0 ) s + = "." + format . getExt ( ) ;
if ( s . indexOf ( '.' , 0 ) < 0 ) s + = "." + format . getExt ( ) ;
final File f = new File ( s ) ;
final File f = new File ( s ) ;
f . getParentFile ( ) . mkdirs ( ) ;
f . getParentFile ( ) . mkdirs ( ) ;
return export ( f , filter , query , format , dom , text ) ;
return export ( f , filter , query , format , dom , text ) ;
}
}
// export methods
// export methods
public Export export ( final File f , final String filter , final String query , final ExportFormat format , final boolean dom , final boolean text ) {
public Export export ( final File f , final String filter , final String query , final ExportFormat format , final boolean dom , final boolean text ) {
if ( ( this . exportthread ! = null ) & & ( this . exportthread . isAlive ( ) ) ) {
if ( ( this . exportthread ! = null ) & & ( this . exportthread . isAlive ( ) ) ) {
@ -797,10 +799,10 @@ public final class Fulltext {
this . exportthread . start ( ) ;
this . exportthread . start ( ) ;
return this . exportthread ;
return this . exportthread ;
}
}
public static void main ( String args [ ] ) {
public static void main ( String args [ ] ) {
Date firstdate = null ;
Date firstdate = null ;
System . out . println ( GenericFormatter . SHORT_MINUTE_FORMATTER . format ( firstdate ) ) ;
System . out . println ( GenericFormatter . SHORT_MINUTE_FORMATTER . format ( firstdate ) ) ;
}
}
public Export export ( ) {
public Export export ( ) {
@ -816,7 +818,7 @@ public final class Fulltext {
private final boolean dom , text ;
private final boolean dom , text ;
private Export ( final File f , final String filter , final String query , final ExportFormat format , final boolean dom , final boolean text ) {
private Export ( final File f , final String filter , final String query , final ExportFormat format , final boolean dom , final boolean text ) {
super ( "Fulltext.Export" ) ;
super ( "Fulltext.Export" ) ;
// format: 0=text, 1=html, 2=rss/xml
// format: 0=text, 1=html, 2=rss/xml
this . f = f ;
this . f = f ;
this . pattern = filter = = null ? null : Pattern . compile ( filter ) ;
this . pattern = filter = = null ? null : Pattern . compile ( filter ) ;
@ -831,20 +833,20 @@ public final class Fulltext {
@Override
@Override
public void run ( ) {
public void run ( ) {
try {
try {
final File parentf = this . f . getParentFile ( ) ;
final File parentf = this . f . getParentFile ( ) ;
if ( parentf ! = null ) {
if ( parentf ! = null ) {
parentf . mkdirs ( ) ;
parentf . mkdirs ( ) ;
}
}
} catch ( Exception e ) {
} catch ( Exception e ) {
ConcurrentLog . logException ( e ) ;
ConcurrentLog . logException ( e ) ;
this . failure = e . getMessage ( ) ;
this . failure = e . getMessage ( ) ;
return ;
return ;
}
}
try ( /* Resources automatically closed by this try-with-resources statement */
try ( /* Resources automatically closed by this try-with-resources statement */
final OutputStream os = new FileOutputStream ( this . format = = ExportFormat . solr ? new File ( this . f . getAbsolutePath ( ) + ".gz" ) : this . f ) ;
final OutputStream os = new FileOutputStream ( this . format = = ExportFormat . solr ? new File ( this . f . getAbsolutePath ( ) + ".gz" ) : this . f ) ;
final OutputStream wrappedStream = ( ( this . format = = ExportFormat . solr ) ) ? new GZIPOutputStream ( os , 65536 ) { { def . setLevel ( Deflater . BEST_COMPRESSION ) ; } } : os ;
final OutputStream wrappedStream = ( ( this . format = = ExportFormat . solr ) ) ? new GZIPOutputStream ( os , 65536 ) { { def . setLevel ( Deflater . BEST_COMPRESSION ) ; } } : os ;
final PrintWriter pw = new PrintWriter ( new BufferedOutputStream ( wrappedStream ) ) ;
final PrintWriter pw = new PrintWriter ( new BufferedOutputStream ( wrappedStream ) ) ;
) {
) {
if ( this . format = = ExportFormat . html ) {
if ( this . format = = ExportFormat . html ) {
@ -946,7 +948,7 @@ public final class Fulltext {
pw . println ( "</response>" ) ;
pw . println ( "</response>" ) ;
}
}
} catch ( final Exception e ) {
} catch ( final Exception e ) {
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
ConcurrentLog . logException ( e ) ;
ConcurrentLog . logException ( e ) ;
this . failure = e . getMessage ( ) ;
this . failure = e . getMessage ( ) ;
}
}
@ -964,12 +966,12 @@ public final class Fulltext {
public int count ( ) {
public int count ( ) {
return this . count ;
return this . count ;
}
}
@SuppressWarnings ( "unchecked" )
@SuppressWarnings ( "unchecked" )
private String getStringFrom ( final Object o ) {
private String getStringFrom ( final Object o ) {
if ( o = = null ) return "" ;
if ( o = = null ) return "" ;
if ( o instanceof ArrayList ) return ( ( ArrayList < String > ) o ) . get ( 0 ) ;
if ( o instanceof ArrayList ) return ( ( ArrayList < String > ) o ) . get ( 0 ) ;
return ( String ) o ;
return ( String ) o ;
}
}
}
}