@ -34,25 +34,20 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants ;
import com.google.common.io.Files ;
import static java.lang.Thread.MIN_PRIORITY ;
import java.util.ArrayList ;
import java.util.HashSet ;
import java.util.Iterator ;
import java.util.concurrent.Semaphore ;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector ;
import java.util.Set ;
import net.yacy.cora.storage.Configuration.Entry ;
import net.yacy.kelondro.data.meta.URIMetadataRow ;
import net.yacy.kelondro.index.Index ;
import net.yacy.kelondro.index.Row ;
import net.yacy.kelondro.workflow.AbstractBusyThread ;
import net.yacy.kelondro.workflow.AbstractThread ;
import net.yacy.kelondro.workflow.BusyThread ;
import net.yacy.kelondro.workflow.InstantBusyThread ;
import net.yacy.kelondro.workflow.WorkflowThread ;
import net.yacy.search.index.Fulltext ;
import net.yacy.search.schema.CollectionConfiguration ;
import org.apache.solr.common.SolrDocument ;
import org.apache.solr.common.SolrDocumentList ;
import org.apache.solr.common.SolrInputDocument ;
import net.yacy.search.schema.CollectionSchema ;
import org.apache.solr.client.solrj.SolrServerException ;
import org.apache.solr.client.solrj.request.LukeRequest ;
import org.apache.solr.client.solrj.response.LukeResponse ;
public class migration {
//SVN constants
@ -367,32 +362,71 @@ public class migration {
return bt . getJobCount ( ) ;
}
ReindexSolrBusyThread reidx = new ReindexSolrBusyThread ( null ) ; // ("*:*" would reindex all)
// add all disabled fields
boolean lukeCheckok = false ;
Set < String > omitFields = new HashSet < String > ( 3 ) ;
omitFields . add ( CollectionSchema . author_sxt . getSolrFieldName ( ) ) ; // special fields to exclude from disabled check
omitFields . add ( CollectionSchema . coordinate_p_0_coordinate . getSolrFieldName ( ) ) ;
omitFields . add ( CollectionSchema . coordinate_p_1_coordinate . getSolrFieldName ( ) ) ;
CollectionConfiguration colcfg = Switchboard . getSwitchboard ( ) . index . fulltext ( ) . getDefaultConfiguration ( ) ;
Iterator < Entry > itcol = colcfg . entryIterator ( ) ;
while ( itcol . hasNext ( ) ) {
Entry etr = itcol . next ( ) ;
if ( ! etr . enabled ( ) ) {
reidx . addSelectFieldname ( etr . key ( ) ) ;
ReindexSolrBusyThread reidx = new ReindexSolrBusyThread ( null ) ; // ("*:*" would reindex all);
try { // get all fields contained in index
LukeRequest lukeRequest = new LukeRequest ( ) ;
lukeRequest . setNumTerms ( 1 ) ;
LukeResponse lukeResponse = lukeRequest . process ( Switchboard . getSwitchboard ( ) . index . fulltext ( ) . getDefaultEmbeddedConnector ( ) . getServer ( ) ) ;
for ( LukeResponse . FieldInfo solrfield : lukeResponse . getFieldInfo ( ) . values ( ) ) {
if ( ! colcfg . contains ( solrfield . getName ( ) ) & & ! omitFields . contains ( solrfield . getName ( ) ) ) { // add found fields not in config for reindexing
reidx . addSelectFieldname ( solrfield . getName ( ) ) ;
}
}
lukeCheckok = true ;
} catch ( SolrServerException ex ) {
Log . logException ( ex ) ;
} catch ( IOException ex ) {
Log . logException ( ex ) ;
}
// add obsolete fields (no longer part of main index)
reidx . addSelectFieldname ( "inboundlinks_tag_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_relflags_val" ) ;
reidx . addSelectFieldname ( "inboundlinks_rel_sxt" ) ;
reidx . addSelectFieldname ( "inboundlinks_text_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_alttag_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_tag_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_relflags_val" ) ;
reidx . addSelectFieldname ( "outboundlinks_rel_sxt" ) ;
reidx . addSelectFieldname ( "outboundlinks_text_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_alttag_txt" ) ;
if ( ! lukeCheckok ) { // if luke failed alternatively use config and manual list
// add all disabled fields
Iterator < Entry > itcol = colcfg . entryIterator ( ) ;
while ( itcol . hasNext ( ) ) { // check for disabled fields in config
Entry etr = itcol . next ( ) ;
if ( ! etr . enabled ( ) & & ! omitFields . contains ( etr . key ( ) ) ) {
reidx . addSelectFieldname ( etr . key ( ) ) ;
}
}
sb . deployThread ( "reindexSolr" , "Reindex Solr" , "reindex documents with obsolete fields in embedded Solr index" , "/IndexReIndexMonitor_p.html" , reidx /*privateWorkerThread*/ , 0 ) ;
// add obsolete fields (no longer part of main index)
reidx . addSelectFieldname ( "author_s" ) ;
reidx . addSelectFieldname ( "css_tag_txt" ) ;
reidx . addSelectFieldname ( "css_url_txt" ) ;
reidx . addSelectFieldname ( "scripts_txt" ) ;
reidx . addSelectFieldname ( "images_tag_txt" ) ;
reidx . addSelectFieldname ( "images_urlstub_txt" ) ;
reidx . addSelectFieldname ( "canonical_t" ) ;
reidx . addSelectFieldname ( "frames_txt" ) ;
reidx . addSelectFieldname ( "iframes_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_tag_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_relflags_val" ) ;
reidx . addSelectFieldname ( "inboundlinks_name_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_rel_sxt" ) ;
reidx . addSelectFieldname ( "inboundlinks_text_txt" ) ;
reidx . addSelectFieldname ( "inboundlinks_text_chars_val" ) ;
reidx . addSelectFieldname ( "inboundlinks_text_words_val" ) ;
reidx . addSelectFieldname ( "inboundlinks_alttag_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_tag_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_relflags_val" ) ;
reidx . addSelectFieldname ( "outboundlinks_name_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_rel_sxt" ) ;
reidx . addSelectFieldname ( "outboundlinks_text_txt" ) ;
reidx . addSelectFieldname ( "outboundlinks_text_chars_val" ) ;
reidx . addSelectFieldname ( "outboundlinks_text_words_val" ) ;
reidx . addSelectFieldname ( "outboundlinks_alttag_txt" ) ;
}
sb . deployThread ( "reindexSolr" , "Reindex Solr" , "reindex documents with obsolete fields in embedded Solr index" , "/IndexReIndexMonitor_p.html" , reidx , 0 ) ;
return 0 ;
}
}