Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Conflicts:
	source/net/yacy/migration.java
pull/1/head
Michael Peter Christen 12 years ago
commit b68fbe7d21

@ -24,7 +24,10 @@ package net.yacy;
import net.yacy.search.index.ReindexSolrBusyThread; import net.yacy.search.index.ReindexSolrBusyThread;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Set;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest; import net.yacy.cora.order.Digest;
@ -34,7 +37,6 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import com.google.common.io.Files; import com.google.common.io.Files;
import java.util.Iterator;
import net.yacy.cora.storage.Configuration.Entry; import net.yacy.cora.storage.Configuration.Entry;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Index;
@ -42,6 +44,10 @@ import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.workflow.BusyThread; import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.search.index.Fulltext; import net.yacy.search.index.Fulltext;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.LukeResponse;
public class migration { public class migration {
//SVN constants //SVN constants
@ -354,34 +360,73 @@ public class migration {
// a reindex job is already running // a reindex job is already running
if (bt != null) { if (bt != null) {
return bt.getJobCount(); return bt.getJobCount();
} }
ReindexSolrBusyThread reidx = new ReindexSolrBusyThread(null); // ("*:*" would reindex all) boolean lukeCheckok = false;
Set<String> omitFields = new HashSet<String>(3);
// add all disabled fields omitFields.add(CollectionSchema.author_sxt.getSolrFieldName()); // special fields to exclude from disabled check
omitFields.add(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
omitFields.add(CollectionSchema.coordinate_p_1_coordinate.getSolrFieldName());
CollectionConfiguration colcfg = Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration(); CollectionConfiguration colcfg = Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration();
Iterator<Entry> itcol = colcfg.entryIterator(); ReindexSolrBusyThread reidx = new ReindexSolrBusyThread(null); // ("*:*" would reindex all);
while (itcol.hasNext()) {
Entry etr = itcol.next(); try { // get all fields contained in index
if (!etr.enabled()) { LukeRequest lukeRequest = new LukeRequest();
reidx.addSelectFieldname(etr.key()); lukeRequest.setNumTerms(1);
LukeResponse lukeResponse = lukeRequest.process(Switchboard.getSwitchboard().index.fulltext().getDefaultEmbeddedConnector().getServer());
for (LukeResponse.FieldInfo solrfield : lukeResponse.getFieldInfo().values()) {
if (!colcfg.contains(solrfield.getName()) && !omitFields.contains(solrfield.getName())) { // add found fields not in config for reindexing
reidx.addSelectFieldname(solrfield.getName());
}
} }
lukeCheckok = true;
} catch (SolrServerException ex) {
Log.logException(ex);
} catch (IOException ex) {
Log.logException(ex);
} }
if (!lukeCheckok) { // if luke failed alternatively use config and manual list
// add all disabled fields
Iterator<Entry> itcol = colcfg.entryIterator();
while (itcol.hasNext()) { // check for disabled fields in config
Entry etr = itcol.next();
if (!etr.enabled() && !omitFields.contains(etr.key())) {
reidx.addSelectFieldname(etr.key());
}
}
// add obsolete fields (not longer part of main index) // add obsolete fields (not longer part of main index)
reidx.addSelectFieldname("inboundlinks_tag_txt"); reidx.addSelectFieldname("author_s");
reidx.addSelectFieldname("inboundlinks_relflags_val"); reidx.addSelectFieldname("css_tag_txt");
reidx.addSelectFieldname("inboundlinks_rel_sxt"); reidx.addSelectFieldname("css_url_txt");
reidx.addSelectFieldname("inboundlinks_text_txt"); reidx.addSelectFieldname("scripts_txt");
reidx.addSelectFieldname("inboundlinks_alttag_txt"); reidx.addSelectFieldname("images_tag_txt");
reidx.addSelectFieldname("images_urlstub_txt");
reidx.addSelectFieldname("canonical_t");
reidx.addSelectFieldname("frames_txt");
reidx.addSelectFieldname("iframes_txt");
reidx.addSelectFieldname("outboundlinks_tag_txt"); reidx.addSelectFieldname("inboundlinks_tag_txt");
reidx.addSelectFieldname("outboundlinks_relflags_val"); reidx.addSelectFieldname("inboundlinks_relflags_val");
reidx.addSelectFieldname("outboundlinks_rel_sxt"); reidx.addSelectFieldname("inboundlinks_name_txt");
reidx.addSelectFieldname("outboundlinks_text_txt"); reidx.addSelectFieldname("inboundlinks_rel_sxt");
reidx.addSelectFieldname("outboundlinks_alttag_txt"); reidx.addSelectFieldname("inboundlinks_text_txt");
reidx.addSelectFieldname("inboundlinks_text_chars_val");
sb.deployThread("reindexSolr", "Reindex Solr", "reindex documents with obsolete fields in embedded Solr index", "/IndexReIndexMonitor_p.html",reidx /*privateWorkerThread*/, 0); reidx.addSelectFieldname("inboundlinks_text_words_val");
reidx.addSelectFieldname("inboundlinks_alttag_txt");
reidx.addSelectFieldname("outboundlinks_tag_txt");
reidx.addSelectFieldname("outboundlinks_relflags_val");
reidx.addSelectFieldname("outboundlinks_name_txt");
reidx.addSelectFieldname("outboundlinks_rel_sxt");
reidx.addSelectFieldname("outboundlinks_text_txt");
reidx.addSelectFieldname("outboundlinks_text_chars_val");
reidx.addSelectFieldname("outboundlinks_text_words_val");
reidx.addSelectFieldname("outboundlinks_alttag_txt");
}
sb.deployThread("reindexSolr", "Reindex Solr", "reindex documents with obsolete fields in embedded Solr index", "/IndexReIndexMonitor_p.html",reidx , 0);
return 0; return 0;
} }
} }

@ -24,9 +24,15 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.kelondro.workflow.AbstractBusyThread; import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import org.apache.lucene.index.FieldInfo;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -72,7 +78,7 @@ import org.apache.solr.common.SolrInputDocument;
} }
setName("reindexSolr"); setName("reindexSolr");
this.setPriority(Thread.MIN_PRIORITY); this.setPriority(Thread.MIN_PRIORITY);
} }
/** /**
@ -126,14 +132,13 @@ import org.apache.solr.common.SolrInputDocument;
} else { } else {
Log.logInfo("MIGRATION-REINDEX", "reindex docs with query=" + query + " found=" + docstoreindex + " start=" + start); Log.logInfo("MIGRATION-REINDEX", "reindex docs with query=" + query + " found=" + docstoreindex + " start=" + start);
start = start + chunksize; start = start + chunksize;
}
for (SolrDocument doc : xdocs) {
for (SolrDocument doc : xdocs) { SolrInputDocument idoc = colcfg.toSolrInputDocument(doc);
SolrInputDocument idoc = colcfg.toSolrInputDocument(doc); Switchboard.getSwitchboard().index.fulltext().putDocument(idoc);
Switchboard.getSwitchboard().index.fulltext().putDocument(idoc); processed++;
processed++; }
} }
} catch (IOException ex) { } catch (IOException ex) {
Log.logException(ex); Log.logException(ex);
} finally { } finally {
@ -152,11 +157,15 @@ import org.apache.solr.common.SolrInputDocument;
} }
@Override @Override
public void terminate(final boolean waitFor) { public void terminate(final boolean waitFor) {
querylist.clear(); querylist.clear();
super.terminate(waitFor); // if interrupted without finished commit to reflect latest changes
} if (docstoreindex > 0 && processed > 0) {
esc.commit(true);
}
super.terminate(waitFor);
}
/** /**
* @return total number of processed documents * @return total number of processed documents
@ -168,7 +177,7 @@ import org.apache.solr.common.SolrInputDocument;
/** /**
* @return the currently processed Solr select query * @return the currently processed Solr select query
*/ */
public String getCurrentQuery() { public String getCurrentQuery() {
return querylist.isEmpty() ? "" : querylist.get(0); return querylist.isEmpty() ? "" : querylist.get(0);
} }
@ -186,6 +195,8 @@ import org.apache.solr.common.SolrInputDocument;
if (chunksize > 2) { if (chunksize > 2) {
this.chunksize = this.chunksize / 2; this.chunksize = this.chunksize / 2;
} }
esc.commit(true);
start = 0;
} }
} }

Loading…
Cancel
Save