sixcooler 9 years ago
commit c9da652249

@ -123,4 +123,6 @@ ul.nav li.dropdown:hover ul.dropdown-menu{
/* Give popovers an explicit stacking order (z-index 1600). */
.popover {
z-index: 1600;
}
/* Hide any horizontal overflow of .col-md-9 elements instead of showing it. */
.col-md-9 {
overflow-x: hidden;
}

@ -216,11 +216,14 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
clearCaches(); // prevent further OOM if this was caused by OOM
ConcurrentLog.logException(e);
// catches "version conflict for": try this again and delete the document in advance
/*
// with possible partial update docs, don't try to delete the index doc and reinsert solrdoc,
// as this would result in an index doc with just the updated fields
try {
this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
} catch (final SolrServerException e1) {
ConcurrentLog.logException(e1);
}
}*/
try {
this.server.add(solrdoc, -1);
} catch (final Throwable ee) {
@ -255,13 +258,17 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
clearCaches(); // prevent further OOM if this was caused by OOM
ConcurrentLog.logException(e);
// catches "version conflict for": try this again and delete the document in advance
/*
// with possible partial update docs, don't try to delete the index docs and reinsert solrdocs,
// as this would result in index docs with just the updated fields
List<String> ids = new ArrayList<String>();
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
try {
this.server.deleteById(ids);
} catch (final SolrServerException e1) {
ConcurrentLog.logException(e1);
}
}*/
try {
this.server.commit();
} catch (final Throwable eee) {
@ -272,6 +279,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
this.server.add(solrdocs, -1);
} catch (final Throwable ee) {
ConcurrentLog.logException(ee);
List<String> ids = new ArrayList<String>();
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
log.warn(e.getMessage() + " IDs=" + ids.toString());
throw new IOException(ee);
}

@ -23,6 +23,7 @@
*/
package net.yacy.crawler;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Set;
@ -54,7 +55,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
private int chunkstart = 0;
private int chunksize = 200;
final Switchboard sb;
private Set<DigestURL> urlstack; // buffer of urls to recrawl
private final Set<DigestURL> urlstack; // buffer of urls to recrawl
public long urlsfound = 0;
public RecrawlBusyThread(Switchboard xsb) {
@ -181,6 +182,12 @@ public class RecrawlBusyThread extends AbstractBusyThread {
try {
this.urlstack.add(new DigestURL((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName())));
} catch (MalformedURLException ex) {
try { // if index entry hasn't a valid url (useless), delete it
solrConnector.deleteById((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
ConcurrentLog.severe(THREAD_NAME, "deleted index document with invalid url " + (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
} catch (IOException ex1) {
ConcurrentLog.severe(THREAD_NAME, ex1.getMessage());
}
}
}
this.chunkstart = this.chunkstart + this.chunksize;

@ -777,7 +777,7 @@ dc_rights
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
final Writer osw = new OutputStreamWriter(baos, "UTF-8");
writeXML(osw, new Date());
writeXML(osw, this.lastModified);
osw.close();
return UTF8.String(baos.toByteArray());
} catch (final UnsupportedEncodingException e1) {

@ -2480,12 +2480,22 @@ public final class Switchboard extends serverSwitch {
if (!minimum_load_fullfilled) log.info("postprocessing deactivated: too high load (" + Memory.load() + ") > " + getConfigFloat("postprocessing.maximum_load", 0) + ", to force change field postprocessing.maximum_load");
boolean postprocessing = process_key_exist && reference_index_exist && minimum_ram_fullfilled && minimum_load_fullfilled;
if (!postprocessing) log.info("postprocessing deactivated: constraints violated");
// Hack to prevent a Solr problem on partial update if the target document contains a multivalued date field:
// regardless of whether this field is part of the update, it causes an org.apache.solr.common.SolrException: Invalid Date String.
// Observed 2015-09-12 with Solr v5.2.1 & v5.3.
// This hack switches partial update off if a multivalued date field (*_dts) exists, like: dates_in_content_dts startDates_dts endDates_dts
boolean partialUpdate = getConfigBool("postprocessing.partialUpdate", true);
for (String sf : index.fulltext().getDefaultConfiguration().keySet()) {
if (sf.endsWith("_dts")) {
partialUpdate = false;
}
}
if (allCrawlsFinished) {
if (postprocessing) {
// run postprocessing on all profiles
ReferenceReportCache rrCache = index.getReferenceReportCache();
proccount += collection1Configuration.postprocessing(index, rrCache, null, getConfigBool("postprocessing.partialUpdate", true));
proccount += collection1Configuration.postprocessing(index, rrCache, null, partialUpdate);
this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
}
this.crawler.cleanProfiles(this.crawler.getActiveProfiles());
@ -2498,7 +2508,7 @@ public final class Switchboard extends serverSwitch {
if (postprocessing) {
// run postprocessing on these profiles
ReferenceReportCache rrCache = index.getReferenceReportCache();
for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, profileHash, getConfigBool("postprocessing.partialUpdate", true));
for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, profileHash, partialUpdate);
this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
}
this.crawler.cleanProfiles(deletionCandidates);

@ -2,24 +2,34 @@ package net.yacy.cora.federate.solr.connector;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.Before;
import org.junit.AfterClass;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.BeforeClass;
public class EmbeddedSolrConnectorTest {
EmbeddedSolrConnector solr;
static EmbeddedSolrConnector solr;
/**
 * Default constructor; performs no per-instance setup.
 */
public EmbeddedSolrConnectorTest() {
}
@Before
public void setUp() {
/**
* init for all test cases (via BeforeClass annotation),
* for the expensive creating or loading of index
*/
@BeforeClass
public static void initTesting() {
File solr_config = new File("defaults/solr");
File storage = new File("test/DATA/INDEX/webportal/SEGMENTS/text/solr/");
storage.mkdirs();
@ -27,14 +37,14 @@ public class EmbeddedSolrConnectorTest {
try {
EmbeddedInstance localCollectionInstance = new EmbeddedInstance(solr_config, storage, CollectionSchema.CORE_NAME, new String[]{CollectionSchema.CORE_NAME, WebgraphSchema.CORE_NAME});
solr = new EmbeddedSolrConnector(localCollectionInstance);
solr.clear(); // delete all documents in index (for clean testing)
} catch (final IOException ex) {
fail("IOException starting Jetty");
}
}
@After
public void tearDown() {
/**
 * Closes the shared connector after all test cases of this class have run.
 */
@AfterClass
public static void finalizeTesting() {
    // The connector is only assigned when initTesting() succeeds; JUnit still
    // calls @AfterClass after a failed @BeforeClass, so guard against null to
    // avoid a NullPointerException obscuring the original setup failure.
    if (solr != null) {
        solr.close();
    }
}
@ -45,18 +55,119 @@ public class EmbeddedSolrConnectorTest {
public void testQuery() throws IOException {
System.out.println("adding test document to solr");
SolrInputDocument doc = new SolrInputDocument();
doc.addField(CollectionSchema.id.name(), "ABCD0000abcd");
String id = Long.toString(System.currentTimeMillis());
doc.addField(CollectionSchema.id.name(), id);
doc.addField(CollectionSchema.title.name(), "Lorem ipsum");
doc.addField(CollectionSchema.host_s.name(), "yacy.net");
doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
// mixing in the id as unique word
doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, x"+id+ " sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
solr.add(doc);
solr.commit(true);
System.out.println("query solr");
long expResult = 1;
long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":tempor");
long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":x" + id);
System.out.println("found = " + result + " (expected = 1 )");
assertEquals(expResult, result);
}
/**
 * Partial-update test: indexes a document, sends an update that carries only
 * the id and a new host_s value, and then checks that the document found in
 * the index still has every field name of the originally added document.
 */
@Test
public void testUdate() throws IOException {
    final String id = Long.toString(System.currentTimeMillis());
    System.out.println("testUpdate: adding test document to solr ID=" + id);

    // step 1: index the complete document
    final SolrInputDocument original = new SolrInputDocument();
    original.addField(CollectionSchema.id.name(), id);
    original.addField(CollectionSchema.title.name(), "Lorem ipsum");
    original.addField(CollectionSchema.host_s.name(), "yacy.net");
    original.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
    solr.add(original);
    solr.commit(true);

    // step 2: update a single field, remembering which fields existed before
    System.out.println("testUpdate: update one document ID=" + id);
    final HashSet<String> fieldsBeforeUpdate = new HashSet<String>(original.getFieldNames());
    final SolrInputDocument partial = new SolrInputDocument();
    partial.addField(CollectionSchema.id.name(), original.getFieldValue(CollectionSchema.id.name()));
    partial.addField(CollectionSchema.host_s.name(), "yacy.yacyh");
    solr.update(partial);
    solr.commit(true);

    // step 3: look the document up by its new host value
    final long minimumHits = 1;
    final String hostQuery = CollectionSchema.host_s.name() + ":yacy.yacyh";
    final SolrDocumentList hits = solr.getDocumentListByQuery(hostQuery, null, 0, 10);
    assertTrue(minimumHits <= hits.size());

    System.out.println("testUpdate: verify update of document ID=" + id);
    String lastSeenId = null;
    for (final SolrDocument hit : hits) {
        lastSeenId = (String) hit.getFieldValue("id");
        if (!id.equals(lastSeenId)) {
            continue; // some other document with the same host value
        }
        final HashSet<String> fieldsAfterUpdate = new HashSet<String>(hit.getFieldNames());
        assertTrue(fieldsAfterUpdate.containsAll(fieldsBeforeUpdate));
        break;
    }
    // fails if the updated document was not among the hits
    assertEquals(id, lastSeenId);
}
/**
 * Partial-update test for a document that contains a multivalued date field
 * (dates_in_content_dts). This is a Solr issue (2015-09-12); the test case
 * only demonstrates the effect on YaCy, which currently catches the Solr
 * exception and reinserts a document with fields missing.
 */
@Test
public void testUdate_withMultivaluedDateField() throws SolrException, IOException {
    final String id = Long.toString(System.currentTimeMillis());
    System.out.println("testUpdate: adding test document to solr ID=" + id);

    // step 1: index the complete document, including a value for the date field
    final SolrInputDocument original = new SolrInputDocument();
    original.addField(CollectionSchema.id.name(), id);
    original.addField(CollectionSchema.title.name(), "Lorem ipsum");
    original.addField(CollectionSchema.host_s.name(), "yacy.net");
    original.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
    original.addField(CollectionSchema.dates_in_content_dts.name(), new Date());
    solr.add(original);
    solr.commit(true);

    // step 2: update a single field, remembering which fields existed before
    System.out.println("testUpdate: update one document ID=" + id);
    final HashSet<String> fieldsBeforeUpdate = new HashSet<String>(original.getFieldNames());
    final SolrInputDocument partial = new SolrInputDocument();
    partial.addField(CollectionSchema.id.name(), original.getFieldValue(CollectionSchema.id.name()));
    partial.addField(CollectionSchema.host_s.name(), "yacy.yacy");
    solr.update(partial);
    solr.commit(true);

    // step 3: look the document up by its new host value
    final long minimumHits = 1;
    final String hostQuery = CollectionSchema.host_s.name() + ":yacy.yacy";
    final SolrDocumentList hits = solr.getDocumentListByQuery(hostQuery, null, 0, 10);
    assertTrue(minimumHits <= hits.size());

    System.out.println("testUpdate: verify update of document ID=" + id);
    String lastSeenId = null;
    for (final SolrDocument hit : hits) {
        lastSeenId = (String) hit.getFieldValue("id");
        if (!id.equals(lastSeenId)) {
            continue; // some other document with the same host value
        }
        final HashSet<String> fieldsAfterUpdate = new HashSet<String>(hit.getFieldNames());
        final boolean allFieldsKept = fieldsAfterUpdate.containsAll(fieldsBeforeUpdate);
        if (!allFieldsKept) {
            // print both field sets so the lost fields are visible in the test output
            System.err.println("!!!++++++++++++++++++++++++++++++++++++!!!");
            System.err.println("fields in original document: " + fieldsBeforeUpdate.toString());
            System.err.println("fields after partial update: " + fieldsAfterUpdate.toString());
            System.err.println("!!!++++++++++++++++++++++++++++++++++++!!!");
        }
        assertTrue(allFieldsKept);
        break;
    }
    // fails if the updated document was not among the hits
    assertEquals(id, lastSeenId);
}
}

@ -43,7 +43,7 @@ public class TextSnippetTest {
}
@Test
public void testTextSnippet() {
public void testTextSnippet() throws MalformedURLException {
URIMetadataNode testpage = new URIMetadataNode(doc);
testpage.addField(CollectionSchema.title.name(), "New test case");
@ -83,7 +83,7 @@ public class TextSnippetTest {
* Test of getLineMarked method, of class TextSnippet.
*/
@Test
public void testGetLineMarked() {
public void testGetLineMarked() throws MalformedURLException {
URIMetadataNode testpage = new URIMetadataNode(doc);
testpage.addField(CollectionSchema.title.name(), "New test case");
testpage.addField(CollectionSchema.keywords.name(), "junit");

Loading…
Cancel
Save