From e594130aecdc02fce49d69c97489cdc7d84bec1b Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 13 Sep 2015 06:02:07 +0200 Subject: [PATCH] add test case for partial update - to discover effect on YaCy for update of documents with multivalued date fields (like dates_in_content_dts) current result: loss of fields/information in index document, see EmbeddedSolrConnectorTest.testUdate_withMultivaluedDateField() --- .../connector/EmbeddedSolrConnectorTest.java | 133 ++++++++++++++++-- 1 file changed, 122 insertions(+), 11 deletions(-) diff --git a/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java b/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java index 37b7899c0..78c49bf4d 100644 --- a/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java +++ b/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java @@ -2,24 +2,34 @@ package net.yacy.cora.federate.solr.connector; import java.io.File; import java.io.IOException; +import java.util.Date; +import java.util.HashSet; import net.yacy.cora.federate.solr.instance.EmbeddedInstance; import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.WebgraphSchema; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; -import org.junit.After; -import org.junit.Before; +import org.junit.AfterClass; import org.junit.Test; import static org.junit.Assert.*; +import org.junit.BeforeClass; + public class EmbeddedSolrConnectorTest { - EmbeddedSolrConnector solr; + static EmbeddedSolrConnector solr; public EmbeddedSolrConnectorTest() { } - @Before - public void setUp() { + /** + * init for all test cases (via BeforeClass annotation), + * for the expensive creating or loading of index + */ + @BeforeClass + public static void initTesting() { File solr_config = new File("defaults/solr"); File storage = new File("test/DATA/INDEX/webportal/SEGMENTS/text/solr/"); storage.mkdirs(); @@ -27,14 +37,14 @@ public class EmbeddedSolrConnectorTest { try { EmbeddedInstance localCollectionInstance = new EmbeddedInstance(solr_config, storage, CollectionSchema.CORE_NAME, new String[]{CollectionSchema.CORE_NAME, WebgraphSchema.CORE_NAME}); solr = new EmbeddedSolrConnector(localCollectionInstance); - + solr.clear(); // delete all documents in index (for clean testing) } catch (final IOException ex) { fail("IOException starting Jetty"); } } - @After - public void tearDown() { + @AfterClass + public static void finalizeTesting() { solr.close(); } @@ -45,18 +55,119 @@ public class EmbeddedSolrConnectorTest { public void testQuery() throws IOException { System.out.println("adding test document to solr"); SolrInputDocument doc = new SolrInputDocument(); - doc.addField(CollectionSchema.id.name(), "ABCD0000abcd"); + String id = Long.toString(System.currentTimeMillis()); + doc.addField(CollectionSchema.id.name(), id); doc.addField(CollectionSchema.title.name(), "Lorem ipsum"); doc.addField(CollectionSchema.host_s.name(), "yacy.net"); - doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."); + // mixing in the id as unique word + doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, x"+id+ " sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."); solr.add(doc); solr.commit(true); System.out.println("query solr"); long expResult = 1; - long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":tempor"); + long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":x" + id); System.out.println("found = " + result + " (expected = 1 )"); assertEquals(expResult, result); } + + /** + * Test of update (partial update) + */ + @Test + public void testUdate() throws IOException { + SolrInputDocument doc = new SolrInputDocument(); + String id = Long.toString(System.currentTimeMillis()); + System.out.println("testUpdate: adding test document to solr ID=" + id); + doc.addField(CollectionSchema.id.name(), id); + doc.addField(CollectionSchema.title.name(), "Lorem ipsum"); + doc.addField(CollectionSchema.host_s.name(), "yacy.net"); + doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."); + + solr.add(doc); + solr.commit(true); + + System.out.println("testUpdate: update one document ID=" + id); + + HashSet fieldnames = new HashSet(); + fieldnames.addAll(doc.getFieldNames()); + + SolrInputDocument sid = new SolrInputDocument(); + sid.addField(CollectionSchema.id.name(), doc.getFieldValue(CollectionSchema.id.name())); + sid.addField(CollectionSchema.host_s.name(), "yacy.yacyh"); + solr.update(sid); + solr.commit(true); + + long expResult = 1; + SolrDocumentList sl = solr.getDocumentListByQuery(CollectionSchema.host_s.name()+":yacy.yacyh",null,0,10); + assertTrue(sl.size() >= expResult); + + System.out.println("testUpdate: verify update of document ID=" + id); + String foundid = null; + for (SolrDocument rdoc : sl) { + foundid = (String) rdoc.getFieldValue("id"); + if (id.equals(foundid)) { + HashSet newfieldnames = new HashSet(); + newfieldnames.addAll(rdoc.getFieldNames()); + assertTrue(newfieldnames.containsAll(fieldnames)); + break; + } + } + assertEquals(id, foundid); + } + + /** + * Test for partial update for document containing a multivalued date field + * this is a Solr issue (2015-09-12) + * the test case is just to demonstrate the effect on YaCy (currently catching the solr exception and reinserting a document with fields missing) + */ + @Test + public void testUdate_withMultivaluedDateField() throws SolrException, IOException { + SolrInputDocument doc = new SolrInputDocument(); + String id = Long.toString(System.currentTimeMillis()); + System.out.println("testUpdate: adding test document to solr ID=" + id); + doc.addField(CollectionSchema.id.name(), id); + doc.addField(CollectionSchema.title.name(), "Lorem ipsum"); + doc.addField(CollectionSchema.host_s.name(), "yacy.net"); + doc.addField(CollectionSchema.text_t.name(), "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."); + doc.addField(CollectionSchema.dates_in_content_dts.name(), new Date()); + + solr.add(doc); + solr.commit(true); + + System.out.println("testUpdate: update one document ID=" + id); + + HashSet fieldnames = new HashSet(); + fieldnames.addAll(doc.getFieldNames()); + + SolrInputDocument sid = new SolrInputDocument(); + sid.addField(CollectionSchema.id.name(), doc.getFieldValue(CollectionSchema.id.name())); + sid.addField(CollectionSchema.host_s.name(), "yacy.yacy"); + solr.update(sid); + solr.commit(true); + + long expResult = 1; + SolrDocumentList sl = solr.getDocumentListByQuery(CollectionSchema.host_s.name()+":yacy.yacy",null,0,10); + assertTrue(sl.size() >= expResult); + + System.out.println("testUpdate: verify update of document ID=" + id); + String foundid = null; + for (SolrDocument rdoc : sl) { + foundid = (String) rdoc.getFieldValue("id"); + if (id.equals(foundid)) { + HashSet newfieldnames = new HashSet(); + newfieldnames.addAll(rdoc.getFieldNames()); + if (!newfieldnames.containsAll(fieldnames)) { + System.err.println("!!!++++++++++++++++++++++++++++++++++++!!!"); + System.err.println("fields in original document: "+fieldnames.toString()); + System.err.println("fields after partial update: "+newfieldnames.toString()); + System.err.println("!!!++++++++++++++++++++++++++++++++++++!!!"); + } + assertTrue (newfieldnames.containsAll(fieldnames)); + break; + } + } + assertEquals(id, foundid); + } }