diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 8f9833da4..b4a657672 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -520,7 +520,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow";
tag.opts.put("rel", rel);
}
- tag.opts.put("text", new String(tag.content.getChars()));
+ tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html from the tag text, e.g. "<span>test</span>" becomes "test"
tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
url.setAll(tag.opts);
recursiveParse(url, tag.content.getChars());
diff --git a/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java b/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java
index 5f6c0b66f..37b7899c0 100644
--- a/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java
+++ b/test/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnectorTest.java
@@ -5,8 +5,6 @@ import java.io.IOException;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.Before;
@@ -57,12 +55,8 @@ public class EmbeddedSolrConnectorTest {
System.out.println("query solr");
long expResult = 1;
- SolrDocumentList result;
- try {
- result = solr.getDocumentListByQuery(CollectionSchema.text_t.name() + ":tempor", 0, 10,"");
- assertEquals(expResult, result.getNumFound());
- } catch (final IOException ex) {
- fail("Solr query no result");
- }
+ long result = solr.getCountByQuery(CollectionSchema.text_t.name() + ":tempor");
+ System.out.println("found = " + result + " (expected = 1 )");
+ assertEquals(expResult, result);
}
}
diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java
index 7df23d74e..9c0fafd93 100644
--- a/test/net/yacy/document/parser/htmlParserTest.java
+++ b/test/net/yacy/document/parser/htmlParserTest.java
@@ -5,11 +5,15 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
+import java.util.List;
+import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
import junit.framework.TestCase;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.document.Document;
import net.yacy.document.Parser;
+import net.yacy.document.parser.html.ContentScraper;
+import static net.yacy.document.parser.htmlParser.parseToScraper;
import org.junit.Test;
public class htmlParserTest extends TestCase {
@@ -80,4 +84,35 @@ public class htmlParserTest extends TestCase {
}
}
+
+ /**
+ * Test of parseToScraper method, of class htmlParser.
+ */
+ @Test
+ public void testParseToScraper_4args() throws Exception {
+ // test link with inline html in text
+ // expected: the anchor text is delivered as pure text, because it may be indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt
+ final AnchorURL url = new AnchorURL("http://localhost/");
+ final String mimetype = "text/html";
+ final String testhtml = ""
+ + "testtext" // "testtext"
+ + " Start" // "Start"
+ + "" // "" + image
+ + "