diff --git a/test/java/net/yacy/crawler/HostBalancerTest.java b/test/java/net/yacy/crawler/HostBalancerTest.java index 429cd0cba..c8590eb09 100644 --- a/test/java/net/yacy/crawler/HostBalancerTest.java +++ b/test/java/net/yacy/crawler/HostBalancerTest.java @@ -32,6 +32,7 @@ import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.robots.RobotsTxt; import net.yacy.data.WorkTables; +import net.yacy.document.parser.html.TagValency; import net.yacy.kelondro.blob.ArrayStack; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.RowHandleSet; @@ -127,11 +128,12 @@ public class HostBalancerTest { CrawlProfile.MATCH_NEVER_STRING, // indexUrlMustNotMatch CrawlProfile.MATCH_ALL_STRING, // indexContentMustMatch CrawlProfile.MATCH_NEVER_STRING, // indexContentMustNotMatch + false, 0, false, CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, false, // crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow, true, true, true, false, -1, false, true, CrawlProfile.MATCH_NEVER_STRING, CacheStrategy.IFEXIST, "robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, - ClientIdentification.yacyIntranetCrawlerAgentName, null, null, 0); + ClientIdentification.yacyIntranetCrawlerAgentName, TagValency.EVAL, null, null, 0); /** RobotsTxt instance */ private final RobotsTxt robots; diff --git a/test/java/net/yacy/document/parser/html/ContentScraperTest.java b/test/java/net/yacy/document/parser/html/ContentScraperTest.java index 92ecfb072..56af19693 100644 --- a/test/java/net/yacy/document/parser/html/ContentScraperTest.java +++ b/test/java/net/yacy/document/parser/html/ContentScraperTest.java @@ -149,7 +149,7 @@ public class ContentScraperTest { + "" // html5 time tag + ""; - final ContentScraper scraper = new ContentScraper(root, 10, new HashSet(), new VocabularyScraper(), 0); + final ContentScraper scraper = new ContentScraper(root, 10, new HashSet(), TagValency.IGNORE, new VocabularyScraper(), 0); final Writer writer = new TransformerWriter(null, null, scraper, false); FileUtils.copy(new StringReader(page), writer); @@ -425,7 +425,7 @@ public class ContentScraperTest { html2Results.put(html, expectedUrls); for (final Entry html2Result : html2Results.entrySet()) { - final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet(), new VocabularyScraper(), 0); + final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet(), TagValency.EVAL, new VocabularyScraper(), 0); try (final Writer writer = new TransformerWriter(null, null, scraper, false)) { FileUtils.copy(new StringReader(html2Result.getKey()), writer); @@ -500,7 +500,7 @@ public class ContentScraperTest { for (final Entry html2Result : html2Results.entrySet()) { - final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet(), new VocabularyScraper(), 0); + final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet(), TagValency.EVAL, new VocabularyScraper(), 0); try (final Writer writer = new TransformerWriter(null, null, scraper, false)) { FileUtils.copy(new StringReader(html2Result.getKey()), writer); diff --git a/test/java/net/yacy/document/parser/htmlParserTest.java b/test/java/net/yacy/document/parser/htmlParserTest.java index 5c4b62c28..d2a113c88 100644 --- a/test/java/net/yacy/document/parser/htmlParserTest.java +++ b/test/java/net/yacy/document/parser/htmlParserTest.java @@ -24,6 +24,7 @@ import net.yacy.document.Parser; import net.yacy.document.VocabularyScraper; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ImageEntry; +import net.yacy.document.parser.html.TagValency; public class htmlParserTest extends TestCase { @@ -221,7 +222,7 @@ public class htmlParserTest extends TestCase { testHtml.toString().getBytes(StandardCharsets.UTF_8));) { final Set ignore = new HashSet<>(); ignore.add("optional"); - final Document[] docs = parser.parse(url, mimetype, null, ignore, new VocabularyScraper(), 0, sourceStream); + final Document[] docs = parser.parse(url, mimetype, null, TagValency.EVAL, ignore, new VocabularyScraper(), 0, sourceStream); final Document doc = docs[0]; final String parsedDext = doc.getTextString(); @@ -368,7 +369,7 @@ public class htmlParserTest extends TestCase { + "
\"image" // + img width 550 (+html5 figure) + ""; - ContentScraper scraper = parseToScraper(url, charset, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); + ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); List anchorlist = scraper.getAnchors(); String linktxt = anchorlist.get(0).getTextProperty(); @@ -410,7 +411,7 @@ public class htmlParserTest extends TestCase { } testHtml.append("

"); - ContentScraper scraper = parseToScraper(url, charset, new HashSet(), new VocabularyScraper(), 0, testHtml.toString(), Integer.MAX_VALUE, Integer.MAX_VALUE); + ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet(), new VocabularyScraper(), 0, testHtml.toString(), Integer.MAX_VALUE, Integer.MAX_VALUE); assertEquals(nestingDepth, scraper.getAnchors().size()); assertEquals(1, scraper.getImages().size()); @@ -431,7 +432,7 @@ public class htmlParserTest extends TestCase { + "

" + textSource + "

" + ""; - ContentScraper scraper = parseToScraper(url, charset, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); + ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); String txt = scraper.getText(); System.out.println("ScraperTagTest: [" + textSource + "] = [" + txt + "]"); @@ -460,7 +461,7 @@ public class htmlParserTest extends TestCase { + "\n" + "" + textSource + "\n" + ""; - ContentScraper scraper = parseToScraper(url, charset, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); + ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet(), new VocabularyScraper(), 0, testhtml, 10, 10); String txt = scraper.getText(); System.out.println("ScraperScriptTagTest: [" + textSource + "] = [" + txt + "]");