Fix test compilation (add the TagValency argument to the affected test calls)

pull/690/head
Henschi 1 week ago
parent 50421715a1
commit 3d5ba29d01

@@ -32,6 +32,7 @@ import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.data.WorkTables;
import net.yacy.document.parser.html.TagValency;
import net.yacy.kelondro.blob.ArrayStack;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
@@ -127,11 +128,12 @@ public class HostBalancerTest {
CrawlProfile.MATCH_NEVER_STRING, // indexUrlMustNotMatch
CrawlProfile.MATCH_ALL_STRING, // indexContentMustMatch
CrawlProfile.MATCH_NEVER_STRING, // indexContentMustNotMatch
false,
0, false, CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE),
-1, true, true, true, false, // crawlingQ, followFrames, obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow,
true, true, true, false, -1, false, true, CrawlProfile.MATCH_NEVER_STRING, CacheStrategy.IFEXIST,
"robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
ClientIdentification.yacyIntranetCrawlerAgentName, null, null, 0);
ClientIdentification.yacyIntranetCrawlerAgentName, TagValency.EVAL, null, null, 0);
/** RobotsTxt instance */
private final RobotsTxt robots;

@@ -149,7 +149,7 @@ public class ContentScraperTest {
+ "<time datetime='2016-12-23'>23. Dezember 2016</time>" // html5 time tag
+ "</body></html>";
final ContentScraper scraper = new ContentScraper(root, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(root, 10, new HashSet<String>(), TagValency.IGNORE, new VocabularyScraper(), 0);
final Writer writer = new TransformerWriter(null, null, scraper, false);
FileUtils.copy(new StringReader(page), writer);
@@ -425,7 +425,7 @@ public class ContentScraperTest {
html2Results.put(html, expectedUrls);
for (final Entry<String, String[]> html2Result : html2Results.entrySet()) {
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), TagValency.EVAL, new VocabularyScraper(), 0);
try (final Writer writer = new TransformerWriter(null, null, scraper, false)) {
FileUtils.copy(new StringReader(html2Result.getKey()), writer);
@@ -500,7 +500,7 @@ public class ContentScraperTest {
for (final Entry<String, String[]> html2Result : html2Results.entrySet()) {
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), TagValency.EVAL, new VocabularyScraper(), 0);
try (final Writer writer = new TransformerWriter(null, null, scraper, false)) {
FileUtils.copy(new StringReader(html2Result.getKey()), writer);

@@ -24,6 +24,7 @@ import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.document.parser.html.TagValency;
public class htmlParserTest extends TestCase {
@@ -221,7 +222,7 @@ public class htmlParserTest extends TestCase {
testHtml.toString().getBytes(StandardCharsets.UTF_8));) {
final Set<String> ignore = new HashSet<>();
ignore.add("optional");
final Document[] docs = parser.parse(url, mimetype, null, ignore, new VocabularyScraper(), 0, sourceStream);
final Document[] docs = parser.parse(url, mimetype, null, TagValency.EVAL, ignore, new VocabularyScraper(), 0, sourceStream);
final Document doc = docs[0];
final String parsedDext = doc.getTextString();
@@ -368,7 +369,7 @@ public class htmlParserTest extends TestCase {
+ "<figure><img width=\"550px\" title=\"image as exemple\" alt=\"image as exemple\" src=\"./img/my_image.png\"></figrue>" // + img width 550 (+html5 figure)
+ "</body></html>";
ContentScraper scraper = parseToScraper(url, charset, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
List<AnchorURL> anchorlist = scraper.getAnchors();
String linktxt = anchorlist.get(0).getTextProperty();
@@ -410,7 +411,7 @@ public class htmlParserTest extends TestCase {
}
testHtml.append("</p></body></html>");
ContentScraper scraper = parseToScraper(url, charset, new HashSet<String>(), new VocabularyScraper(), 0, testHtml.toString(), Integer.MAX_VALUE, Integer.MAX_VALUE);
ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet<String>(), new VocabularyScraper(), 0, testHtml.toString(), Integer.MAX_VALUE, Integer.MAX_VALUE);
assertEquals(nestingDepth, scraper.getAnchors().size());
assertEquals(1, scraper.getImages().size());
@@ -431,7 +432,7 @@ public class htmlParserTest extends TestCase {
+ "<p>" + textSource + "</p>"
+ "</body></html>";
ContentScraper scraper = parseToScraper(url, charset, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
String txt = scraper.getText();
System.out.println("ScraperTagTest: [" + textSource + "] = [" + txt + "]");
@@ -460,7 +461,7 @@ public class htmlParserTest extends TestCase {
+ "</head>\n"
+ "<body>" + textSource + "</body>\n"
+ "</html>";
ContentScraper scraper = parseToScraper(url, charset, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
ContentScraper scraper = parseToScraper(url, charset, TagValency.IGNORE, new HashSet<String>(), new VocabularyScraper(), 0, testhtml, 10, 10);
String txt = scraper.getText();
System.out.println("ScraperScriptTagTest: [" + textSource + "] = [" + txt + "]");

Loading…
Cancel
Save