From 77851fa53c6ad886b6116bdc4e7367bad25b2f75 Mon Sep 17 00:00:00 2001 From: reger Date: Wed, 11 Feb 2015 01:43:02 +0100 Subject: [PATCH] fix parser test cases (Vocabulary parameter) --- test/net/yacy/document/ParserTest.java | 10 +++++----- test/net/yacy/document/parser/htmlParserTest.java | 5 +++-- .../document/parser/images/genericImageParserTest.java | 2 +- .../parser/images/metadataImageParserTest.java | 2 +- test/net/yacy/document/parser/pdfParserTest.java | 2 +- test/net/yacy/search/snippet/TextSnippetTest.java | 1 + 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/net/yacy/document/ParserTest.java b/test/net/yacy/document/ParserTest.java index 1b49125d2..22004863c 100644 --- a/test/net/yacy/document/ParserTest.java +++ b/test/net/yacy/document/ParserTest.java @@ -36,7 +36,7 @@ public class ParserTest { final AnchorURL url = new AnchorURL("http://localhost/"+filename); AbstractParser p = new ooxmlParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); for (final Document doc: docs) { final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset()); final StringBuilder str = new StringBuilder(); @@ -70,7 +70,7 @@ public class ParserTest { final AnchorURL url = new AnchorURL("http://localhost/"+filename); AbstractParser p = new odtParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); for (final Document doc: docs) { final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset()); final StringBuilder str = new StringBuilder(); @@ -102,7 +102,7 @@ public class ParserTest { final AnchorURL url = new AnchorURL("http://localhost/"+filename); AbstractParser p = new pdfParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); 
+ final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); for (final Document doc: docs) { final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset()); final StringBuilder str = new StringBuilder(); @@ -134,7 +134,7 @@ public class ParserTest { final AnchorURL url = new AnchorURL("http://localhost/"+filename); AbstractParser p = new docParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); for (final Document doc: docs) { final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset()); final StringBuilder str = new StringBuilder(); @@ -172,7 +172,7 @@ public class ParserTest { final AnchorURL url = new AnchorURL("http://localhost/" + filename); AbstractParser p = new pptParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); for (final Document doc : docs) { final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset()); final StringBuilder str = new StringBuilder(); diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java index 67c1034ba..32626934a 100644 --- a/test/net/yacy/document/parser/htmlParserTest.java +++ b/test/net/yacy/document/parser/htmlParserTest.java @@ -10,6 +10,7 @@ import junit.framework.TestCase; import net.yacy.cora.document.id.AnchorURL; import net.yacy.document.Document; import net.yacy.document.Parser; +import net.yacy.document.VocabularyScraper; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ImageEntry; import static net.yacy.document.parser.htmlParser.parseToScraper; @@ -74,7 +75,7 @@ public class htmlParserTest extends TestCase { System.out.println("parse file: " + filename); htmlParser p = new 
htmlParser(); - final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, null, null, new FileInputStream(file)); Document doc = docs[0]; String txt = doc.getCharset(); @@ -100,7 +101,7 @@ public class htmlParserTest extends TestCase { + "
\"image" // + img width 550 (+html5 figure) + ""; - ContentScraper scraper = parseToScraper(url, mimetype, testhtml, 10); + ContentScraper scraper = parseToScraper(url, mimetype, new VocabularyScraper(), testhtml, 10); List anchorlist = scraper.getAnchors(); String linktxt = anchorlist.get(0).getTextProperty(); diff --git a/test/net/yacy/document/parser/images/genericImageParserTest.java b/test/net/yacy/document/parser/images/genericImageParserTest.java index 34532b3f7..d2b3c0fe0 100644 --- a/test/net/yacy/document/parser/images/genericImageParserTest.java +++ b/test/net/yacy/document/parser/images/genericImageParserTest.java @@ -28,7 +28,7 @@ public class genericImageParserTest { System.out.println("parse file: " + filename); genericImageParser p = new genericImageParser(); - final Document[] docs = p.parse(url, mimetype, charset, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, charset, null, new FileInputStream(file)); Document doc = docs[0]; assertEquals("YaCy Logo",doc.dc_title()); diff --git a/test/net/yacy/document/parser/images/metadataImageParserTest.java b/test/net/yacy/document/parser/images/metadataImageParserTest.java index 9dec63219..194900104 100644 --- a/test/net/yacy/document/parser/images/metadataImageParserTest.java +++ b/test/net/yacy/document/parser/images/metadataImageParserTest.java @@ -29,7 +29,7 @@ public class metadataImageParserTest { System.out.println("parse file: " + filename); metadataImageParser p = new metadataImageParser(); - final Document[] docs = p.parse(url, mimetype, charset, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, charset, null, new FileInputStream(file)); Document doc = docs[0]; assertEquals("YaCy Logo",doc.dc_title()); diff --git a/test/net/yacy/document/parser/pdfParserTest.java b/test/net/yacy/document/parser/pdfParserTest.java index e2c419035..d7934b73b 100644 --- a/test/net/yacy/document/parser/pdfParserTest.java +++ 
b/test/net/yacy/document/parser/pdfParserTest.java @@ -29,7 +29,7 @@ public class pdfParserTest { System.out.println("parse file: " + filename); pdfParser p = new pdfParser(); - final Document[] docs = p.parse(url, mimetype, charset, new FileInputStream(file)); + final Document[] docs = p.parse(url, mimetype, charset, null, new FileInputStream(file)); Document doc = docs[0]; int ilinks = doc.getAnchors().size(); diff --git a/test/net/yacy/search/snippet/TextSnippetTest.java b/test/net/yacy/search/snippet/TextSnippetTest.java index d8392c4db..bd93f58c3 100644 --- a/test/net/yacy/search/snippet/TextSnippetTest.java +++ b/test/net/yacy/search/snippet/TextSnippetTest.java @@ -5,6 +5,7 @@ import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.storage.HandleSet; +import net.yacy.cora.util.CommonPattern; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.search.query.QueryGoal; import net.yacy.search.schema.CollectionSchema;