You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
yacy_search_server/test/net/yacy/document/parser/pdfParserTest.java

47 lines
1.5 KiB

package net.yacy.document.parser;
import java.io.File;
import java.io.FileInputStream;
import java.util.Collection;
import static junit.framework.TestCase.assertEquals;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.document.Document;
import org.junit.Test;
public class pdfParserTest {

    /**
     * Test extraction of links in parse method, of class pdfParser.
     * <p>
     * Parses the fixture {@code test/parsertest/umlaute_linux.pdf} and checks
     * that the first returned document reports exactly one anchor. The
     * detected anchors are printed before the assertion so that they remain
     * visible in the test output when the count does not match.
     *
     * @throws Exception if the fixture cannot be opened or the parser fails
     */
    @Test
    public void testParse() throws Exception {
        System.out.println("pdfParser.parse");

        final String testFiles = "umlaute_linux.pdf";
        final String mimetype = "application/pdf";
        final String charset = null;
        //final String resulttxt = "In München steht ein Hofbräuhaus. Dort gibt es Bier aus Maßkrügen.";
        final String filename = "test/parsertest/" + testFiles;
        final File file = new File(filename);
        final AnchorURL url = new AnchorURL("http://localhost/" + filename);
        System.out.println("parse file: " + filename);

        final pdfParser p = new pdfParser();

        // Keep a handle on the stream so it is released even when parse() throws;
        // the test must not rely on the parser closing it.
        final FileInputStream source = new FileInputStream(file);
        final Document[] docs;
        try {
            docs = p.parse(url, mimetype, charset, null, source);
        } finally {
            source.close();
        }

        // Only the first returned document is inspected.
        final Collection<AnchorURL> links = docs[0].getAnchors();
        final int ilinks = links.size();

        // Print diagnostics first: a failing assertion aborts the method.
        System.out.println("number of links detected = " + ilinks);
        for (final AnchorURL aurl : links) {
            System.out.println(" found: " + aurl.toString());
        }

        assertEquals("number of links in pdf", 1, ilinks);
    }
}