From d2cc11ea8f679a47343cc7d723c02e2b73d07360 Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 19 Sep 2015 05:30:55 +0200 Subject: [PATCH] fix html parser taking " + + "" + + "

" + textSource + "

" + + ""; + + ContentScraper scraper = parseToScraper(url, mimetype, new VocabularyScraper(), 0, testhtml, 10); + + String txt = scraper.getText(); + System.out.println("ScraperTagTest: [" + textSource + "] = [" + txt + "]"); + assertEquals(txt, textSource); + } }