Fixed wrong parsing of text inside <style> and <script> tags

pull/1/head
Michael Peter Christen 13 years ago
parent 22d5e33c5e
commit be928815fc

@ -99,7 +99,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
strong(TagType.pair), strong(TagType.pair),
i(TagType.pair), i(TagType.pair),
li(TagType.pair), li(TagType.pair),
script(TagType.pair); script(TagType.pair),
style(TagType.pair);
public TagType type; public TagType type;
private Tag(final TagType type) { private Tag(final TagType type) {
@ -201,6 +202,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
@Override @Override
public void scrapeText(final char[] newtext, final String insideTag) { public void scrapeText(final char[] newtext, final String insideTag) {
// System.out.println("SCRAPE: " + UTF8.String(newtext)); // System.out.println("SCRAPE: " + UTF8.String(newtext));
if (insideTag != null && ("script".equals(insideTag) || "style".equals(insideTag))) return;
int p, pl, q, s = 0; int p, pl, q, s = 0;
// match evaluation pattern // match evaluation pattern
@ -434,7 +436,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
} }
@Override @Override
public void scrapeTag1(final String tagname, final Properties tagopts, final char[] text) { public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) {
// System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text)); // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text));
if (tagname.equalsIgnoreCase("a") && text.length < 2048) { if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
final String href = tagopts.getProperty("href", EMPTY_STRING); final String href = tagopts.getProperty("href", EMPTY_STRING);

@ -202,7 +202,7 @@ public final class TransformerWriter extends Writer {
if (tag == null) { if (tag == null) {
// case (1): this is not a tag opener/closer // case (1): this is not a tag opener/closer
if (this.scraper != null) this.scraper.scrapeText(content, null); if (this.scraper != null && content.length > 0) this.scraper.scrapeText(content, null);
if (this.transformer != null) return this.transformer.transformText(content); if (this.transformer != null) return this.transformer.transformText(content);
return content; return content;
} }
@ -222,7 +222,9 @@ public final class TransformerWriter extends Writer {
// we are collection tag text for the tag 'filterTag' -> case (4) - (7) // we are collection tag text for the tag 'filterTag' -> case (4) - (7)
if (tag == null || tag.equals("!")) { if (tag == null || tag.equals("!")) {
// case (4): getting no tag, go on collecting content // case (4): getting no tag, go on collecting content
if (this.scraper != null) this.scraper.scrapeText(content, this.filterTag); if (this.scraper != null) {
this.scraper.scrapeText(content, this.filterTag);
}
if (this.transformer != null) { if (this.transformer != null) {
this.filterCont.append(this.transformer.transformText(content)); this.filterCont.append(this.transformer.transformText(content));
} else { } else {
@ -330,7 +332,7 @@ public final class TransformerWriter extends Writer {
if (in[1] == '/') { if (in[1] == '/') {
// a closing tag // a closing tag
tagend = tagEnd(in, 2); tagend = tagEnd(in, 2);
tag = new String(in, 2, tagend - 2); tag = new String(in, 2, tagend - 2).toLowerCase();
final char[] text = new char[in.length - tagend - 1]; final char[] text = new char[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1); System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, false, text, quotechar); return filterTag(tag, false, text, quotechar);
@ -338,7 +340,7 @@ public final class TransformerWriter extends Writer {
// an opening tag // an opening tag
tagend = tagEnd(in, 1); tagend = tagEnd(in, 1);
tag = new String(in, 1, tagend - 1); tag = new String(in, 1, tagend - 1).toLowerCase();
final char[] text = new char[in.length - tagend - 1]; final char[] text = new char[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1); System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, true, text, quotechar); return filterTag(tag, true, text, quotechar);

Loading…
Cancel
Save