|
|
|
@ -128,10 +128,10 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
|
|
|
|
|
// condenser to distinguish headlines from text beginnings.
|
|
|
|
|
// to make it easier for the condenser, a dot ('.') is appended when
|
|
|
|
|
// the new text line does not already end with a punctuation character
|
|
|
|
|
if ((b.length() != 0) && (!(punctuation(b.charAt(b.length() - 1))))) b.append('.');
|
|
|
|
|
if ((b.length() != 0) && (!(punctuation(b.charAt(b.length() - 1))))) b.append((int) '.');
|
|
|
|
|
//System.out.println("*** Appended dot: " + b.toString());
|
|
|
|
|
}
|
|
|
|
|
if (b.length() != 0) content.append(b).append((char) 32);
|
|
|
|
|
if (b.length() != 0) content.append(b).append((int) 32);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Regular-expression alternation that matches exactly one of these characters:
// space, '/', '(', ')', '-', ':', '_', '.', ',', '?', '!', apostrophe, double quote.
// The double quote is concatenated as a char literal so that it does not have to
// be escaped inside the string literal.
// NOTE(review): the name suggests this is the delimiter pattern used to split
// text into words (e.g. via String.split) -- confirm against the call sites.
public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"';
|
|
|
|
|