git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4055 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
borg-0300 18 years ago
parent 54004e929b
commit 4f6d56330d

@ -276,8 +276,12 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (s.charAt(i) < ' ') s = s.substring(0, i) + " " + s.substring(i + 1);
*/
// remove double-spaces
int p;
// remove line feeds ("\n"): each is replaced by a space (a trailing one is just dropped); this can create double spaces, which are removed further below - thq
while ((p = s.indexOf("\n")) >= 0) s = s.substring(0, p) + ((p + 1 == s.length()) ? "" : " " + s.substring(p + 1));
// remove double-spaces
while ((p = s.indexOf(" ")) >= 0) s = s.substring(0, p) + s.substring(p + 1);
// we don't accept headlines that are too short

Loading…
Cancel
Save