git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4055 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
borg-0300 18 years ago
parent 54004e929b
commit 4f6d56330d

@@ -275,15 +275,19 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
for (int i = 0; i < s.length(); i++) for (int i = 0; i < s.length(); i++)
if (s.charAt(i) < ' ') s = s.substring(0, i) + " " + s.substring(i + 1); if (s.charAt(i) < ' ') s = s.substring(0, i) + " " + s.substring(i + 1);
*/ */
// remove double-spaces
int p; int p;
while ((p = s.indexOf("  ")) >= 0) s = s.substring(0, p) + s.substring(p + 1);
// CR/LF entfernen, dabei koennen doppelte Leerzeichen entstehen, die aber weiter unten entfernt werden - thq
while ((p = s.indexOf("\n")) >= 0) s = s.substring(0, p) + ((p + 1 == s.length()) ? "" : " " + s.substring(p + 1));
// remove double-spaces
while ((p = s.indexOf("  ")) >= 0) s = s.substring(0, p) + s.substring(p + 1);
// we don't accept headlines that are too short // we don't accept headlines that are too short
s = s.trim(); s = s.trim();
if (s.length() < 4) s = ""; if (s.length() < 4) s = "";
// return result // return result
return s; return s;
} }
@@ -360,7 +364,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
* @return the {@link URL} to the favicon that belongs to the document * @return the {@link URL} to the favicon that belongs to the document
*/ */
public URL getFavicon() { public URL getFavicon() {
return this.favicon; return this.favicon;
} }
public String getDescription() { public String getDescription() {

Loading…
Cancel
Save