diff --git a/source/net/yacy/cora/util/NumberTools.java b/source/net/yacy/cora/util/NumberTools.java index 518a58211..454e1d094 100644 --- a/source/net/yacy/cora/util/NumberTools.java +++ b/source/net/yacy/cora/util/NumberTools.java @@ -27,7 +27,8 @@ public class NumberTools { /** * this method replaces Long.parseLong/2 where a substring of decimal numbers shall be parsed - * Strings are also auto-trimmed, that means parsing stops at spaces without throwing a NumberFormatException + * Strings are also auto-trimmed, that means parsing stops at trailing spaces without throwing a NumberFormatException + * leading spaces are skipped and parsing stops at the first non-digit character; a string of spaces only throws a NumberFormatException * @param s * @param startPos * @return the number @@ -54,6 +55,7 @@ public class NumberTools { int digit; char c; + while (s.charAt(i) == ' ') if (++i >= endPos) throw new NumberFormatException(s); // skip leading blanks; reaching endPos means blanks only, which is not a number char firstChar = s.charAt(i); if (firstChar < '0') { if (firstChar == '-') { @@ -66,7 +68,7 @@ public class NumberTools { multmin = limit / 10; while (i < endPos) { c = s.charAt(i++); - if (c == ' ') break; + if (c < '0' || c > '9') break; // stop at first non digit character digit = c - '0'; if (digit < 0 || digit > 9 || result < multmin) throw new NumberFormatException(s); result *= 10; @@ -97,6 +99,7 @@ public class NumberTools { int digit; char c; + while (s.charAt(i) == ' ') if (++i >= endPos) throw new NumberFormatException(s); // skip leading blanks; reaching endPos means blanks only, which is not a number char firstChar = s.charAt(i); if (firstChar < '0') { if (firstChar == '-') { @@ -109,7 +112,7 @@ public class NumberTools { multmin = limit / 10; while (i < endPos) { c = s.charAt(i++); - if (c == ' ') break; + if (c < '0' || c > '9') break; // stop at first non digit character digit = c - '0'; if (digit < 0 || digit > 9 || result < multmin) throw new NumberFormatException(s); result *= 10; diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 77b59ad79..3e2caa3fa 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ 
b/source/net/yacy/document/parser/html/ContentScraper.java @@ -28,7 +28,6 @@ import java.io.Writer; import java.lang.reflect.Array; import java.net.MalformedURLException; import java.nio.charset.Charset; -import java.text.NumberFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; @@ -415,15 +414,14 @@ public class ContentScraper extends AbstractScraper implements Scraper { if (src.length() > 0) { final DigestURL url = absolutePath(src); if (url != null) { - // use Numberformat.parse to allow parse of "550px" - NumberFormat intnum = NumberFormat.getIntegerInstance (); - final int width = intnum.parse(tag.opts.getProperty("width", "-1")).intValue(); // Integer.parseInt fails on "200px" - final int height = intnum.parse(tag.opts.getProperty("height", "-1")).intValue(); + // use NumberTools.parseIntDecSubstring to allow parsing of "550px", with better performance than NumberFormat.parse + final int width = NumberTools.parseIntDecSubstring(tag.opts.getProperty("width", "-1")); // Integer.parseInt fails on "200px" + final int height = NumberTools.parseIntDecSubstring(tag.opts.getProperty("height", "-1")); final ImageEntry ie = new ImageEntry(url, tag.opts.getProperty("alt", EMPTY_STRING), width, height, -1); this.images.add(ie); } } - } catch (final ParseException e) {} + } catch (final NumberFormatException e) {} this.evaluationScores.match(Element.imgpath, src); } else if(tag.name.equalsIgnoreCase("base")) { try { diff --git a/test/net/yacy/cora/util/NumberToolsTest.java b/test/net/yacy/cora/util/NumberToolsTest.java new file mode 100644 index 000000000..d714b9248 --- /dev/null +++ b/test/net/yacy/cora/util/NumberToolsTest.java @@ -0,0 +1,27 @@ +package net.yacy.cora.util; + +import static net.yacy.cora.util.NumberTools.parseIntDecSubstring; +import org.junit.Test; +import static org.junit.Assert.*; + +public class NumberToolsTest { + + /** + * Test of parseIntDecSubstring method, of class NumberTools. 
+ */ + @Test + public void testParseIntDecSubstring() { + String[] TestNumbers = new String[]{ + "101", " 102", " 103", " 104 ", + "+105", " -106", " +107 ", " -108 ", + "109px", " 110px"}; + + int i=101; + for (String s : TestNumbers) { + int result = parseIntDecSubstring(s); + assertEquals ("parsing '" + s + "'", s.indexOf('-') >= 0 ? -i : i, result); // compare the signed value: Math.abs would hide a lost or spurious minus sign + i++; + } + + } +}