optimize parseInt for <img> tag attribute parsing

Performs better than using NumberFormat.parse or parseInt(substring())
pull/18/head
reger 9 years ago
parent 3c31bf845f
commit bad34804fe

@ -27,7 +27,8 @@ public class NumberTools {
/**
* this method replaces Long.parseLong/2 where a substring of decimal numbers shall be parsed
* Strings are also auto-trimmed, that means parsing stops at spaces without throwing a NumberFormatException
* Strings are also auto-trimmed, that means parsing stops at trailing spaces without throwing a NumberFormatException
* leading spaces are skipped and parsing stops at the first non-digit character
* @param s
* @param startPos
* @return the number
@ -54,6 +55,7 @@ public class NumberTools {
int digit;
char c;
while (s.charAt(i) == ' ') i++; // skip leading blanks
char firstChar = s.charAt(i);
if (firstChar < '0') {
if (firstChar == '-') {
@ -66,7 +68,7 @@ public class NumberTools {
multmin = limit / 10;
while (i < endPos) {
c = s.charAt(i++);
if (c == ' ') break;
if (c < '0' || c > '9') break; // stop at first non digit character
digit = c - '0';
if (digit < 0 || digit > 9 || result < multmin) throw new NumberFormatException(s);
result *= 10;
@ -97,6 +99,7 @@ public class NumberTools {
int digit;
char c;
while (s.charAt(i) == ' ') i++; // skip leading blanks
char firstChar = s.charAt(i);
if (firstChar < '0') {
if (firstChar == '-') {
@ -109,7 +112,7 @@ public class NumberTools {
multmin = limit / 10;
while (i < endPos) {
c = s.charAt(i++);
if (c == ' ') break;
if (c < '0' || c > '9') break; // stop at first non digit character
digit = c - '0';
if (digit < 0 || digit > 9 || result < multmin) throw new NumberFormatException(s);
result *= 10;

@ -28,7 +28,6 @@ import java.io.Writer;
import java.lang.reflect.Array;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
@ -415,15 +414,14 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (src.length() > 0) {
final DigestURL url = absolutePath(src);
if (url != null) {
// use Numberformat.parse to allow parse of "550px"
NumberFormat intnum = NumberFormat.getIntegerInstance ();
final int width = intnum.parse(tag.opts.getProperty("width", "-1")).intValue(); // Integer.parseInt fails on "200px"
final int height = intnum.parse(tag.opts.getProperty("height", "-1")).intValue();
// use NumberTools.parseIntDecSubstring to allow parsing of "550px", with better performance than NumberFormat.parse
final int width = NumberTools.parseIntDecSubstring(tag.opts.getProperty("width", "-1")); // Integer.parseInt fails on "200px"
final int height = NumberTools.parseIntDecSubstring(tag.opts.getProperty("height", "-1"));
final ImageEntry ie = new ImageEntry(url, tag.opts.getProperty("alt", EMPTY_STRING), width, height, -1);
this.images.add(ie);
}
}
} catch (final ParseException e) {}
} catch (final NumberFormatException e) {}
this.evaluationScores.match(Element.imgpath, src);
} else if(tag.name.equalsIgnoreCase("base")) {
try {

@ -0,0 +1,27 @@
package net.yacy.cora.util;
import static net.yacy.cora.util.NumberTools.parseIntDecSubstring;
import org.junit.Test;
import static org.junit.Assert.*;
public class NumberToolsTest {

    /**
     * Test of parseIntDecSubstring method, of class NumberTools.
     *
     * Covers plain digits, leading and trailing blanks, an explicit sign and
     * trailing non-digit characters such as the "px" unit suffix found in
     * HTML width/height attributes.
     *
     * Each input is paired with its signed expected value, so a parser that
     * drops or inverts the sign fails the test instead of being masked by
     * Math.abs.
     *
     * NOTE(review): the negative expectations assume parseIntDecSubstring
     * returns a negative value for input with a leading '-' - confirm against
     * NumberTools.
     */
    @Test
    public void testParseIntDecSubstring() {
        final String[] inputs = {
            "101", " 102", " 103", " 104 ",
            "+105", " -106", " +107 ", " -108 ",
            "109px", " 110px"};
        final int[] expected = {
            101, 102, 103, 104,
            105, -106, 107, -108,
            109, 110};

        // guard against the two tables drifting apart when cases are added
        assertEquals("inputs and expected values must pair up", inputs.length, expected.length);

        for (int idx = 0; idx < inputs.length; idx++) {
            final String input = inputs[idx];
            final int result = parseIntDecSubstring(input);
            assertEquals("parseIntDecSubstring(\"" + input + "\")", expected[idx], result);
        }
    }
}
Loading…
Cancel
Save