From a7591d3ed05f879d72501c38ae12267519c39dd6 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 27 Dec 2015 01:59:15 +0100 Subject: [PATCH] fix mediawikiimporter number format exception on coordinate parsing handle incomplete metadata like "NS=43/50//N". For other {expr ... } type entries a try/catch was added --- source/net/yacy/data/wiki/WikiCode.java | 55 +++++++++++-------- .../document/importer/MediawikiImporter.java | 19 +++---- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/source/net/yacy/data/wiki/WikiCode.java b/source/net/yacy/data/wiki/WikiCode.java index 0dad95d5a..7ca013074 100644 --- a/source/net/yacy/data/wiki/WikiCode.java +++ b/source/net/yacy/data/wiki/WikiCode.java @@ -640,16 +640,16 @@ public class WikiCode extends AbstractWikiParser implements WikiParser { } line = line.substring(0, positionOfOpeningTag) + "" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK); - } + } // this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) { kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE); - line = line.substring(0, positionOfOpeningTag) + "" + ""; + line = line.substring(0, positionOfOpeningTag) + "" + ""; } // this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter else if (kl.startsWith(WIKI_VIDEO_VIMEO)) { kl = kl.substring(LEN_WIKI_VIDEO_VIMEO); - line = line.substring(0, positionOfOpeningTag) + "" + ""; + line = line.substring(0, positionOfOpeningTag) + "" + ""; } // if it's no image, it might be an internal link else { @@ -1046,28 +1046,39 @@ public class WikiCode extends AbstractWikiParser implements WikiParser { // {{Coordinate |NS 45/37/43.0/N |EW. 07/58/41.0/E |type=landmark |region=IT-BI}} ## means: degree/minute/second // {{Coordinate |NS 51.48994 |EW. 
7.33249 |type=landmark |region=DE-NW}} final String b[] = a.split("\\|"); - float lon = Float.NaN, lat = Float.NaN; - float lonm = 0.0f, latm = 0.0f; + float lon = Float.NaN, lat = Float.NaN; // degree + float lonm = 0.0f, latm = 0.0f; // minutes (including sec as fraction) String lono = "E", lato = "N"; String name = ""; - for (final String c: b) { - if (c.toLowerCase().startsWith("name=")) { - name = c.substring(5); - } - if (c.toUpperCase().startsWith("NS=")) { - final String d[] = c.substring(3).split("/"); - if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);} - else if (d.length == 2) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]);} - else if (d.length >= 3) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;} - if (d[d.length-1].toUpperCase().equals("S")) {lato = "S";} - } - if (c.toUpperCase().startsWith("EW=")) { - final String d[] = c.substring(3).split("/"); - if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);} - else if (d.length == 2) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]);} - else if (d.length >= 3) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;} - if (d[d.length-1].toUpperCase().equals("W")) {lato = "W";} + try { + for (final String c : b) { + if (c.toLowerCase().startsWith("name=")) { + name = c.substring(5); + } + if (c.toUpperCase().startsWith("NS=")) { + final String d[] = c.substring(3).split("/"); + if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);} + else if (d.length > 1) { //format: NS deg/min/sec/N + lat = Float.parseFloat(d[0]); // degree + if (!d[1].isEmpty()) latm = Float.parseFloat(d[1]); // minutes + if (d.length >= 3 && !d[2].isEmpty()) {latm 
+= (Float.parseFloat(d[2]) / 60.0f);} // sec (check empty because format found "45/10//N" ) + if (d[d.length - 1].toUpperCase().equals("S")) lato = "S"; + } + } + if (c.toUpperCase().startsWith("EW=")) { + final String d[] = c.substring(3).split("/"); + if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);} + else if (d.length > 1) { + lon = Float.parseFloat(d[0]); + if (!d[1].isEmpty()) lonm = Float.parseFloat(d[1]); + if (d.length >= 3 && !d[2].isEmpty()) {lonm += (Float.parseFloat(d[2]) / 60.0f);} + if (d[d.length-1].toUpperCase().equals("W")) {lono = "W";} + } + } } + } catch (NumberFormatException nsExcept) { + // catch parseFloat exception (may still happen if wiki code contains expressions) + continue; } if (!Float.isNaN(lon) && !Float.isNaN(lat)) { // replace this with a format that the html parser can understand diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java index 3a0b0c81c..07407895b 100644 --- a/source/net/yacy/document/importer/MediawikiImporter.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -39,7 +39,6 @@ import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; import java.lang.reflect.Array; import java.net.MalformedURLException; -import java.util.Date; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -713,7 +712,7 @@ public class MediawikiImporter extends Thread implements Importer { record.document.writeXML(this.osw); this.rc++; if (this.rc >= 10000) { - this.osw.write("\n"); + this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); this.osw.close(); final String finalfilename = this.targetstub + "." 
+ this.fc + ".xml"; new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); @@ -733,14 +732,14 @@ public class MediawikiImporter extends Thread implements Importer { } catch (final IOException e) { ConcurrentLog.logException(e); } finally { - try { - this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); - this.osw.close(); - final String finalfilename = this.targetstub + "." + this.fc + ".xml"; - new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } + try { + this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); + this.osw.close(); + final String finalfilename = this.targetstub + "." + this.fc + ".xml"; + new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); + } catch (final IOException e) { + ConcurrentLog.logException(e); + } } ConcurrentLog.info("WIKITRANSLATION", "*** convertWriter has terminated"); return Integer.valueOf(0);