diff --git a/source/net/yacy/data/wiki/WikiCode.java b/source/net/yacy/data/wiki/WikiCode.java index 0dad95d5a..7ca013074 100644 --- a/source/net/yacy/data/wiki/WikiCode.java +++ b/source/net/yacy/data/wiki/WikiCode.java @@ -640,16 +640,16 @@ public class WikiCode extends AbstractWikiParser implements WikiParser { } line = line.substring(0, positionOfOpeningTag) + "" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK); - } + } // this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) { kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE); - line = line.substring(0, positionOfOpeningTag) + "" + ""; + line = line.substring(0, positionOfOpeningTag) + "" + ""; } // this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter else if (kl.startsWith(WIKI_VIDEO_VIMEO)) { kl = kl.substring(LEN_WIKI_VIDEO_VIMEO); - line = line.substring(0, positionOfOpeningTag) + "" + ""; + line = line.substring(0, positionOfOpeningTag) + "" + ""; } // if it's no image, it might be an internal link else { @@ -1046,28 +1046,39 @@ public class WikiCode extends AbstractWikiParser implements WikiParser { // {{Coordinate |NS 45/37/43.0/N |EW. 07/58/41.0/E |type=landmark |region=IT-BI}} ## means: degree/minute/second // {{Coordinate |NS 51.48994 |EW. 7.33249 |type=landmark |region=DE-NW}} final String b[] = a.split("\\|"); - float lon = Float.NaN, lat = Float.NaN; - float lonm = 0.0f, latm = 0.0f; + float lon = Float.NaN, lat = Float.NaN; // degree + float lonm = 0.0f, latm = 0.0f; // minutes (including sec as fraction) String lono = "E", lato = "N"; String name = ""; - for (final String c: b) { - if (c.toLowerCase().startsWith("name=")) { - name = c.substring(5); - } - if (c.toUpperCase().startsWith("NS=")) { - final String d[] = c.substring(3).split("/"); - if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);} - else if (d.length == 2) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]);} - else if (d.length >= 3) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;} - if (d[d.length-1].toUpperCase().equals("S")) {lato = "S";} - } - if (c.toUpperCase().startsWith("EW=")) { - final String d[] = c.substring(3).split("/"); - if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);} - else if (d.length == 2) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]);} - else if (d.length >= 3) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;} - if (d[d.length-1].toUpperCase().equals("W")) {lato = "W";} + try { + for (final String c : b) { + if (c.toLowerCase().startsWith("name=")) { + name = c.substring(5); + } + if (c.toUpperCase().startsWith("NS=")) { + final String d[] = c.substring(3).split("/"); + if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);} + else if (d.length > 1) { //format: NS deg/min/sec/N + lat = Float.parseFloat(d[0]); // degree + if (!d[1].isEmpty()) latm = Float.parseFloat(d[1]); // minutes + if (d.length >= 3 && !d[2].isEmpty()) {latm += (Float.parseFloat(d[2]) / 60.0f);} // sec (check empty because format found "45/10//N" ) + if (d[d.length - 1].toUpperCase().equals("S")) lato = "S"; + } + } + if (c.toUpperCase().startsWith("EW=")) { + final String d[] = c.substring(3).split("/"); + if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);} + else if (d.length > 1) { + lon = Float.parseFloat(d[0]); + if (!d[1].isEmpty()) lonm = Float.parseFloat(d[1]); + if (d.length >= 3 && !d[2].isEmpty()) {lonm += (Float.parseFloat(d[2]) / 60.0f);} + if (d[d.length-1].toUpperCase().equals("W")) {lono = "W";} + } + } } + } catch (NumberFormatException nsExcept) { + // catch parseFloat exception (may still happen if wiki code contains expressions) + continue; } if (!Float.isNaN(lon) && !Float.isNaN(lat)) { // replace this with a format that the html parser can understand diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java index 3a0b0c81c..4e7de8208 100644 --- a/source/net/yacy/document/importer/MediawikiImporter.java +++ b/source/net/yacy/document/importer/MediawikiImporter.java @@ -39,7 +39,6 @@ import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; import java.lang.reflect.Array; import java.net.MalformedURLException; -import java.util.Date; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -147,6 +146,11 @@ public class MediawikiImporter extends Thread implements Importer { @Override public void run() { this.start = System.currentTimeMillis(); + final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1); + // out keeps a outputfile open until poisened, to make sure underlaying thread gets the end condition + // regardless of any exception (e.g. eof memory) a add(poison) is added to the most outer final block + final BlockingQueue out = new ArrayBlockingQueue(threads * 10); + final wikiparserrecord poison = newRecord(); try { String targetstub = this.sourcefile.getName(); int p = targetstub.lastIndexOf("\\."); @@ -162,10 +166,7 @@ public class MediawikiImporter extends Thread implements Importer { StringBuilder sb = new StringBuilder(); boolean page = false, text = false; String title = null; - final wikiparserrecord poison = newRecord(); - final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1); final BlockingQueue in = new ArrayBlockingQueue(threads * 10); - final BlockingQueue out = new ArrayBlockingQueue(threads * 10); final ExecutorService service = Executors.newCachedThreadPool(); final convertConsumer[] consumers = new convertConsumer[threads]; final Future[] consumerResults = (Future[]) Array.newInstance(Future.class, threads); @@ -262,8 +263,6 @@ public class MediawikiImporter extends Thread implements Importer { for (int i = 0; i < threads; i++) { consumerResults[i].get(10000, TimeUnit.MILLISECONDS); } - out.put(poison); - writerResult.get(10000, TimeUnit.MILLISECONDS); } catch (final InterruptedException e) { ConcurrentLog.logException(e); } catch (final ExecutionException e) { @@ -272,11 +271,18 @@ public class MediawikiImporter extends Thread implements Importer { ConcurrentLog.logException(e); } catch (final Exception e) { ConcurrentLog.logException(e); + } finally { + out.put(poison); // output thread condition (for file.close) + writerResult.get(10000, TimeUnit.MILLISECONDS); } } catch (final IOException e) { ConcurrentLog.logException(e); } catch (final Exception e) { ConcurrentLog.logException(e); + } finally { + try { + out.put(poison); // out keeps output file open until poisened, to close file if exception happend in this block + } catch (InterruptedException ex) { } } } @@ -713,7 +719,7 @@ public class MediawikiImporter extends Thread implements Importer { record.document.writeXML(this.osw); this.rc++; if (this.rc >= 10000) { - this.osw.write("\n"); + this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); this.osw.close(); final String finalfilename = this.targetstub + "." + this.fc + ".xml"; new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); @@ -733,14 +739,16 @@ public class MediawikiImporter extends Thread implements Importer { } catch (final IOException e) { ConcurrentLog.logException(e); } finally { - try { - this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); - this.osw.close(); - final String finalfilename = this.targetstub + "." + this.fc + ".xml"; - new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } + try { + if (osw != null) { // maybe null on poison (immediately) + this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n"); + this.osw.close(); + final String finalfilename = this.targetstub + "." + this.fc + ".xml"; + new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename)); + } + } catch (final IOException e) { + ConcurrentLog.logException(e); + } } ConcurrentLog.info("WIKITRANSLATION", "*** convertWriter has terminated"); return Integer.valueOf(0);