diff --git a/source/net/yacy/data/wiki/WikiCode.java b/source/net/yacy/data/wiki/WikiCode.java
index 0dad95d5a..7ca013074 100644
--- a/source/net/yacy/data/wiki/WikiCode.java
+++ b/source/net/yacy/data/wiki/WikiCode.java
@@ -640,16 +640,16 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
}
line = line.substring(0, positionOfOpeningTag) + "
" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK);
- }
+ }
// this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) {
kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE);
- line = line.substring(0, positionOfOpeningTag) + "" + "";
+ line = line.substring(0, positionOfOpeningTag) + "" + "";
}
// this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kl = kl.substring(LEN_WIKI_VIDEO_VIMEO);
- line = line.substring(0, positionOfOpeningTag) + "" + "";
+ line = line.substring(0, positionOfOpeningTag) + "" + "";
}
// if it's no image, it might be an internal link
else {
@@ -1046,28 +1046,39 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
// {{Coordinate |NS 45/37/43.0/N |EW. 07/58/41.0/E |type=landmark |region=IT-BI}} ## means: degree/minute/second
// {{Coordinate |NS 51.48994 |EW. 7.33249 |type=landmark |region=DE-NW}}
final String b[] = a.split("\\|");
- float lon = Float.NaN, lat = Float.NaN;
- float lonm = 0.0f, latm = 0.0f;
+ float lon = Float.NaN, lat = Float.NaN; // degree
+ float lonm = 0.0f, latm = 0.0f; // minutes (including sec as fraction)
String lono = "E", lato = "N";
String name = "";
- for (final String c: b) {
- if (c.toLowerCase().startsWith("name=")) {
- name = c.substring(5);
- }
- if (c.toUpperCase().startsWith("NS=")) {
- final String d[] = c.substring(3).split("/");
- if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);}
- else if (d.length == 2) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]);}
- else if (d.length >= 3) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;}
- if (d[d.length-1].toUpperCase().equals("S")) {lato = "S";}
- }
- if (c.toUpperCase().startsWith("EW=")) {
- final String d[] = c.substring(3).split("/");
- if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);}
- else if (d.length == 2) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]);}
- else if (d.length >= 3) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;}
- if (d[d.length-1].toUpperCase().equals("W")) {lato = "W";}
+ try {
+ for (final String c : b) {
+ if (c.toLowerCase().startsWith("name=")) {
+ name = c.substring(5);
+ }
+ if (c.toUpperCase().startsWith("NS=")) {
+ final String d[] = c.substring(3).split("/");
+ if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);}
+ else if (d.length > 1) { //format: NS deg/min/sec/N
+ lat = Float.parseFloat(d[0]); // degree
+ if (!d[1].isEmpty()) latm = Float.parseFloat(d[1]); // minutes
+ if (d.length >= 3 && !d[2].isEmpty()) {latm += (Float.parseFloat(d[2]) / 60.0f);} // sec (check empty because format found "45/10//N" )
+ if (d[d.length - 1].toUpperCase().equals("S")) lato = "S";
+ }
+ }
+ if (c.toUpperCase().startsWith("EW=")) {
+ final String d[] = c.substring(3).split("/");
+ if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);}
+ else if (d.length > 1) {
+ lon = Float.parseFloat(d[0]);
+ if (!d[1].isEmpty()) lonm = Float.parseFloat(d[1]);
+ if (d.length >= 3 && !d[2].isEmpty()) {lonm += (Float.parseFloat(d[2]) / 60.0f);}
+ if (d[d.length-1].toUpperCase().equals("W")) {lono = "W";}
+ }
+ }
}
+ } catch (NumberFormatException nsExcept) {
+ // catch parseFloat exception (may still happen if wiki code contains expressions)
+ continue;
}
if (!Float.isNaN(lon) && !Float.isNaN(lat)) {
// replace this with a format that the html parser can understand
diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java
index 3a0b0c81c..4e7de8208 100644
--- a/source/net/yacy/document/importer/MediawikiImporter.java
+++ b/source/net/yacy/document/importer/MediawikiImporter.java
@@ -39,7 +39,6 @@ import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.net.MalformedURLException;
-import java.util.Date;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@@ -147,6 +146,11 @@ public class MediawikiImporter extends Thread implements Importer {
@Override
public void run() {
this.start = System.currentTimeMillis();
+ final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
+ // out keeps an output file open until poisoned, to make sure the underlying thread gets the end condition
+ // regardless of any exception (e.g. out of memory), a put(poison) is added to the outermost finally block
+ final BlockingQueue out = new ArrayBlockingQueue(threads * 10);
+ final wikiparserrecord poison = newRecord();
try {
String targetstub = this.sourcefile.getName();
int p = targetstub.lastIndexOf("\\.");
@@ -162,10 +166,7 @@ public class MediawikiImporter extends Thread implements Importer {
StringBuilder sb = new StringBuilder();
boolean page = false, text = false;
String title = null;
- final wikiparserrecord poison = newRecord();
- final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
final BlockingQueue in = new ArrayBlockingQueue(threads * 10);
- final BlockingQueue out = new ArrayBlockingQueue(threads * 10);
final ExecutorService service = Executors.newCachedThreadPool();
final convertConsumer[] consumers = new convertConsumer[threads];
final Future>[] consumerResults = (Future>[]) Array.newInstance(Future.class, threads);
@@ -262,8 +263,6 @@ public class MediawikiImporter extends Thread implements Importer {
for (int i = 0; i < threads; i++) {
consumerResults[i].get(10000, TimeUnit.MILLISECONDS);
}
- out.put(poison);
- writerResult.get(10000, TimeUnit.MILLISECONDS);
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
} catch (final ExecutionException e) {
@@ -272,11 +271,18 @@ public class MediawikiImporter extends Thread implements Importer {
ConcurrentLog.logException(e);
} catch (final Exception e) {
ConcurrentLog.logException(e);
+ } finally {
+ out.put(poison); // output thread condition (for file.close)
+ writerResult.get(10000, TimeUnit.MILLISECONDS);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final Exception e) {
ConcurrentLog.logException(e);
+ } finally {
+ try {
+ out.put(poison); // out keeps output file open until poisoned, to close file if exception happened in this block
+ } catch (InterruptedException ex) { }
}
}
@@ -713,7 +719,7 @@ public class MediawikiImporter extends Thread implements Importer {
record.document.writeXML(this.osw);
this.rc++;
if (this.rc >= 10000) {
- this.osw.write("\n");
+ this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
this.osw.close();
final String finalfilename = this.targetstub + "." + this.fc + ".xml";
new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
@@ -733,14 +739,16 @@ public class MediawikiImporter extends Thread implements Importer {
} catch (final IOException e) {
ConcurrentLog.logException(e);
} finally {
- try {
- this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
- this.osw.close();
- final String finalfilename = this.targetstub + "." + this.fc + ".xml";
- new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
- } catch (final IOException e) {
- ConcurrentLog.logException(e);
- }
+ try {
+ if (osw != null) { // maybe null on poison (immediately)
+ this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
+ this.osw.close();
+ final String finalfilename = this.targetstub + "." + this.fc + ".xml";
+ new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
+ }
+ } catch (final IOException e) {
+ ConcurrentLog.logException(e);
+ }
}
ConcurrentLog.info("WIKITRANSLATION", "*** convertWriter has terminated");
return Integer.valueOf(0);