Merge branch 'master' of git@github.com:yacy/yacy_search_server.git

pull/38/head
Michael Peter Christen 9 years ago
commit 582d059fb7

@ -640,16 +640,16 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
}
line = line.substring(0, positionOfOpeningTag) + "<img src=\"" + kl + "\"" + align + alt + ">" + line.substring(positionOfClosingTag + LEN_WIKI_CLOSE_LINK);
}
}
// this is the part of the code that is responsible for Youtube video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_YOUTUBE)) {
kl = kl.substring(LEN_WIKI_VIDEO_YOUTUBE);
line = line.substring(0, positionOfOpeningTag) + "" + "<object width=\"425\" height=\"350\"><param name=\"movie\" value=\"http://www.youtube.com/v/" + kl + "\"></param><param name=\"wmode\" value=\"transparent\"></param><embed src=\"http://www.youtube.com/v/" + kl + "\" type=\"application/x-shockwave-flash\" wmode=\"transparent\" width=\"425\" height=\"350\"></embed></object>";
line = line.substring(0, positionOfOpeningTag) + "" + "<object width=\"425\" height=\"350\"><param name=\"movie\" value=\"http://www.youtube.com/v/" + kl + "\"></param><param name=\"wmode\" value=\"transparent\"></param><embed src=\"http://www.youtube.com/v/" + kl + "\" type=\"application/x-shockwave-flash\" wmode=\"transparent\" width=\"425\" height=\"350\"></embed></object>";
}
// this is the part of the code that is responsible for Vimeo video links supporting only the video ID as parameter
else if (kl.startsWith(WIKI_VIDEO_VIMEO)) {
kl = kl.substring(LEN_WIKI_VIDEO_VIMEO);
line = line.substring(0, positionOfOpeningTag) + "" + "<iframe src=\"http://player.vimeo.com/video/" + kl + "\" width=\"425\" height=\"350\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>";
line = line.substring(0, positionOfOpeningTag) + "" + "<iframe src=\"http://player.vimeo.com/video/" + kl + "\" width=\"425\" height=\"350\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>";
}
// if it's no image, it might be an internal link
else {
@ -1046,28 +1046,39 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
// {{Coordinate |NS 45/37/43.0/N |EW. 07/58/41.0/E |type=landmark |region=IT-BI}} ## means: degree/minute/second
// {{Coordinate |NS 51.48994 |EW. 7.33249 |type=landmark |region=DE-NW}}
final String b[] = a.split("\\|");
float lon = Float.NaN, lat = Float.NaN;
float lonm = 0.0f, latm = 0.0f;
float lon = Float.NaN, lat = Float.NaN; // degree
float lonm = 0.0f, latm = 0.0f; // minutes (including sec as fraction)
String lono = "E", lato = "N";
String name = "";
for (final String c: b) {
if (c.toLowerCase().startsWith("name=")) {
name = c.substring(5);
}
if (c.toUpperCase().startsWith("NS=")) {
final String d[] = c.substring(3).split("/");
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);}
else if (d.length == 2) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]);}
else if (d.length >= 3) {lat = Float.parseFloat(d[0]); latm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;}
if (d[d.length-1].toUpperCase().equals("S")) {lato = "S";}
}
if (c.toUpperCase().startsWith("EW=")) {
final String d[] = c.substring(3).split("/");
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);}
else if (d.length == 2) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]);}
else if (d.length >= 3) {lon = Float.parseFloat(d[0]); lonm = Float.parseFloat(d[1]) + Float.parseFloat(d[2]) / 60.0f;}
if (d[d.length-1].toUpperCase().equals("W")) {lato = "W";}
try {
for (final String c : b) {
if (c.toLowerCase().startsWith("name=")) {
name = c.substring(5);
}
if (c.toUpperCase().startsWith("NS=")) {
final String d[] = c.substring(3).split("/");
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lato = "S"; l = -l;} lat = (float) Math.floor(l); latm = 60.0f * (l - lat);}
else if (d.length > 1) { //format: NS deg/min/sec/N
lat = Float.parseFloat(d[0]); // degree
if (!d[1].isEmpty()) latm = Float.parseFloat(d[1]); // minutes
if (d.length >= 3 && !d[2].isEmpty()) {latm += (Float.parseFloat(d[2]) / 60.0f);} // sec (check empty because format found "45/10//N" )
if (d[d.length - 1].toUpperCase().equals("S")) lato = "S";
}
}
if (c.toUpperCase().startsWith("EW=")) {
final String d[] = c.substring(3).split("/");
if (d.length == 1) {float l = Float.parseFloat(d[0]); if (l < 0) {lono = "W"; l = -l;} lon = (float) Math.floor(l); lonm = 60.0f * (l - lon);}
else if (d.length > 1) {
lon = Float.parseFloat(d[0]);
if (!d[1].isEmpty()) lonm = Float.parseFloat(d[1]);
if (d.length >= 3 && !d[2].isEmpty()) {lonm += (Float.parseFloat(d[2]) / 60.0f);}
if (d[d.length-1].toUpperCase().equals("W")) {lono = "W";}
}
}
}
} catch (NumberFormatException nsExcept) {
// catch parseFloat exception (may still happen if wiki code contains expressions)
continue;
}
if (!Float.isNaN(lon) && !Float.isNaN(lat)) {
// replace this with a format that the html parser can understand

@ -39,7 +39,6 @@ import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@ -147,6 +146,11 @@ public class MediawikiImporter extends Thread implements Importer {
@Override
public void run() {
this.start = System.currentTimeMillis();
final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
// out keeps an output file open until poisoned; to make sure the underlying writer thread gets the end condition
// regardless of any exception (e.g. eof memory), an add(poison) is placed in the outermost finally block
final BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
final wikiparserrecord poison = newRecord();
try {
String targetstub = this.sourcefile.getName();
int p = targetstub.lastIndexOf("\\.");
@ -162,10 +166,7 @@ public class MediawikiImporter extends Thread implements Importer {
StringBuilder sb = new StringBuilder();
boolean page = false, text = false;
String title = null;
final wikiparserrecord poison = newRecord();
final int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
final BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
final BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
final ExecutorService service = Executors.newCachedThreadPool();
final convertConsumer[] consumers = new convertConsumer[threads];
final Future<?>[] consumerResults = (Future<?>[]) Array.newInstance(Future.class, threads);
@ -262,8 +263,6 @@ public class MediawikiImporter extends Thread implements Importer {
for (int i = 0; i < threads; i++) {
consumerResults[i].get(10000, TimeUnit.MILLISECONDS);
}
out.put(poison);
writerResult.get(10000, TimeUnit.MILLISECONDS);
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
} catch (final ExecutionException e) {
@ -272,11 +271,18 @@ public class MediawikiImporter extends Thread implements Importer {
ConcurrentLog.logException(e);
} catch (final Exception e) {
ConcurrentLog.logException(e);
} finally {
out.put(poison); // output thread condition (for file.close)
writerResult.get(10000, TimeUnit.MILLISECONDS);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final Exception e) {
ConcurrentLog.logException(e);
} finally {
try {
out.put(poison); // out keeps output file open until poisoned, to close file if exception happened in this block
} catch (InterruptedException ex) { }
}
}
@ -713,7 +719,7 @@ public class MediawikiImporter extends Thread implements Importer {
record.document.writeXML(this.osw);
this.rc++;
if (this.rc >= 10000) {
this.osw.write("</surrogates>\n");
this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
this.osw.close();
final String finalfilename = this.targetstub + "." + this.fc + ".xml";
new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
@ -733,14 +739,16 @@ public class MediawikiImporter extends Thread implements Importer {
} catch (final IOException e) {
ConcurrentLog.logException(e);
} finally {
try {
this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
this.osw.close();
final String finalfilename = this.targetstub + "." + this.fc + ".xml";
new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
try {
if (osw != null) { // maybe null on poison (immediately)
this.osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
this.osw.close();
final String finalfilename = this.targetstub + "." + this.fc + ".xml";
new File(this.targetdir, this.outputfilename).renameTo(new File(this.targetdir, finalfilename));
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
ConcurrentLog.info("WIKITRANSLATION", "*** convertWriter has terminated");
return Integer.valueOf(0);

Loading…
Cancel
Save