Abstract the surrogate main element into shared constants (the xmlns:geo namespace declaration was missing for wiki extracts)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7727 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 021840e5ba
commit 205cc75157

@ -52,6 +52,17 @@ import org.xml.sax.helpers.DefaultHandler;
public class SurrogateReader extends DefaultHandler implements Runnable { public class SurrogateReader extends DefaultHandler implements Runnable {
// Definition of the surrogate main (root) element: its tag name plus the
// ready-made opening and closing tags, kept as constants so the markup is
// spelled out in exactly one place.
/** Tag name of the surrogate main element. */
public final static String SURROGATES_MAIN_ELEMENT_NAME =
"surrogates";
/**
 * Opening tag of the surrogate main element. It declares the dc, yacy and
 * geo XML namespace prefixes on the element.
 */
public final static String SURROGATES_MAIN_ELEMENT_OPEN =
"<" + SURROGATES_MAIN_ELEMENT_NAME +
" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"" +
" xmlns:yacy=\"http://yacy.net/\"" +
" xmlns:geo=\"http://www.w3.org/2003/01/geo/wgs84_pos#\">";
/** Closing tag of the surrogate main element. */
public final static String SURROGATES_MAIN_ELEMENT_CLOSE =
"</" + SURROGATES_MAIN_ELEMENT_NAME + ">";
// class variables // class variables
private final StringBuilder buffer; private final StringBuilder buffer;
private boolean parsingValue; private boolean parsingValue;

@ -40,6 +40,7 @@ import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import net.yacy.document.content.DCEntry; import net.yacy.document.content.DCEntry;
import net.yacy.document.content.SurrogateReader;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
@ -275,7 +276,7 @@ public class PhpBB3Dao implements Dao {
if (outputfiletmp.exists()) outputfiletmp.delete(); if (outputfiletmp.exists()) outputfiletmp.delete();
if (outputfile.exists()) outputfile.delete(); if (outputfile.exists()) outputfile.delete();
osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8"); osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"); osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
} }
e.writeXML(osw); e.writeXML(osw);
c++; c++;
@ -288,7 +289,7 @@ public class PhpBB3Dao implements Dao {
fc++; fc++;
} }
} }
osw.write("</surrogates>\n"); osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
osw.close(); osw.close();
outputfiletmp.renameTo(outputfile); outputfiletmp.renameTo(outputfile);
return fc + 1; return fc + 1;

@ -26,6 +26,7 @@ import net.yacy.cora.document.UTF8;
import net.yacy.document.Document; import net.yacy.document.Document;
import net.yacy.document.Parser; import net.yacy.document.Parser;
import net.yacy.document.TextParser; import net.yacy.document.TextParser;
import net.yacy.document.content.SurrogateReader;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.ByteBuffer; import net.yacy.kelondro.util.ByteBuffer;
@ -695,7 +696,7 @@ public class MediawikiImporter extends Thread implements Importer {
// start writing a new file // start writing a new file
this.outputfilename = targetstub + "." + fc + ".xml.prt"; this.outputfilename = targetstub + "." + fc + ".xml.prt";
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8"); this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8");
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"); osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
} }
Log.logInfo("WIKITRANSLATION", "[CONSUME] Title: " + record.title); Log.logInfo("WIKITRANSLATION", "[CONSUME] Title: " + record.title);
record.document.writeXML(osw, new Date()); record.document.writeXML(osw, new Date());
@ -709,10 +710,8 @@ public class MediawikiImporter extends Thread implements Importer {
fc++; fc++;
outputfilename = targetstub + "." + fc + ".xml.prt"; outputfilename = targetstub + "." + fc + ".xml.prt";
osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8"); osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8");
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"); osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
} }
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
Log.logException(e); Log.logException(e);
@ -724,7 +723,7 @@ public class MediawikiImporter extends Thread implements Importer {
Log.logException(e); Log.logException(e);
} finally { } finally {
try { try {
osw.write("</surrogates>\n"); osw.write(SurrogateReader.SURROGATES_MAIN_ELEMENT_CLOSE + "\n");
osw.close(); osw.close();
String finalfilename = targetstub + "." + fc + ".xml"; String finalfilename = targetstub + "." + fc + ".xml";
new File(targetdir, outputfilename).renameTo(new File(targetdir, finalfilename)); new File(targetdir, outputfilename).renameTo(new File(targetdir, finalfilename));

Loading…
Cancel
Save