- yacy can import phpbb3 posts without crawling - all data is written as surrogate - indexed surrogate files can be re-used git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5985 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
f1a9253baa
commit
4b4bddca00
@ -0,0 +1,83 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Content Integration: Retrieval from phpBB3 Databases</title>
|
||||
#%env/templates/metas.template%#
|
||||
</head>
|
||||
<body id="ContentIntegrationPHPBB3">
|
||||
#%env/templates/header.template%#
|
||||
#%env/templates/submenuIndexCreate.template%#
|
||||
<h2>Content Integration: Retrieval from phpBB3 Databases</h2>
|
||||
<p>
|
||||
It is possible to extract texts directly from mySQL and postgreSQL databases.
|
||||
Each extraction is specific to the data that is hosted in the database.
|
||||
This interface gives you access to the phpBB3 forums software content.
|
||||
</p>
|
||||
<p>
|
||||
When an export is started, surrogate files are generated into DATA/SURROGATES/in, which are automatically fetched by an indexer thread.
|
||||
All indexed surrogate files are then moved to DATA/SURROGATES/out and can be re-cycled when an index is deleted.
|
||||
</p>
|
||||
|
||||
<form action="ContentIntegrationPHPBB3_p.html">
|
||||
<fieldset>
|
||||
<dl>
|
||||
<dt><b>The URL stub</b>,<br />like http://forum.yacy-websuche.de<br />this must be the path right in front of '/viewtopic.php?'</dt>
|
||||
<dd><input type="text" name="content.phpbb3.urlstub" value="#[content.phpbb3.urlstub]#" size="60" /></dd>
|
||||
|
||||
<dt><b>Type</b> of database<br />(use either 'mysql' or 'pgsql')</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbtype" value="#[content.phpbb3.dbtype]#" size="6" /></dd>
|
||||
|
||||
<dt><b>Host</b> of the database</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbhost" value="#[content.phpbb3.dbhost]#" size="40" /></dd>
|
||||
|
||||
<dt><b>Port</b> of database service<br />(usually 3306 for mySQL)</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbport" value="#[content.phpbb3.dbport]#" size="6" /></dd>
|
||||
|
||||
<dt><b>Name of the database</b> on the host</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbname" value="#[content.phpbb3.dbname]#" size="20" /></dd>
|
||||
|
||||
<dt><b>User</b> that can access the database</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbuser" value="#[content.phpbb3.dbuser]#" size="20" /></dd>
|
||||
|
||||
<dt><b>Password</b> for the account of that user given above</dt>
|
||||
<dd><input type="text" name="content.phpbb3.dbpw" value="#[content.phpbb3.dbpw]#" size="20" /></dd>
|
||||
|
||||
<dt><b>Posts per file</b><br />in exported surrogates</dt>
|
||||
<dd><input type="text" name="content.phpbb3.ppf" value="#[content.phpbb3.ppf]#" size="20" /></dd>
|
||||
|
||||
<dt></dt>
|
||||
<dd>
|
||||
<input type="submit" name="check" value="Check database connection" />
|
||||
<input type="submit" name="export" value="Export Content to Surrogates" />
|
||||
</dd>
|
||||
</dl>
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
#(check)#::
|
||||
<form>
|
||||
<fieldset>
|
||||
<dl>
|
||||
<dt>Posts in database</dt>
|
||||
<dd>#[posts]#</dd>
|
||||
|
||||
<dt>first entry</dt>
|
||||
<dd>#[first]#</dd>
|
||||
|
||||
<dt>last entry</dt>
|
||||
<dd>#[last]#</dd>
|
||||
|
||||
</dl>
|
||||
</fieldset>
|
||||
</form>::
|
||||
<p>Info failed: #[error]#</p>
|
||||
#(/check)#
|
||||
|
||||
#(export)#::
|
||||
<p>Export successful! Wrote #[files]# files in DATA/SURROGATES/in</p>::
|
||||
<p>Export failed: #[error]#</p>
|
||||
#(/export)#
|
||||
|
||||
#%env/templates/footer.template%#
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,123 @@
|
||||
// ContentIntegrationPHPBB3_p.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 27.05.2009 on http://yacy.net
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import de.anomic.content.dao.Dao;
|
||||
import de.anomic.content.dao.PhpBB3Dao;
|
||||
import de.anomic.http.httpRequestHeader;
|
||||
import de.anomic.kelondro.util.DateFormatter;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
|
||||
public class ContentIntegrationPHPBB3_p {
|
||||
|
||||
public static serverObjects respond(final httpRequestHeader header, final serverObjects post, final serverSwitch<?> env) {
|
||||
final serverObjects prop = new serverObjects();
|
||||
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
|
||||
|
||||
prop.put("check", 0);
|
||||
prop.put("export", 0);
|
||||
|
||||
if (post != null) {
|
||||
|
||||
String urlstub = post.get("content.phpbb3.urlstub", "");
|
||||
String dbtype = post.get("content.phpbb3.dbtype", "");
|
||||
String dbhost = post.get("content.phpbb3.dbhost", "");
|
||||
int dbport = post.getInt("content.phpbb3.dbport", 3306);
|
||||
String dbname = post.get("content.phpbb3.dbname", "");
|
||||
String dbuser = post.get("content.phpbb3.dbuser", "");
|
||||
String dbpw = post.get("content.phpbb3.dbpw", "");
|
||||
int ppf = post.getInt("content.phpbb3.ppf", 1000);
|
||||
|
||||
|
||||
sb.setConfig("content.phpbb3.urlstub", urlstub);
|
||||
sb.setConfig("content.phpbb3.dbtype", dbtype);
|
||||
sb.setConfig("content.phpbb3.dbhost", dbhost);
|
||||
sb.setConfig("content.phpbb3.dbport", dbport);
|
||||
sb.setConfig("content.phpbb3.dbname", dbname);
|
||||
sb.setConfig("content.phpbb3.dbuser", dbuser);
|
||||
sb.setConfig("content.phpbb3.dbpw", dbpw);
|
||||
sb.setConfig("content.phpbb3.ppf", ppf);
|
||||
|
||||
if (post.containsKey("check")) {
|
||||
try {
|
||||
Dao db = new PhpBB3Dao(
|
||||
urlstub,
|
||||
dbtype,
|
||||
dbhost,
|
||||
dbport,
|
||||
dbname,
|
||||
dbuser,
|
||||
dbpw
|
||||
);
|
||||
prop.put("check", 1);
|
||||
prop.put("check_posts", db.size());
|
||||
prop.putHTML("check_first", db.first().toString());
|
||||
prop.putHTML("check_last", db.latest().toString());
|
||||
db.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
prop.put("check", 2);
|
||||
prop.put("check_error", e.getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (post.containsKey("export")) {
|
||||
try {
|
||||
Dao db = new PhpBB3Dao(
|
||||
urlstub,
|
||||
dbtype,
|
||||
dbhost,
|
||||
dbport,
|
||||
dbname,
|
||||
dbuser,
|
||||
dbpw
|
||||
);
|
||||
|
||||
int files = db.writeSurrogates(db.query(0, -1, 100), sb.surrogatesInPath, "fullexport-" + DateFormatter.formatShortSecond(), ppf);
|
||||
prop.put("export", 1);
|
||||
prop.put("export_files", files);
|
||||
db.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
prop.put("export", 2);
|
||||
prop.put("export_error", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
prop.putHTML("content.phpbb3.urlstub", sb.getConfig("content.phpbb3.urlstub", ""));
|
||||
prop.putHTML("content.phpbb3.dbtype", sb.getConfig("content.phpbb3.dbtype", ""));
|
||||
prop.putHTML("content.phpbb3.dbhost", sb.getConfig("content.phpbb3.dbhost", ""));
|
||||
prop.putHTML("content.phpbb3.dbport", sb.getConfig("content.phpbb3.dbport", ""));
|
||||
prop.putHTML("content.phpbb3.dbname", sb.getConfig("content.phpbb3.dbname", ""));
|
||||
prop.putHTML("content.phpbb3.dbuser", sb.getConfig("content.phpbb3.dbuser", ""));
|
||||
prop.putHTML("content.phpbb3.dbpw", sb.getConfig("content.phpbb3.dbpw", ""));
|
||||
prop.putHTML("content.phpbb3.ppf", sb.getConfig("content.phpbb3.ppf", ""));
|
||||
|
||||
return prop;
|
||||
}
|
||||
}
|
@ -0,0 +1,393 @@
|
||||
// PhpBB3Dao.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 26.05.2009 on http://yacy.net
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.content.dao;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import de.anomic.content.DCEntry;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
|
||||
public class PhpBB3Dao implements Dao {
|
||||
|
||||
private Connection conn = null;
|
||||
private String urlstub;
|
||||
private HashMap<Integer, String> users;
|
||||
|
||||
/**
 * Creates a data access object for one phpBB3 database.
 *
 * @param urlstub the forum address that is put in front of "/viewtopic.php" when post urls are built
 * @param dbType  database type, handed to getConnection ("mysql" or "pgsql")
 * @param host    host name of the database server
 * @param port    port of the database service
 * @param dbname  name of the database on that host
 * @param user    database account name
 * @param pw      password of that account
 * @throws Exception if getConnection cannot provide a connection
 */
public PhpBB3Dao(
        String urlstub,
        String dbType,
        String host,
        int port,
        String dbname,
        String user,
        String pw) throws Exception {
    // cache for user id -> user name lookups, filled lazily by getUser
    this.users = new HashMap<Integer, String>();
    this.urlstub = urlstub;
    this.conn = getConnection(dbType, host, port, dbname, user, pw);
}
|
||||
|
||||
protected void finalize() throws Throwable {
|
||||
closeConnection();
|
||||
}
|
||||
|
||||
private Connection getConnection(final String dbType, String host, int port, String dbname, String user, String pw) throws Exception {
|
||||
String dbDriverStr = null, dbConnStr = null;
|
||||
if (dbType.equalsIgnoreCase("mysql")) {
|
||||
dbDriverStr = "com.mysql.jdbc.Driver";
|
||||
dbConnStr = "jdbc:mysql://" + host + ":" + port + "/" + dbname;
|
||||
} else if (dbType.equalsIgnoreCase("pgsql")) {
|
||||
dbDriverStr = "org.postgresql.Driver";
|
||||
dbConnStr = "jdbc:postgresql://" + host + ":" + port + "/" + dbname;
|
||||
} else throw new IllegalArgumentException();
|
||||
|
||||
try {
|
||||
Class.forName(dbDriverStr).newInstance();
|
||||
} catch (final Exception e) {
|
||||
throw new Exception("Unable to load the jdbc driver: " + e.getMessage(),e);
|
||||
}
|
||||
|
||||
try {
|
||||
return DriverManager.getConnection(dbConnStr, user, pw);
|
||||
} catch (final Exception e) {
|
||||
throw new Exception("Unable to establish a database connection: " + e.getMessage(),e);
|
||||
}
|
||||
}
|
||||
|
||||
public void closeConnection() {
|
||||
if (conn != null) {
|
||||
try {
|
||||
conn.close();
|
||||
conn = null;
|
||||
} catch (SQLException e) {
|
||||
System.out.println("PhpBB3Dao: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Date first() {
|
||||
StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select min(post_time) from phpbb_posts");
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
if (rs.next()) {
|
||||
return new Date(rs.getLong(1) * 1000L);
|
||||
}
|
||||
return null;
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
public Date latest() {
|
||||
StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select max(post_time) from phpbb_posts");
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
if (rs.next()) {
|
||||
return new Date(rs.getLong(1) * 1000L);
|
||||
}
|
||||
return null;
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
public int size() {
|
||||
StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select count(*) from phpbb_posts");
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
if (rs.next()) {
|
||||
return rs.getInt(1);
|
||||
}
|
||||
return 0;
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
return 0;
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
public DCEntry get(int item) {
|
||||
StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select * from phpbb_posts where post_id = ");
|
||||
sql.append(item);
|
||||
return getOne(sql);
|
||||
}
|
||||
|
||||
public BlockingQueue<DCEntry> query(int from, int until, int queueSize) {
|
||||
// define the sql query
|
||||
final StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select * from phpbb_posts where post_id >= ");
|
||||
sql.append(from);
|
||||
if (until > from) {
|
||||
sql.append(" and post_id < ");
|
||||
sql.append(until);
|
||||
}
|
||||
sql.append(" order by post_id");
|
||||
|
||||
// execute the query and push entries to a queue concurrently
|
||||
return toQueue(sql, queueSize);
|
||||
}
|
||||
|
||||
public BlockingQueue<DCEntry> query(Date from, int queueSize) {
|
||||
// define the sql query
|
||||
final StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select * from phpbb_posts where post_time >= ");
|
||||
sql.append(from.getTime() / 1000);
|
||||
sql.append(" order by post_id");
|
||||
|
||||
// execute the query and push entries to a queue concurrently
|
||||
return toQueue(sql, queueSize);
|
||||
}
|
||||
|
||||
|
||||
private DCEntry getOne(StringBuilder sql) {
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
if (rs.next()) {
|
||||
try {
|
||||
return parseResultSet(rs);
|
||||
} catch (MalformedURLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
private BlockingQueue<DCEntry> toQueue(final StringBuilder sql, int queueSize) {
|
||||
// execute the query and push entries to a queue concurrently
|
||||
final BlockingQueue<DCEntry> queue = new ArrayBlockingQueue<DCEntry>(queueSize);
|
||||
Thread dbreader = new Thread() {
|
||||
public void run() {
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
while (rs.next()) {
|
||||
try {
|
||||
queue.put(parseResultSet(rs));
|
||||
} catch (MalformedURLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
queue.put(DCEntry.poison);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
};
|
||||
dbreader.start();
|
||||
return queue;
|
||||
}
|
||||
|
||||
private DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException {
|
||||
yacyURL url;
|
||||
int item = rs.getInt("post_id");
|
||||
url = new yacyURL(this.urlstub + "/viewtopic.php?t=" + item);
|
||||
String subject = rs.getString("post_subject");
|
||||
String text = xmlCleaner(rs.getString("post_text"));
|
||||
String user = getUser(rs.getInt("poster_id"));
|
||||
Date date = new Date(rs.getLong("post_time") * 1000L);
|
||||
return new DCEntry(url, date, subject, user, text);
|
||||
}
|
||||
|
||||
public static String xmlCleaner(String s) {
|
||||
if (s == null) return null;
|
||||
|
||||
StringBuilder sbOutput = new StringBuilder(s.length());
|
||||
char c;
|
||||
|
||||
for (int i = 0; i < s.length(); i++ ) {
|
||||
c = s.charAt(i);
|
||||
if ((c >= 0x0020 && c <= 0xD7FF) ||
|
||||
(c >= 0xE000 && c <= 0xFFFD) ||
|
||||
c == 0x0009 ||
|
||||
c == 0x000A ||
|
||||
c == 0x000D ) {
|
||||
sbOutput.append(c);
|
||||
}
|
||||
}
|
||||
return sbOutput.toString().trim();
|
||||
}
|
||||
|
||||
private String getUser(int poster_id) {
|
||||
String nick = this.users.get(poster_id);
|
||||
if (nick != null) return nick;
|
||||
|
||||
StringBuilder sql = new StringBuilder(256);
|
||||
sql.append("select * from phpbb_users where user_id = ");
|
||||
sql.append(poster_id);
|
||||
Statement stmt = null;
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
stmt = conn.createStatement();
|
||||
rs = stmt.executeQuery(sql.toString());
|
||||
if (rs.next()) nick = rs.getString("username");
|
||||
if (nick == null) nick = "";
|
||||
this.users.put(poster_id, nick);
|
||||
return nick;
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
return "";
|
||||
} finally {
|
||||
if (rs != null) try {rs.close();} catch (SQLException e) {}
|
||||
if (stmt != null) try {stmt.close();} catch (SQLException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
public int writeSurrogates(
|
||||
BlockingQueue<DCEntry> queue,
|
||||
File targetdir,
|
||||
String versioninfo,
|
||||
int maxEntriesInFile
|
||||
) {
|
||||
try {
|
||||
// generate output file name and attributes
|
||||
String targethost = new yacyURL(this.urlstub, null).getHost();
|
||||
int fc = 0;
|
||||
File outputfiletmp = null, outputfile = null;
|
||||
|
||||
// write the result from the query concurrently in a file
|
||||
OutputStreamWriter osw = null;
|
||||
DCEntry e;
|
||||
int c = 0;
|
||||
while ((e = queue.take()) != DCEntry.poison) {
|
||||
if (osw == null) {
|
||||
outputfiletmp = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml.tmp");
|
||||
outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml");
|
||||
if (outputfiletmp.exists()) outputfiletmp.delete();
|
||||
if (outputfile.exists()) outputfile.delete();
|
||||
osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
|
||||
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
|
||||
}
|
||||
e.writeXML(osw);
|
||||
c++;
|
||||
if (c >= maxEntriesInFile) {
|
||||
osw.write("</surrogates>\n");
|
||||
osw.close();
|
||||
outputfiletmp.renameTo(outputfile);
|
||||
osw = null;
|
||||
c = 0;
|
||||
fc++;
|
||||
}
|
||||
}
|
||||
osw.write("</surrogates>\n");
|
||||
osw.close();
|
||||
outputfiletmp.renameTo(outputfile);
|
||||
return fc + 1;
|
||||
} catch (MalformedURLException e) {
|
||||
e.printStackTrace();
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
try {
|
||||
this.conn.close();
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
PhpBB3Dao db;
|
||||
try {
|
||||
db = new PhpBB3Dao(
|
||||
"http://forum.yacy-websuche.de",
|
||||
"mysql",
|
||||
"localhost",
|
||||
3306,
|
||||
"forum",
|
||||
"root",
|
||||
""
|
||||
);
|
||||
System.out.println("Posts in database : " + db.size());
|
||||
System.out.println("First entry : " + db.first());
|
||||
System.out.println("Last entry : " + db.latest());
|
||||
File targetdir = new File("x").getParentFile();
|
||||
db.writeSurrogates(db.query(0, -1, 100), targetdir, "id0-current", 3000);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue