// PhpBB3Dao.java // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 26.05.2009 on http://yacy.net // // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedRevision: 1986 $ // $LastChangedBy: orbiter $ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.document.content.dao; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Date; import java.util.HashMap; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import net.yacy.document.content.DCEntry; import net.yacy.kelondro.data.meta.DigestURI; public class PhpBB3Dao implements Dao { protected DatabaseConnection conn = null; private String urlstub, prefix; private HashMap users; public PhpBB3Dao( String urlstub, String dbType, String host, int port, String dbname, String prefix, String user, String pw) throws Exception { this.conn = new DatabaseConnection(dbType, host, port, dbname, user, pw); this.urlstub = urlstub; this.prefix = prefix; this.users = new HashMap(); } protected void finalize() throws Throwable { close(); } public Date first() { Statement stmt = null; ResultSet rs = null; try { stmt = conn.statement(); rs = stmt.executeQuery("select min(post_time) from " + prefix + "posts"); if (rs.next()) { return new Date(rs.getLong(1) * 1000L); } return null; } catch (SQLException e) { e.printStackTrace(); return null; } finally { if (rs != null) try {rs.close();} catch (SQLException e) {} if (stmt != null) try {stmt.close();} catch (SQLException e) {} } } public Date latest() { Statement stmt = null; ResultSet rs = null; try { stmt = conn.statement(); rs = stmt.executeQuery("select max(post_time) from " + prefix + "posts"); if (rs.next()) { return new Date(rs.getLong(1) * 1000L); } return null; } catch (SQLException e) { e.printStackTrace(); return null; } finally { if (rs != null) try {rs.close();} catch (SQLException e) {} if (stmt != null) try {stmt.close();} catch (SQLException e) {} } } public int size() throws SQLException { return this.conn.count(prefix + "posts"); } public DCEntry get(int item) { return getOne("select * from " + prefix + "posts where post_id = " + item); } public BlockingQueue query(int from, int until, int queueSize) { // define the sql query final StringBuilder sql = new StringBuilder(256); sql.append("select * from " + prefix + "posts where post_id >= "); sql.append(from); if (until > from) { sql.append(" and post_id < "); sql.append(until); } sql.append(" order by post_id"); // execute the query and push entries to a queue concurrently return toQueue(sql, queueSize); } public BlockingQueue query(Date from, int queueSize) { // define the sql query final StringBuilder sql = new StringBuilder(256); sql.append("select * from " + prefix + "posts where post_time >= "); sql.append(from.getTime() / 1000); sql.append(" order by post_id"); // execute the query and push entries to a queue concurrently return toQueue(sql, queueSize); } private DCEntry getOne(String sql) { Statement stmt = null; ResultSet rs = null; try { stmt = conn.statement(); rs = stmt.executeQuery(sql); if (rs.next()) { try { return parseResultSet(rs); } catch (MalformedURLException e) { e.printStackTrace(); } } return null; } catch (SQLException e) { e.printStackTrace(); return null; } finally { if (rs != null) try {rs.close();} catch (SQLException e) {} if (stmt != null) try {stmt.close();} catch (SQLException e) {} } } private BlockingQueue toQueue(final StringBuilder sql, int queueSize) { // execute the query and push entries to a queue concurrently final BlockingQueue queue = new ArrayBlockingQueue(queueSize); Thread dbreader = new Thread() { public void run() { Statement stmt = null; ResultSet rs = null; try { stmt = conn.statement(); rs = stmt.executeQuery(sql.toString()); while (rs.next()) { try { queue.put(parseResultSet(rs)); } catch (MalformedURLException e) { e.printStackTrace(); } } queue.put(DCEntry.poison); } catch (InterruptedException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } finally { if (rs != null) try {rs.close();} catch (SQLException e) {} if (stmt != null) try {stmt.close();} catch (SQLException e) {} } } }; dbreader.start(); return queue; } protected DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException { DigestURI url; int item = rs.getInt("post_id"); url = new DigestURI(this.urlstub + "/viewtopic.php?t=" + item); String subject = rs.getString("post_subject"); String text = xmlCleaner(rs.getString("post_text")); String user = getUser(rs.getInt("poster_id")); Date date = new Date(rs.getLong("post_time") * 1000L); return new DCEntry(url, date, subject, user, text); } public static String xmlCleaner(String s) { if (s == null) return null; StringBuilder sbOutput = new StringBuilder(s.length()); char c; for (int i = 0; i < s.length(); i++ ) { c = s.charAt(i); if ((c >= 0x0020 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD) || c == 0x0009 || c == 0x000A || c == 0x000D ) { sbOutput.append(c); } } return sbOutput.toString().trim(); } private String getUser(int poster_id) { String nick = this.users.get(poster_id); if (nick != null) return nick; StringBuilder sql = new StringBuilder(256); sql.append("select * from " + prefix + "users where user_id = "); sql.append(poster_id); Statement stmt = null; ResultSet rs = null; try { stmt = conn.statement(); rs = stmt.executeQuery(sql.toString()); if (rs.next()) nick = rs.getString("username"); if (nick == null) nick = ""; this.users.put(poster_id, nick); return nick; } catch (SQLException e) { e.printStackTrace(); return ""; } finally { if (rs != null) try {rs.close();} catch (SQLException e) {} if (stmt != null) try {stmt.close();} catch (SQLException e) {} } } public int writeSurrogates( BlockingQueue queue, File targetdir, String versioninfo, int maxEntriesInFile ) { try { // generate output file name and attributes String targethost = new DigestURI(this.urlstub, null).getHost(); int fc = 0; File outputfiletmp = null, outputfile = null; // write the result from the query concurrently in a file OutputStreamWriter osw = null; DCEntry e; int c = 0; while ((e = queue.take()) != DCEntry.poison) { if (osw == null) { outputfiletmp = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml.prt"); outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml"); if (outputfiletmp.exists()) outputfiletmp.delete(); if (outputfile.exists()) outputfile.delete(); osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8"); osw.write("\n\n"); } e.writeXML(osw); c++; if (c >= maxEntriesInFile) { osw.write("\n"); osw.close(); outputfiletmp.renameTo(outputfile); osw = null; c = 0; fc++; } } osw.write("\n"); osw.close(); outputfiletmp.renameTo(outputfile); return fc + 1; } catch (MalformedURLException e) { e.printStackTrace(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } return 0; } public void close() { this.conn.close(); } public static void main(String[] args) { PhpBB3Dao db; try { db = new PhpBB3Dao( "http://forum.yacy-websuche.de", "mysql", "localhost", 3306, "forum", "forum_", "root", "" ); System.out.println("Posts in database : " + db.size()); System.out.println("First entry : " + db.first()); System.out.println("Last entry : " + db.latest()); File targetdir = new File("x").getParentFile(); db.writeSurrogates(db.query(0, -1, 100), targetdir, "id0-current", 3000); } catch (Exception e) { e.printStackTrace(); } } }