added new submenu to crawler menu: import of phpbb3 forum postings from mysql

- yacy can import phpbb3 posts without crawling
- all data is written as surrogate
- indexed surrogate files can be re-used

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5985 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent f1a9253baa
commit 4b4bddca00

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.81
releaseVersion=0.82
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -899,3 +899,13 @@ cgi.suffixes = cgi,pl
# whether this is a version for a web browser
browserintegration = false
# content integration settings
content.phpbb3.urlstub = http://<mydomain>/
content.phpbb3.dbtype = mysql
content.phpbb3.dbhost = localhost
content.phpbb3.dbport = 3306
content.phpbb3.dbname = forum
content.phpbb3.dbuser = notroot
content.phpbb3.dbpw = joshua
content.phpbb3.ppf = 1000

@ -0,0 +1,83 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>YaCy '#[clientname]#': Content Integration: Retrieval from phpBB3 Databases</title>
    #%env/templates/metas.template%#
  </head>
  <body id="ContentIntegrationPHPBB3">
    #%env/templates/header.template%#
    #%env/templates/submenuIndexCreate.template%#
    <h2>Content Integration: Retrieval from phpBB3 Databases</h2>
    <p>
      It is possible to extract texts directly from mySQL and postgreSQL databases.
      Each extraction is specific to the data that is hosted in the database.
      This interface gives you access to the phpBB3 forums software content.
    </p>
    <p>
      When an export is started, surrogate files are generated into DATA/SURROGATES/in which are automatically fetched by an indexer thread.
      All indexed surrogate files are then moved to DATA/SURROGATES/out and can be re-cycled when an index is deleted.
    </p>
    <!-- Connection settings. Each input is named after the configuration key it is stored under. -->
    <form action="ContentIntegrationPHPBB3_p.html">
      <fieldset>
        <dl>
          <dt><b>The URL stub</b>,<br />like http://forum.yacy-websuche.de<br />this must be the path right in front of '/viewtopic.php?'</dt>
          <dd><input type="text" name="content.phpbb3.urlstub" value="#[content.phpbb3.urlstub]#" size="60" /></dd>
          <dt><b>Type</b> of database<br />(use either 'mysql' or 'pgsql')</dt>
          <dd><input type="text" name="content.phpbb3.dbtype" value="#[content.phpbb3.dbtype]#" size="6" /></dd>
          <dt><b>Host</b> of the database</dt>
          <dd><input type="text" name="content.phpbb3.dbhost" value="#[content.phpbb3.dbhost]#" size="40" /></dd>
          <dt><b>Port</b> of database service<br />(usually 3306 for mySQL)</dt>
          <dd><input type="text" name="content.phpbb3.dbport" value="#[content.phpbb3.dbport]#" size="6" /></dd>
          <dt><b>Name of the database</b> on the host</dt>
          <dd><input type="text" name="content.phpbb3.dbname" value="#[content.phpbb3.dbname]#" size="20" /></dd>
          <dt><b>User</b> that can access the database</dt>
          <dd><input type="text" name="content.phpbb3.dbuser" value="#[content.phpbb3.dbuser]#" size="20" /></dd>
          <dt><b>Password</b> for the account of that user given above</dt>
          <!-- password input so that the database password is masked on screen -->
          <dd><input type="password" name="content.phpbb3.dbpw" value="#[content.phpbb3.dbpw]#" size="20" /></dd>
          <dt><b>Posts per file</b><br />in exported surrogates</dt>
          <dd><input type="text" name="content.phpbb3.ppf" value="#[content.phpbb3.ppf]#" size="20" /></dd>
          <dt></dt>
          <dd>
            <input type="submit" name="check" value="Check database connection" />&nbsp;&nbsp;
            <input type="submit" name="export" value="Export Content to Surrogates" />
          </dd>
        </dl>
      </fieldset>
    </form>
    <!-- Result of the connection check: nothing, the statistics table, or the error text. -->
    #(check)#::
    <form>
      <fieldset>
        <dl>
          <dt>Posts in database</dt>
          <dd>#[posts]#</dd>
          <dt>first entry</dt>
          <dd>#[first]#</dd>
          <dt>last entry</dt>
          <dd>#[last]#</dd>
        </dl>
      </fieldset>
    </form>::
    <p>Info failed: #[error]#</p>
    #(/check)#
    <!-- Result of the export: nothing, the success message, or the error text. -->
    #(export)#::
    <p>Export successful! Wrote #[files]# files in DATA/SURROGATES/in</p>::
    <p>Export failed: #[error]#</p>
    #(/export)#
    #%env/templates/footer.template%#
  </body>
</html>

@ -0,0 +1,123 @@
// ContentIntegrationPHPBB3_p.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 27.05.2009 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import de.anomic.content.dao.Dao;
import de.anomic.content.dao.PhpBB3Dao;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet for ContentIntegrationPHPBB3_p.html.
 * Stores the submitted phpBB3 database settings in the switchboard configuration and,
 * depending on the pressed button, either checks the database connection or exports
 * all posts as surrogate files.
 */
public class ContentIntegrationPHPBB3_p {

    /**
     * Handles one request of the phpBB3 content integration page.
     *
     * @param header the request header (not used here)
     * @param post the submitted form values, or null if the page was opened without any
     * @param env the switchboard; must be a plasmaSwitchboard
     * @return the template properties for the page
     */
    public static serverObjects respond(final httpRequestHeader header, final serverObjects post, final serverSwitch<?> env) {
        final serverObjects prop = new serverObjects();
        final plasmaSwitchboard sb = (plasmaSwitchboard) env;

        // default: neither a check result nor an export result is shown
        prop.put("check", 0);
        prop.put("export", 0);

        if (post != null) {
            // read the submitted settings
            final String urlstub = post.get("content.phpbb3.urlstub", "");
            final String dbtype = post.get("content.phpbb3.dbtype", "");
            final String dbhost = post.get("content.phpbb3.dbhost", "");
            final int dbport = post.getInt("content.phpbb3.dbport", 3306);
            final String dbname = post.get("content.phpbb3.dbname", "");
            final String dbuser = post.get("content.phpbb3.dbuser", "");
            final String dbpw = post.get("content.phpbb3.dbpw", "");
            final int ppf = post.getInt("content.phpbb3.ppf", 1000);

            // remember them so that the form is pre-filled on the next visit
            sb.setConfig("content.phpbb3.urlstub", urlstub);
            sb.setConfig("content.phpbb3.dbtype", dbtype);
            sb.setConfig("content.phpbb3.dbhost", dbhost);
            sb.setConfig("content.phpbb3.dbport", dbport);
            sb.setConfig("content.phpbb3.dbname", dbname);
            sb.setConfig("content.phpbb3.dbuser", dbuser);
            sb.setConfig("content.phpbb3.dbpw", dbpw);
            sb.setConfig("content.phpbb3.ppf", ppf);

            if (post.containsKey("check")) {
                Dao db = null;
                try {
                    db = new PhpBB3Dao(urlstub, dbtype, dbhost, dbport, dbname, dbuser, dbpw);
                    prop.put("check", 1);
                    prop.put("check_posts", db.size());
                    prop.putHTML("check_first", db.first().toString());
                    prop.putHTML("check_last", db.latest().toString());
                } catch (Exception e) {
                    e.printStackTrace();
                    prop.put("check", 2);
                    // the message may contain user-supplied values (host, database name): escape it
                    prop.putHTML("check_error", errorText(e));
                } finally {
                    // release the connection even if one of the queries above failed
                    closeQuietly(db);
                }
            }

            if (post.containsKey("export")) {
                Dao db = null;
                try {
                    db = new PhpBB3Dao(urlstub, dbtype, dbhost, dbport, dbname, dbuser, dbpw);
                    final int files = db.writeSurrogates(db.query(0, -1, 100), sb.surrogatesInPath, "fullexport-" + DateFormatter.formatShortSecond(), ppf);
                    prop.put("export", 1);
                    prop.put("export_files", files);
                } catch (Exception e) {
                    e.printStackTrace();
                    prop.put("export", 2);
                    prop.putHTML("export_error", errorText(e));
                } finally {
                    closeQuietly(db);
                }
            }
        }

        // hand the (possibly just updated) settings back to the form
        prop.putHTML("content.phpbb3.urlstub", sb.getConfig("content.phpbb3.urlstub", ""));
        prop.putHTML("content.phpbb3.dbtype", sb.getConfig("content.phpbb3.dbtype", ""));
        prop.putHTML("content.phpbb3.dbhost", sb.getConfig("content.phpbb3.dbhost", ""));
        prop.putHTML("content.phpbb3.dbport", sb.getConfig("content.phpbb3.dbport", ""));
        prop.putHTML("content.phpbb3.dbname", sb.getConfig("content.phpbb3.dbname", ""));
        prop.putHTML("content.phpbb3.dbuser", sb.getConfig("content.phpbb3.dbuser", ""));
        prop.putHTML("content.phpbb3.dbpw", sb.getConfig("content.phpbb3.dbpw", ""));
        prop.putHTML("content.phpbb3.ppf", sb.getConfig("content.phpbb3.ppf", ""));

        return prop;
    }

    /** Returns a displayable text for an exception, falling back to toString() if it has no message. */
    private static String errorText(final Exception e) {
        final String message = e.getMessage();
        return (message == null) ? e.toString() : message;
    }

    /** Closes the dao if it was opened; a failure while closing must not hide the actual result. */
    private static void closeQuietly(final Dao db) {
        if (db == null) return;
        try {
            db.close();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }
}

@ -1,10 +1,10 @@
<div class="SubMenu">
<h3>Web Crawler Control</h3>
<h3>Content Integration</h3>
</div>
<div class="SubMenu">
<div class="SubMenugroup">
<h3>Start a Web Crawl</h3>
<h3>Web Crawler</h3>
<ul class="SubMenu">
<li><a href="/CrawlStart_p.html" class="MenuItemLink lock">Crawl Start</a></li>
<li><a href="/CrawlProfileEditor_p.html" class="MenuItemLink lock">Crawl Profile Editor</a></li>
@ -30,6 +30,13 @@
</ul>
</div>
<div class="SubMenugroup">
<h3>Database Import</h3>
<ul class="SubMenu">
<li><a href="/ContentIntegrationPHPBB3_p.html" class="MenuItemLink lock">phpBB3 forum</a></li>
</ul>
</div>
<!---
<div class="SubMenugroup">
<h3>Media Crawl Queues</h3>

@ -25,6 +25,9 @@
package de.anomic.content;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
@ -37,23 +40,26 @@ import de.anomic.yacy.yacyURL;
public class DCEntry extends HashMap<String, String> {
private static final long serialVersionUID = -2050291583515701559L;
private static final long serialVersionUID = -2050291583515701559L;
public static final DCEntry poison = new DCEntry();
public DCEntry() {
super();
}
public DCEntry(
yacyURL url,
Date date,
String title,
String author,
String body
) {
super();
this.put("url", url.toNormalform(true, false));
this.put("title", title);
this.put("author", author);
this.put("body", body);
this.put("dc:Date", DateFormatter.formatISO8601(date));
this.put("dc:Title", title);
this.put("dc:Creator", author);
this.put("dc:Description", body);
}
/*
@ -103,7 +109,7 @@ public class DCEntry extends HashMap<String, String> {
public String language() {
String l = this.get("language");
if (l == null) l = this.get("dc:Language");
if (l == null) return "en"; else return l;
if (l == null) return url().language(); else return l;
}
public String title() {
@ -150,9 +156,30 @@ public class DCEntry extends HashMap<String, String> {
HashSet<String> languages = new HashSet<String>();
languages.add(language());
return new plasmaParserDocument(url(), "text/html", "utf-8", languages,
categories(), title(), "",
null, "",
body().getBytes(), null, null);
try {
return new plasmaParserDocument(
url(),
"text/html",
"UTF-8",
languages,
categories(),
title(),
author(),
null,
"",
body().getBytes("UTF-8"),
null,
null);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return null;
}
}
public void writeXML(OutputStreamWriter os) throws IOException {
plasmaParserDocument doc = document();
if (doc != null) {
doc.writeXML(os, this.date());
}
}
}

@ -24,8 +24,9 @@
package de.anomic.content.dao;
import java.util.ArrayList;
import java.io.File;
import java.util.Date;
import java.util.concurrent.BlockingQueue;
import de.anomic.content.DCEntry;
@ -37,25 +38,27 @@ public interface Dao {
// item-oriented retrieval
/**
* get the maximum number of items in the database
* get the maximum number of possible DCEntry items in the database
*/
public int maxItems();
public int size();
/**
* retrieve a single item from the database
* @param item
* @return
* @return a single result entry in Dublin Core format
*/
public DCEntry get(int item);
/**
* retrieve a list of entries in the database;
* the object denoted with until is not contained in the list
* @param from
* @param until
* @return
* retrieve a set of entries in the database;
* the object denoted with until is not contained in the result set
* all retrieved objects are pushed concurrently to a blocking queue
* @param from the first id
* @param until the limit of the last id (the id is not included)
* @param queueSize the maximum number of entries in the blocking queue
* @return a queue into which the results are written concurrently
*/
public ArrayList<DCEntry> get(int from, int until);
public BlockingQueue<DCEntry> query(int from, int until, int queueSize);
// date-oriented retrieval
@ -63,20 +66,38 @@ public interface Dao {
/**
* return the date of the first entry
*/
public Date firstEntry();
public Date first();
/**
* return the date of the latest entry
* @return
* @return the date of the latest entry
*/
public Date latestEntry();
public Date latest();
/**
* get a list of entries in the database;
* the returned list contains all entries up to the most recent
* retrieve a set of entries in the database;
* the result set contains all entries up to the most recent
* all retrieved objects are pushed to the blocking queue
* @param from
* @return
* @return a queue into which the results are written concurrently
*/
public ArrayList<DCEntry> get(Date from);
public BlockingQueue<DCEntry> query(Date from, int queueSize);
// export methods
public int writeSurrogates(
BlockingQueue<DCEntry> queue,
File targetdir,
String versioninfo,
int maxEntriesInFile
);
// workflow
/**
* close the connection to the database
*/
public void close();
}

@ -0,0 +1,393 @@
// PhpBB3Dao.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 26.05.2009 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.content.dao;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.HashMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import de.anomic.content.DCEntry;
import de.anomic.yacy.yacyURL;
public class PhpBB3Dao implements Dao {
// JDBC connection to the forum database; opened in the constructor
private Connection conn = null;
// URL prefix that is put in front of "/viewtopic.php" when entry URLs are built
private String urlstub;
// cache that maps a poster id to the user name, filled by getUser()
private HashMap<Integer, String> users;
public PhpBB3Dao(
String urlstub,
String dbType,
String host,
int port,
String dbname,
String user,
String pw) throws Exception {
this.conn = getConnection(dbType, host, port, dbname, user, pw);
this.urlstub = urlstub;
this.users = new HashMap<Integer, String>();
}
/**
 * Safety net: closes the database connection when the object is garbage collected
 * without having been closed explicitly.
 */
@Override
protected void finalize() throws Throwable {
    try {
        closeConnection();
    } finally {
        // keep the finalizer chain intact
        super.finalize();
    }
}
/**
 * Loads the JDBC driver for the given database type and opens a connection.
 *
 * @param dbType "mysql" or "pgsql" (case-insensitive)
 * @param host database host
 * @param port database port
 * @param dbname name of the database
 * @param user database account
 * @param pw password of that account
 * @return the open connection
 * @throws IllegalArgumentException if dbType is neither "mysql" nor "pgsql"
 * @throws Exception if the driver cannot be loaded or the connection cannot be established
 */
private Connection getConnection(final String dbType, String host, int port, String dbname, String user, String pw) throws Exception {
    final String dbDriverStr;
    final String dbConnStr;
    // literal first, so that a null dbType is reported as an unsupported type and not as a NullPointerException
    if ("mysql".equalsIgnoreCase(dbType)) {
        dbDriverStr = "com.mysql.jdbc.Driver";
        dbConnStr = "jdbc:mysql://" + host + ":" + port + "/" + dbname;
    } else if ("pgsql".equalsIgnoreCase(dbType)) {
        dbDriverStr = "org.postgresql.Driver";
        dbConnStr = "jdbc:postgresql://" + host + ":" + port + "/" + dbname;
    } else {
        throw new IllegalArgumentException("unsupported database type '" + dbType + "', use 'mysql' or 'pgsql'");
    }

    // instantiating the driver class makes it register itself with the DriverManager
    try {
        Class.forName(dbDriverStr).newInstance();
    } catch (final Exception e) {
        throw new Exception("Unable to load the jdbc driver: " + e.getMessage(), e);
    }

    try {
        return DriverManager.getConnection(dbConnStr, user, pw);
    } catch (final Exception e) {
        throw new Exception("Unable to establish a database connection: " + e.getMessage(), e);
    }
}
/**
 * Closes the database connection if there is one.
 * The reference is cleared only after a successful close; a failure is printed to stdout.
 */
public void closeConnection() {
    if (conn == null) {
        return;
    }
    try {
        conn.close();
        conn = null;
    } catch (SQLException e) {
        System.out.println("PhpBB3Dao: " + e);
    }
}
/**
 * Returns the date of the oldest post.
 * The minimum of post_time is read as a number of seconds and converted to milliseconds.
 *
 * @return the date of the first entry, or null if the query yields no row or fails
 */
public Date first() {
    final String query = "select min(post_time) from phpbb_posts";
    Statement statement = null;
    ResultSet result = null;
    try {
        statement = conn.createStatement();
        result = statement.executeQuery(query);
        Date oldest = null;
        if (result.next()) {
            oldest = new Date(result.getLong(1) * 1000L);
        }
        return oldest;
    } catch (SQLException e) {
        e.printStackTrace();
        return null;
    } finally {
        // best effort cleanup, failures while closing are ignored
        if (result != null) try {result.close();} catch (SQLException e) {}
        if (statement != null) try {statement.close();} catch (SQLException e) {}
    }
}
/**
 * Returns the date of the most recent post.
 * The maximum of post_time is read as a number of seconds and converted to milliseconds.
 *
 * @return the date of the latest entry, or null if the query yields no row or fails
 */
public Date latest() {
    final String query = "select max(post_time) from phpbb_posts";
    Statement statement = null;
    ResultSet result = null;
    try {
        statement = conn.createStatement();
        result = statement.executeQuery(query);
        Date newest = null;
        if (result.next()) {
            newest = new Date(result.getLong(1) * 1000L);
        }
        return newest;
    } catch (SQLException e) {
        e.printStackTrace();
        return null;
    } finally {
        // best effort cleanup, failures while closing are ignored
        if (result != null) try {result.close();} catch (SQLException e) {}
        if (statement != null) try {statement.close();} catch (SQLException e) {}
    }
}
/**
 * Counts the rows of the phpbb_posts table.
 *
 * @return the number of posts, or 0 if the query yields no row or fails
 */
public int size() {
    final String query = "select count(*) from phpbb_posts";
    Statement statement = null;
    ResultSet result = null;
    try {
        statement = conn.createStatement();
        result = statement.executeQuery(query);
        int posts = 0;
        if (result.next()) {
            posts = result.getInt(1);
        }
        return posts;
    } catch (SQLException e) {
        e.printStackTrace();
        return 0;
    } finally {
        // best effort cleanup, failures while closing are ignored
        if (result != null) try {result.close();} catch (SQLException e) {}
        if (statement != null) try {statement.close();} catch (SQLException e) {}
    }
}
/**
 * Retrieves the post with the given post_id.
 *
 * @param item the post_id to look up
 * @return whatever getOne() delivers for the generated query
 */
public DCEntry get(int item) {
    final StringBuilder query = new StringBuilder(256)
            .append("select * from phpbb_posts where post_id = ")
            .append(item);
    return getOne(query);
}
/**
 * Selects posts by id range, ordered by post_id, and delivers them through a queue.
 * The upper bound is applied only if until is greater than from; otherwise
 * all posts starting at from are selected.
 *
 * @param from the first post_id (inclusive)
 * @param until the limit of the post_id (exclusive)
 * @param queueSize the capacity passed on to toQueue()
 * @return the queue returned by toQueue() for the generated query
 */
public BlockingQueue<DCEntry> query(int from, int until, int queueSize) {
    final StringBuilder query = new StringBuilder(256)
            .append("select * from phpbb_posts where post_id >= ")
            .append(from);
    final boolean bounded = until > from;
    if (bounded) {
        query.append(" and post_id < ").append(until);
    }
    query.append(" order by post_id");

    return toQueue(query, queueSize);
}
/**
 * Selects all posts whose post_time is not older than the given date, ordered by post_id,
 * and delivers them through a queue. The date is compared as seconds (milliseconds / 1000).
 *
 * @param from the earliest date to include
 * @param queueSize the capacity passed on to toQueue()
 * @return the queue returned by toQueue() for the generated query
 */
public BlockingQueue<DCEntry> query(Date from, int queueSize) {
    final long fromSeconds = from.getTime() / 1000;
    final StringBuilder query = new StringBuilder(256)
            .append("select * from phpbb_posts where post_time >= ")
            .append(fromSeconds)
            .append(" order by post_id");

    return toQueue(query, queueSize);
}
/**
 * Executes the query and converts its first row into an entry.
 *
 * @param sql the complete select statement
 * @return the parsed first row; null if there is no row, if the row yields a malformed URL,
 *         or if the database reports an error
 */
private DCEntry getOne(StringBuilder sql) {
    DCEntry entry = null;
    Statement statement = null;
    ResultSet rows = null;
    try {
        statement = conn.createStatement();
        rows = statement.executeQuery(sql.toString());
        if (rows.next()) {
            try {
                entry = parseResultSet(rows);
            } catch (MalformedURLException e) {
                e.printStackTrace();
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    } finally {
        // best effort cleanup, failures while closing are ignored
        if (rows != null) try {rows.close();} catch (SQLException e) {}
        if (statement != null) try {statement.close();} catch (SQLException e) {}
    }
    return entry;
}
/**
 * Executes the query in a separate thread and pushes every parsed row to a bounded queue.
 * The end of the result is marked with DCEntry.poison. The poison entry is queued in every
 * case, also when the query fails, so that a consumer blocking on the queue always terminates.
 *
 * @param sql the complete select statement
 * @param queueSize the capacity of the queue; the reader thread blocks while the queue is full
 * @return the queue that the reader thread writes to
 */
private BlockingQueue<DCEntry> toQueue(final StringBuilder sql, int queueSize) {
    final BlockingQueue<DCEntry> queue = new ArrayBlockingQueue<DCEntry>(queueSize);
    Thread dbreader = new Thread() {
        @Override
        public void run() {
            Statement stmt = null;
            ResultSet rs = null;
            boolean interrupted = false;
            try {
                stmt = conn.createStatement();
                rs = stmt.executeQuery(sql.toString());
                while (rs.next()) {
                    try {
                        queue.put(parseResultSet(rs));
                    } catch (MalformedURLException e) {
                        // skip rows that do not yield a valid url
                        e.printStackTrace();
                    }
                }
            } catch (InterruptedException e) {
                interrupted = true;
                e.printStackTrace();
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                if (rs != null) try {rs.close();} catch (SQLException e) {}
                if (stmt != null) try {stmt.close();} catch (SQLException e) {}
                // always terminate the stream; without the poison entry the consumer would wait forever
                try {
                    queue.put(DCEntry.poison);
                } catch (InterruptedException e) {
                    interrupted = true;
                    e.printStackTrace();
                }
                // restore the interrupt status only after the poison entry was attempted,
                // because put() fails immediately on an interrupted thread
                if (interrupted) Thread.currentThread().interrupt();
            }
        }
    };
    dbreader.start();
    return queue;
}
/**
 * Converts the current row of a phpbb_posts result set into an entry.
 *
 * @param rs a result set that is positioned on a row of phpbb_posts
 * @return the entry built from post_id, post_time, post_subject, poster_id and post_text
 * @throws SQLException if a column cannot be read
 * @throws MalformedURLException if urlstub and the post id do not form a valid URL
 */
private DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException {
    final int item = rs.getInt("post_id");
    // phpBB addresses a single post with the 'p' parameter; 't' expects a topic id,
    // so a post id passed as 't' would point to an unrelated or missing topic
    final yacyURL url = new yacyURL(this.urlstub + "/viewtopic.php?p=" + item);
    final String subject = rs.getString("post_subject");
    final String text = xmlCleaner(rs.getString("post_text"));
    final String user = getUser(rs.getInt("poster_id"));
    // post_time is stored in seconds, Date wants milliseconds
    final Date date = new Date(rs.getLong("post_time") * 1000L);
    return new DCEntry(url, date, subject, user, text);
}
/**
 * Removes all characters that are not allowed in XML 1.0 documents and trims the result.
 * The string is processed by code point, so that valid supplementary characters
 * (encoded as surrogate pairs) are kept while unpaired surrogates are removed.
 *
 * @param s the text to clean, may be null
 * @return the cleaned and trimmed text, or null if s is null
 */
public static String xmlCleaner(String s) {
    if (s == null) return null;

    final int length = s.length();
    final StringBuilder cleaned = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        // an unpaired surrogate is returned as its own value and rejected below
        final int codePoint = s.codePointAt(i);
        i += Character.charCount(codePoint);
        if (isXmlChar(codePoint)) {
            cleaned.appendCodePoint(codePoint);
        }
    }
    return cleaned.toString().trim();
}

/** Tests whether a code point matches the Char production of XML 1.0. */
private static boolean isXmlChar(int codePoint) {
    return (codePoint >= 0x0020 && codePoint <= 0xD7FF)
        || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
        || (codePoint >= 0x10000 && codePoint <= 0x10FFFF)
        || codePoint == 0x0009
        || codePoint == 0x000A
        || codePoint == 0x000D;
}
/**
 * Resolves a poster id to the user name, using the users map as cache.
 *
 * @param poster_id the user_id to look up in phpbb_users
 * @return the user name; an empty string if the user is unknown, has no name,
 *         or if the database reports an error (errors are not cached)
 */
private String getUser(int poster_id) {
    final String cached = this.users.get(poster_id);
    if (cached != null) {
        return cached;
    }

    final String query = "select * from phpbb_users where user_id = " + poster_id;
    Statement statement = null;
    ResultSet row = null;
    try {
        statement = conn.createStatement();
        row = statement.executeQuery(query);
        String nick = row.next() ? row.getString("username") : null;
        if (nick == null) {
            nick = "";
        }
        this.users.put(poster_id, nick);
        return nick;
    } catch (SQLException e) {
        e.printStackTrace();
        return "";
    } finally {
        // best effort cleanup, failures while closing are ignored
        if (row != null) try {row.close();} catch (SQLException e) {}
        if (statement != null) try {statement.close();} catch (SQLException e) {}
    }
}
/**
 * Takes entries from the queue until DCEntry.poison arrives and writes them as surrogate
 * xml files into the target directory. Each file is written with a ".tmp" suffix first and
 * renamed when it is complete. A new file is started after maxEntriesInFile entries.
 * An empty queue produces no file.
 *
 * @param queue the source of the entries, terminated by DCEntry.poison
 * @param targetdir the directory where the files are created
 * @param versioninfo a text that becomes part of the file names
 * @param maxEntriesInFile the number of entries after which a file is closed
 * @return the number of files written, or 0 if the export was aborted by an error
 */
public int writeSurrogates(
        BlockingQueue<DCEntry> queue,
        File targetdir,
        String versioninfo,
        int maxEntriesInFile
    ) {
    OutputStreamWriter osw = null;
    try {
        // generate output file name and attributes
        final String targethost = new yacyURL(this.urlstub, null).getHost();
        int filesWritten = 0;
        int entriesInFile = 0;
        File outputfiletmp = null, outputfile = null;

        // write the result from the query concurrently in a file
        DCEntry e;
        while ((e = queue.take()) != DCEntry.poison) {
            if (osw == null) {
                // open the next file lazily, so that no empty file is left behind
                final String filename = targethost + "." + versioninfo + "." + filesWritten + ".xml";
                outputfiletmp = new File(targetdir, filename + ".tmp");
                outputfile = new File(targetdir, filename);
                if (outputfiletmp.exists()) outputfiletmp.delete();
                if (outputfile.exists()) outputfile.delete();
                osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
                osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
            }
            e.writeXML(osw);
            entriesInFile++;
            if (entriesInFile >= maxEntriesInFile) {
                finishSurrogateFile(osw, outputfiletmp, outputfile);
                osw = null;
                entriesInFile = 0;
                filesWritten++;
            }
        }

        // finish a partially filled last file; there is none if the queue was empty
        // or if the last entry completed a file
        if (osw != null) {
            finishSurrogateFile(osw, outputfiletmp, outputfile);
            osw = null;
            filesWritten++;
        }
        return filesWritten;
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
        // keep the interrupt status visible to the caller
        Thread.currentThread().interrupt();
    } finally {
        // only reached with an open writer if the export was aborted
        if (osw != null) try {osw.close();} catch (IOException e) {}
    }
    return 0;
}

/** Writes the closing tag, closes the writer and moves the temporary file to its final name. */
private static void finishSurrogateFile(OutputStreamWriter osw, File tmpfile, File targetfile) throws IOException {
    osw.write("</surrogates>\n");
    osw.close();
    if (!tmpfile.renameTo(targetfile)) {
        System.out.println("PhpBB3Dao: cannot rename " + tmpfile + " to " + targetfile);
    }
}
/**
 * Closes the connection to the database.
 * Delegates to closeConnection(), which tolerates a connection that was already closed;
 * calling this method more than once is therefore harmless.
 */
public void close() {
    closeConnection();
}
/**
 * Manual test: prints statistics of a local forum database and exports all posts
 * as surrogate files with up to 3000 entries each.
 *
 * @param args not used
 */
public static void main(String[] args) {
    PhpBB3Dao db = null;
    try {
        db = new PhpBB3Dao(
            "http://forum.yacy-websuche.de",
            "mysql",
            "localhost",
            3306,
            "forum",
            "root",
            ""
        );
        System.out.println("Posts in database : " + db.size());
        System.out.println("First entry       : " + db.first());
        System.out.println("Last entry        : " + db.latest());
        // "x" has no parent, so targetdir is null and the files are created relative to the working directory
        File targetdir = new File("x").getParentFile();
        db.writeSurrogates(db.query(0, -1, 100), targetdir, "id0-current", 3000);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // release the database connection in every case
        if (db != null) db.close();
    }
}
}

@ -643,13 +643,13 @@ public final class httpdFileHandler {
InputStream is = new BufferedInputStream(p.getInputStream());
StringBuilder stringBuffer = new StringBuilder(1024);
StringBuilder StringBuilder = new StringBuilder(1024);
while (is.available() > 0) {
stringBuffer.append((char) is.read());
StringBuilder.append((char) is.read());
}
String cgiReturn = stringBuffer.toString();
String cgiReturn = StringBuilder.toString();
int indexOfDelimiter = cgiReturn.indexOf("\n\n");
String[] cgiHeader = new String[0];
if (indexOfDelimiter > -1) {

@ -467,6 +467,7 @@ dc_rights
os.write("<record>\n");
os.write("<dc:Title><![CDATA[" + this.dc_title() + "]]></dc:Title>\n");
os.write("<dc:Identifier>" + this.dc_identifier() + "</dc:Identifier>\n");
os.write("<dc:Creator><![CDATA[" + this.dc_creator() + "]]></dc:Creator>\n");
os.write("<dc:Description><![CDATA[");
byte[] buffer = new byte[1000];
int c = 0;

Loading…
Cancel
Save