pull/1/head
sixcooler 13 years ago
commit 3b70ff7046

@ -26,21 +26,21 @@ public class Table_YMark_p {
prop.put("showtable", 0);
prop.put("showedit", 0);
prop.put("showselection", 0);
String table = (post == null) ? "admin_bookmarks" : post.get("table", "admin_bookmarks");
if (table != null && !sb.tables.hasHeap(table)) table = null;
// get the user name for the selected table
String bmk_user = null;
if (table != null)
bmk_user = table.substring(0,table.indexOf('_'));
// currently selected table
prop.put("showselection_table", table);
// show table selection
int count = 0;
Iterator<String> ti = sb.tables.tables();
final Iterator<String> ti = sb.tables.tables();
String tablename;
prop.put("showselection", 1);
while (ti.hasNext()) {
@ -55,7 +55,7 @@ public class Table_YMark_p {
prop.put("showselection_pattern", "");
if (post == null) return prop; // return rewrite properties
// get available tags and folders
count = 0;
/*
@ -89,13 +89,13 @@ public class Table_YMark_p {
Log.logException(e);
}
*/
final String counts = post.get("count", null);
int maxcount = (counts == null || counts.equals("all")) ? Integer.MAX_VALUE : post.getInt("count", 10);
String pattern = post.get("search", "");
Pattern matcher = (pattern.isEmpty() || pattern.equals(".*")) ? null : Pattern.compile(".*" + pattern + ".*");
final String pattern = post.get("search", "");
final Pattern matcher = (pattern.isEmpty() || pattern.equals(".*")) ? null : Pattern.compile(".*" + pattern + ".*");
prop.put("pattern", pattern);
List<String> columns = new ArrayList<String>();
for (final Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getKey().startsWith("col_")) {
@ -104,10 +104,10 @@ public class Table_YMark_p {
}
if (columns.isEmpty() && table != null) try {
columns = sb.tables.columns(table);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
count = 0;
if (table != null) {
Iterator<String> cit;
@ -120,22 +120,20 @@ public class Table_YMark_p {
prop.put("showselection_columns_" + count + "_checked", columns.contains(col) ? 1 : 0);
count++;
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
prop.put("showselection_columns", count);
// apply deletion requests
if (!post.get("deletetable", "").isEmpty()) try {
if (!post.get("deletetable", "").isEmpty()) {
sb.tables.clear(table);
sb.tables.clear(YMarkTables.TABLES.FOLDERS.tablename(bmk_user));
sb.tables.clear(YMarkTables.TABLES.TAGS.tablename(bmk_user));
} catch (IOException e) {
Log.logException(e);
}
// apply rebuildIndex request
/*
if (!post.get("rebuildindex", "").isEmpty()) try {
@ -145,19 +143,19 @@ public class Table_YMark_p {
Log.logException(e);
}
*/
if (!post.get("deleterows", "").isEmpty()) {
for (final Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) try {
sb.tables.bookmarks.deleteBookmark(bmk_user, entry.getValue().substring(5).getBytes());
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
}
}
}
if (!post.get("commitrow", "").isEmpty()) {
final YMarkEntry bmk = new YMarkEntry();
for (final Map.Entry<String, String> entry: post.entrySet()) {
@ -167,19 +165,19 @@ public class Table_YMark_p {
}
try {
sb.tables.bookmarks.addBookmark(bmk_user, bmk, false, false);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
}
}
// generate table
prop.put("showtable", 0);
prop.put("showedit", 0);
if (table != null) {
if (post.containsKey("editrow")) {
// check if we can find a key
String pk = null;
@ -193,46 +191,46 @@ public class Table_YMark_p {
if (pk != null && sb.tables.has(table, pk.getBytes())) {
setEdit(sb, prop, table, pk, columns);
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
}
} else if (post.containsKey("addrow")) try {
// get a new key
final String pk = UTF8.String(sb.tables.createRow(table));
setEdit(sb, prop, table, pk, columns);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
} else {
prop.put("showtable", 1);
prop.put("showtable_table", table);
try {
prop.put("showtable_bmksize", sb.tables.size(table));
prop.put("showtable_tagsize", sb.tables.size(YMarkTables.TABLES.TAGS.tablename(bmk_user)));
prop.put("showtable_foldersize", sb.tables.size(YMarkTables.TABLES.FOLDERS.tablename(bmk_user)));
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
prop.put("showtable_bmksize", 0);
prop.put("showtable_tagsize", 0);
prop.put("showtable_foldersize", 0);
}
// insert the columns
for (int i = 0; i < columns.size(); i++) {
prop.putHTML("showtable_columns_" + i + "_header", columns.get(i));
}
prop.put("showtable_columns", columns.size());
// insert all rows
try {
maxcount = Math.min(maxcount, sb.tables.size(table));
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
maxcount = 0;
}
@ -244,19 +242,19 @@ public class Table_YMark_p {
mapIterator = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, post.get("folders"));
} else if(post.containsKey("tags") && !post.get("tags").isEmpty()) {
// mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.tags.getBookmarks(bmk_user, post.get("tags")), maxcount).iterator();
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key())).split(YMarkUtil.TAGS_SEPARATOR);
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key())).split(YMarkUtil.TAGS_SEPARATOR);
mapIterator = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
} else {
mapIterator = sb.tables.orderByPK(sb.tables.iterator(table, matcher), maxcount).iterator();
}
Tables.Row row;
boolean dark = true;
byte[] cell;
while (mapIterator.hasNext() && count < maxcount) {
row = mapIterator.next();
if (row == null) continue;
// write table content
prop.put("showtable_list_" + count + "_dark", ((dark) ? 1 : 0) ); dark=!dark;
prop.put("showtable_list_" + count + "_pk", UTF8.String(row.getPK()));
@ -268,27 +266,27 @@ public class Table_YMark_p {
prop.put("showtable_list_" + count + "_columns", columns.size());
count++;
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
prop.put("showtable_list", count);
prop.put("showtable_num", count);
}
}
// adding the peer address
prop.put("address", sb.peers.mySeed().getPublicAddress());
// return rewrite properties
return prop;
}
private static void setEdit(final Switchboard sb, final serverObjects prop, final String table, final String pk, List<String> columns) throws IOException, RowSpaceExceededException {
private static void setEdit(final Switchboard sb, final serverObjects prop, final String table, final String pk, final List<String> columns) throws IOException, RowSpaceExceededException {
prop.put("showedit", 1);
prop.put("showedit_table", table);
prop.put("showedit_pk", pk);
Tables.Row row = sb.tables.select(table, pk.getBytes());
final Tables.Row row = sb.tables.select(table, pk.getBytes());
if (row == null) return;
int count = 0;
byte[] cell;

@ -31,12 +31,11 @@ import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Tables_p {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
@ -44,10 +43,10 @@ public class Tables_p {
prop.put("showtable", 0);
prop.put("showedit", 0);
prop.put("showselection", 0);
String table = (post == null) ? null : post.get("table", null);
if (table != null && !sb.tables.hasHeap(table)) table = null;
// show table selection
int count = 0;
final Iterator<String> ti = sb.tables.tables();
@ -63,38 +62,35 @@ public class Tables_p {
prop.put("showselection_pattern", "");
if (post == null) return prop; // return rewrite properties
final String counts = post.get("count", null);
int maxcount = (counts == null || counts.equals("all")) ? Integer.MAX_VALUE : post.getInt("count", 10);
final String pattern = post.get("search", "");
final Pattern matcher = (pattern.length() == 0 || pattern.equals(".*")) ? null : Pattern.compile(".*" + pattern + ".*");
prop.put("pattern", pattern);
List<String> columns = null;
if (table != null) try {
columns = sb.tables.columns(table);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
columns = new ArrayList<String>();
}
// apply deletion requests
if (post.get("deletetable", "").length() > 0) try {
if (post.get("deletetable", "").length() > 0)
sb.tables.clear(table);
} catch (IOException e) {
Log.logException(e);
}
if (post.get("deleterows", "").length() > 0) {
for (final Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) try {
sb.tables.delete(table, entry.getValue().substring(5).getBytes());
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
}
if (post.get("commitrow", "").length() > 0) {
final String pk = post.get("pk");
final Map<String, byte[]> map = new HashMap<String, byte[]>();
@ -105,17 +101,17 @@ public class Tables_p {
}
try {
sb.tables.update(table, pk.getBytes(), map);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}
// generate table
prop.put("showtable", 0);
prop.put("showedit", 0);
if (table != null) {
if (post.containsKey("editrow")) {
// check if we can find a key
String pk = null;
@ -129,34 +125,34 @@ public class Tables_p {
if (pk != null && sb.tables.has(table, pk.getBytes())) {
setEdit(sb, prop, table, pk, columns);
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
}
} else if (post.containsKey("addrow")) try {
// get a new key
final String pk = UTF8.String(sb.tables.createRow(table));
setEdit(sb, prop, table, pk, columns);
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
} else {
prop.put("showtable", 1);
prop.put("showtable_table", table);
// insert the columns
for (int i = 0; i < columns.size(); i++) {
prop.putHTML("showtable_columns_" + i + "_header", columns.get(i));
}
prop.put("showtable_columns", columns.size());
// insert all rows
try {
maxcount = Math.min(maxcount, sb.tables.size(table));
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
maxcount = 0;
}
@ -170,7 +166,7 @@ public class Tables_p {
while (mapIterator.hasNext() && count < maxcount) {
row = mapIterator.next();
if (row == null) continue;
// write table content
prop.put("showtable_list_" + count + "_dark", ((dark) ? 1 : 0) ); dark=!dark;
prop.put("showtable_list_" + count + "_pk", UTF8.String(row.getPK()));
@ -182,23 +178,23 @@ public class Tables_p {
prop.put("showtable_list_" + count + "_columns", columns.size());
count++;
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
prop.put("showtable_list", count);
prop.put("showtable_num", count);
}
}
// adding the peer address
prop.put("address", sb.peers.mySeed().getPublicAddress());
// return rewrite properties
return prop;
}
private static void setEdit(final Switchboard sb, final serverObjects prop, final String table, final String pk, List<String> columns) throws IOException, RowSpaceExceededException {
private static void setEdit(final Switchboard sb, final serverObjects prop, final String table, final String pk, final List<String> columns) throws IOException, RowSpaceExceededException {
prop.put("showedit", 1);
prop.put("showedit_table", table);
prop.put("showedit_pk", pk);

@ -1,4 +1,4 @@
//plasmaCrawlRobotsTxt.java
//plasmaCrawlRobotsTxt.java
//-------------------------------------
//part of YACY
//(C) by Michael Peter Christen; mc@yacy.net
@ -35,8 +35,6 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
@ -47,79 +45,81 @@ import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.io.ByteCount;
import org.apache.log4j.Logger;
public class RobotsTxt {
private static Logger log = Logger.getLogger(RobotsTxt.class);
protected static final String ROBOTS_DB_PATH_SEPARATOR = ";";
protected static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);
BEncodedHeap robotsTable;
private final BEncodedHeap robotsTable;
private final ConcurrentHashMap<String, DomSync> syncObjects;
//private static final HashSet<String> loadedRobots = new HashSet<String>(); // only for debugging
// Stateless marker type. Instances are stored per host:port key in syncObjects
// and serve as the monitor object of a synchronized block in getEntry.
private static class DomSync {
private DomSync() {}
}
/**
 * Creates a RobotsTxt instance backed by the given heap, starts with an empty
 * map of per-host synchronization objects and logs the heap's file.
 *
 * @param robotsTable the heap in which robots.txt records are stored
 */
public RobotsTxt(final BEncodedHeap robotsTable) {
this.robotsTable = robotsTable;
syncObjects = new ConcurrentHashMap<String, DomSync>();
this.syncObjects = new ConcurrentHashMap<String, DomSync>();
log.info("initiated robots table: " + robotsTable.getFile());
}
/**
 * Clears the robots table and the map of per-host synchronization objects,
 * after writing an info message to the log.
 */
public void clear() {
log.info("clearing robots table");
this.robotsTable.clear();
syncObjects.clear();
this.syncObjects.clear();
}
/**
 * Reports the size of the underlying robots table.
 *
 * @return the value returned by the robots table's own size() method
 */
public int size() {
    final int tableSize = this.robotsTable.size();
    return tableSize;
}
/**
 * Looks up the robots.txt entry for the host of the given URL, allowing an
 * online fetch when the stored entry is missing or not fresh.
 *
 * @param theURL the URL whose host is looked up; must not be null
 * @param thisAgents agent names handed through to the internal lookup
 * @return the entry, or null if the URL's protocol does not start with "http"
 * @throws IllegalArgumentException if theURL is null
 * @throws IOException if the internal lookup fails
 */
public RobotsTxtEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents) throws IOException {
    if (theURL == null) {
        throw new IllegalArgumentException();
    }
    final boolean httpLike = theURL.getProtocol().startsWith("http");
    if (httpLike) {
        return getEntry(theURL, thisAgents, true);
    }
    return null;
}
private RobotsTxtEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents, final boolean fetchOnlineIfNotAvailableOrNotFresh) throws IOException {
// this method will always return a non-null value
String urlHostPort = getHostPort(theURL);
final String urlHostPort = getHostPort(theURL);
RobotsTxtEntry robotsTxt4Host = null;
Map<String, byte[]> record;
try {
record = this.robotsTable.get(this.robotsTable.encodedKey(urlHostPort));
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
log.warn("memory exhausted", e);
record = null;
}
if (record != null) robotsTxt4Host = new RobotsTxtEntry(urlHostPort, record);
if (fetchOnlineIfNotAvailableOrNotFresh && (
robotsTxt4Host == null ||
robotsTxt4Host == null ||
robotsTxt4Host.getLoadedDate() == null ||
System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() > 7*24*60*60*1000
)) {
// make or get a synchronization object
DomSync syncObj = this.syncObjects.get(urlHostPort);
if (syncObj == null) {
syncObj = new DomSync();
this.syncObjects.put(urlHostPort, syncObj);
}
// we can now synchronize for each host separately
synchronized (syncObj) {
// if we have not found any data or the data is older than 7 days, we need to load it from the remote server
// check the robots table again for all threads that come here because they waited for another one
// to complete a download
try {
record = this.robotsTable.get(this.robotsTable.encodedKey(urlHostPort));
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
log.warn("memory exhausted", e);
record = null;
}
@ -129,16 +129,16 @@ public class RobotsTxt {
System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() <= 1*24*60*60*1000) {
return robotsTxt4Host;
}
// generating the proper url to download the robots txt
MultiProtocolURI robotsURL = null;
try {
try {
robotsURL = new MultiProtocolURI("http://" + urlHostPort + "/robots.txt");
} catch (final MalformedURLException e) {
log.fatal("Unable to generate robots.txt URL for host:port '" + urlHostPort + "'.", e);
robotsURL = null;
}
Object[] result = null;
if (robotsURL != null) {
if (log.isDebugEnabled()) log.debug("Trying to download the robots.txt file from URL '" + robotsURL + "'.");
@ -155,15 +155,15 @@ public class RobotsTxt {
", robotsTxt4Host=" + ((robotsTxt4Host == null) ? "NULL" : robotsTxt4Host.getLoadedDate().toString());
loadedRobots.add(robotsURL.toNormalform(false, false));
*/
if (result == null) {
// no robots.txt available, make an entry to prevent that the robots loading is done twice
if (robotsTxt4Host == null) {
// generate artificial entry
robotsTxt4Host = new RobotsTxtEntry(
robotsURL,
new ArrayList<String>(),
new ArrayList<String>(),
robotsURL,
new ArrayList<String>(),
new ArrayList<String>(),
new Date(),
new Date(),
null,
@ -173,13 +173,13 @@ public class RobotsTxt {
} else {
robotsTxt4Host.setLoadedDate(new Date());
}
// store the data into the robots DB
int sz = this.robotsTable.size();
final int sz = this.robotsTable.size();
addEntry(robotsTxt4Host);
if (this.robotsTable.size() <= sz) {
log.fatal("new entry in robots.txt table failed, resetting database");
this.clear();
clear();
addEntry(robotsTxt4Host);
}
} else {
@ -189,7 +189,7 @@ public class RobotsTxt {
denyPath = new ArrayList<String>();
denyPath.add("/");
}
// store the data into the robots DB
robotsTxt4Host = addEntry(
robotsURL,
@ -207,14 +207,14 @@ public class RobotsTxt {
return robotsTxt4Host;
}
private RobotsTxtEntry addEntry(
final MultiProtocolURI theURL,
final ArrayList<String> allowPathList,
final ArrayList<String> denyPathList,
final Date loadedDate,
final Date modDate,
final String eTag,
final MultiProtocolURI theURL,
final ArrayList<String> allowPathList,
final ArrayList<String> denyPathList,
final Date loadedDate,
final Date modDate,
final String eTag,
final String sitemap,
final long crawlDelayMillis,
final String agentName
@ -226,7 +226,7 @@ public class RobotsTxt {
addEntry(entry);
return entry;
}
private String addEntry(final RobotsTxtEntry entry) {
// writes a new page and returns key
try {
@ -236,24 +236,24 @@ public class RobotsTxt {
log.warn("cannot write robots.txt entry", e);
return null;
}
}
}
// methods that had been in robotsParser.java:
private static final int DOWNLOAD_ACCESS_RESTRICTED = 0;
private static final int DOWNLOAD_ROBOTS_TXT = 1;
private static final int DOWNLOAD_ETAG = 2;
private static final int DOWNLOAD_MODDATE = 3;
/**
 * Builds the "host:port" key for a URL. The port comes from getPort(theURL);
 * the resulting string is lower-cased and interned before it is returned.
 *
 * @param theURL the URL to derive the key from
 * @return the lower-cased, interned "host:port" string
 */
static final String getHostPort(final MultiProtocolURI theURL) {
String urlHostPort = null;
final int port = getPort(theURL);
urlHostPort = theURL.getHost() + ":" + port;
urlHostPort = urlHostPort.toLowerCase().intern();
urlHostPort = urlHostPort.toLowerCase().intern();
return urlHostPort;
}
private static final int getPort(final MultiProtocolURI theURL) {
int port = theURL.getPort();
if (port == -1) {
@ -262,41 +262,41 @@ public class RobotsTxt {
} else if (theURL.getProtocol().equalsIgnoreCase("https")) {
port = 443;
}
}
return port;
}
private static Object[] downloadRobotsTxt(final MultiProtocolURI robotsURL, int redirectionCount, final RobotsTxtEntry entry) throws Exception {
if (robotsURL == null || !robotsURL.getProtocol().startsWith("http")) return null;
if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};
redirectionCount--;
boolean accessCompletelyRestricted = false;
byte[] robotsTxt = null;
long downloadStart, downloadEnd;
String eTag=null, oldEtag = null;
Date lastMod=null;
downloadStart = System.currentTimeMillis();
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
RequestHeader reqHeaders = new RequestHeader();
// add yacybot user agent
reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
// adding referer
reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
if (entry != null) {
oldEtag = entry.getETag();
reqHeaders = new RequestHeader();
final Date modDate = entry.getModDate();
if (modDate != null) reqHeaders.put(RequestHeader.IF_MODIFIED_SINCE, HeaderFramework.formatRFC1123(entry.getModDate()));
}
// setup http-client
//TODO: adding Traffic statistic for robots download?
final HTTPClient client = new HTTPClient();
@ -304,7 +304,7 @@ public class RobotsTxt {
try {
// check for interruption
if (Thread.currentThread().isInterrupted()) throw new InterruptedException("Shutdown in progress.");
// sending the get request
robotsTxt = client.GETbytes(robotsURL);
// statistics:
@ -313,7 +313,7 @@ public class RobotsTxt {
}
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
// check the response status
if (code > 199 && code < 300) {
if (!header.mime().startsWith("text/plain")) {
@ -324,15 +324,15 @@ public class RobotsTxt {
// getting some metadata
eTag = header.containsKey(HeaderFramework.ETAG)?(header.get(HeaderFramework.ETAG)).trim():null;
lastMod = header.lastModified();
// if the robots.txt file was not changed we break here
if ((eTag != null) && (oldEtag != null) && (eTag.equals(oldEtag))) {
if (log.isDebugEnabled()) log.debug("Robots.txt from URL '" + robotsURL + "' was not modified. Abort downloading of new version.");
return null;
}
downloadEnd = System.currentTimeMillis();
downloadEnd = System.currentTimeMillis();
if (log.isDebugEnabled()) log.debug("Robots.txt successfully loaded from URL '" + robotsURL + "' in " + (downloadEnd-downloadStart) + " ms.");
}
} else if (code == 304) {
@ -343,16 +343,16 @@ public class RobotsTxt {
if (redirectionUrlString==null) {
if (log.isDebugEnabled())
log.debug("robots.txt could not be downloaded from URL '" + robotsURL + "' because of missing redirecton header. [" + client.getHttpResponse().getStatusLine() + "].");
robotsTxt = null;
robotsTxt = null;
} else {
redirectionUrlString = redirectionUrlString.trim();
// generating the new URL object
final MultiProtocolURI redirectionUrl = MultiProtocolURI.newURL(robotsURL, redirectionUrlString);
final MultiProtocolURI redirectionUrl = MultiProtocolURI.newURL(robotsURL, redirectionUrlString);
// following the redirection
if (log.isDebugEnabled()) log.debug("Redirection detected for robots.txt with URL '" + robotsURL + "'." +
if (log.isDebugEnabled()) log.debug("Redirection detected for robots.txt with URL '" + robotsURL + "'." +
"\nRedirecting request to: " + redirectionUrl);
return downloadRobotsTxt(redirectionUrl,redirectionCount,entry);
}
@ -363,7 +363,7 @@ public class RobotsTxt {
if (log.isDebugEnabled())
log.debug("robots.txt could not be downloaded from URL '" + robotsURL + "'. [" + client.getHttpResponse().getStatusLine() + "].");
robotsTxt = null;
}
}
} catch (final Exception e) {
throw e;
}

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -52,13 +52,13 @@ import net.yacy.kelondro.util.LookAheadIterator;
public class Tables {
private static final String suffix = ".bheap";
private static final String system_table_pkcounter = "pkcounter";
private static final String system_table_pkcounter_counterName = "pk";
private File location;
private ConcurrentHashMap<String, BEncodedHeap> tables;
private final File location;
private final ConcurrentHashMap<String, BEncodedHeap> tables;
int keymaxlen;
// use our own formatter to prevent concurrency locks with other processes
@ -69,7 +69,7 @@ public class Tables {
if (!this.location.exists()) this.location.mkdirs();
this.keymaxlen = keymaxlen;
this.tables = new ConcurrentHashMap<String, BEncodedHeap>();
String[] files = this.location.list();
final String[] files = this.location.list();
String tablename;
File file;
for (final String f: files) {
@ -82,16 +82,16 @@ public class Tables {
tablename = f.substring(0, f.length() - suffix.length());
try {
getHeap(tablename);
} catch (IOException e) {
} catch (final IOException e) {
}
}
}
}
/**
 * @return an iterator over the names (map keys) of all currently registered tables
 */
public Iterator<String> tables() {
return this.tables.keySet().iterator();
}
public void close(final String tablename) {
final BEncodedHeap heap = this.tables.remove(tablename);
if (heap == null) return;
@ -103,30 +103,33 @@ public class Tables {
this.tables.clear();
}
public void clear(final String tablename) throws IOException {
BEncodedHeap heap = getHeap(tablename);
if (heap == null) return;
heap.clear();
File f = heap.getFile();
heap.close();
/**
 * Removes a table: clears and closes its heap, unregisters it and deletes
 * the backing file ({@code <location>/<tablename>.bheap}).
 *
 * This is a best-effort operation. If the heap cannot be opened or cleared,
 * the table is still unregistered and its file is still deleted.
 *
 * @param tablename name of the table to remove
 */
public void clear(final String tablename) {
    try {
        final BEncodedHeap heap = getHeap(tablename);
        // guard clear() and close() together: previously only clear() was
        // null-checked, so a null heap caused a NullPointerException on close()
        if (heap != null) {
            try {
                heap.clear();
            } finally {
                // always release the heap so the file below can be deleted
                heap.close();
            }
        }
    } catch (final IOException ignored) {
        // deliberately ignored: the heap could not be opened or cleared, but
        // the table is removed and its file deleted below in any case
    }
    final File f = new File(this.location, tablename + suffix);
    this.tables.remove(tablename);
    FileUtils.deletedelete(f);
}
/**
 * Checks whether a heap can be obtained for the given table name.
 *
 * NOTE(review): getHeap appears to open and register a new heap when none is
 * registered yet, so this check may create the table as a side effect - confirm.
 *
 * @param tablename name of the table
 * @return true if getHeap returns a non-null heap, false if it returns null
 *         or throws an IOException
 */
public boolean hasHeap(final String tablename) {
try {
return getHeap(tablename) != null;
} catch (IOException e) {
} catch (final IOException e) {
return false;
}
}
public BEncodedHeap getHeap(final String tablename) throws IOException {
final String table = tablename + suffix;
BEncodedHeap heap = this.tables.get(tablename);
if (heap != null) return heap;
// open a new heap and register it in the tables
final File heapf = new File(this.location, table);
heap = new BEncodedHeap(heapf, this.keymaxlen);
@ -134,12 +137,12 @@ public class Tables {
return heap;
}
/**
 * @param table name of the table
 * @return the size reported by the heap of the given table
 * @throws IOException if the heap cannot be obtained
 */
public int size(String table) throws IOException {
BEncodedHeap heap = getHeap(table);
public int size(final String table) throws IOException {
final BEncodedHeap heap = getHeap(table);
return heap.size();
}
private byte[] ukey(String tablename) throws IOException, RowSpaceExceededException {
private byte[] ukey(final String tablename) throws IOException, RowSpaceExceededException {
Row row = select(system_table_pkcounter, UTF8.getBytes(tablename));
if (row == null) {
// table counter entry in pkcounter table does not exist: make a new table entry
@ -160,110 +163,110 @@ public class Tables {
}
return pk;
}
/**
 * Formats an integer as a key string by left-padding its decimal
 * representation with '0' characters up to keymaxlen characters.
 * If the decimal representation is already keymaxlen characters or longer,
 * no padding is added and the result is longer than or equal to keymaxlen.
 *
 * @param i the number to format
 * @return the zero-padded decimal string
 */
private String int2key(int i) {
StringBuilder sb = new StringBuilder(this.keymaxlen);
String is = Integer.toString(i);
private String int2key(final int i) {
final StringBuilder sb = new StringBuilder(this.keymaxlen);
final String is = Integer.toString(i);
for (int j = 0; j < this.keymaxlen - is.length(); j++) sb.append('0');
sb.append(is);
return sb.toString();
}
/**
 * Inserts a map into a table using a new unique key.
 * The key is obtained from ukey(tablename), the map is stored under it via
 * update(), and the key is then recorded in the pkcounter system table
 * under the table's name.
 *
 * @param tablename name of the table to insert into
 * @param map the column values of the new row
 * @return the newly created key under which the map was stored
 * @throws IOException if a heap cannot be obtained or written
 * @throws RowSpaceExceededException passed on from ukey() or the heap insert
 */
public byte[] insert(final String tablename, Map<String, byte[]> map) throws IOException, RowSpaceExceededException {
byte[] uk = ukey(tablename);
public byte[] insert(final String tablename, final Map<String, byte[]> map) throws IOException, RowSpaceExceededException {
final byte[] uk = ukey(tablename);
update(tablename, uk, map);
BEncodedHeap heap = getHeap(system_table_pkcounter);
final BEncodedHeap heap = getHeap(system_table_pkcounter);
heap.insert(UTF8.getBytes(tablename), system_table_pkcounter_counterName, uk);
return uk;
}
/**
 * Inserts a map under the given primary key into the heap of a table.
 *
 * @param table name of the table
 * @param pk primary key of the row
 * @param map the column values of the row
 * @throws IOException if the heap cannot be obtained, or if the heap reports a
 *         RowSpaceExceededException (rethrown as IOException with the same
 *         message; the original exception is not attached as cause)
 */
public void insert(final String table, byte[] pk, Map<String, byte[]> map) throws IOException {
BEncodedHeap heap = getHeap(table);
public void insert(final String table, final byte[] pk, final Map<String, byte[]> map) throws IOException {
final BEncodedHeap heap = getHeap(table);
try {
heap.insert(pk, map);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
throw new IOException(e.getMessage());
}
}
/**
 * Inserts a row into the heap of a table, using the row's own pk as key.
 *
 * @param table name of the table
 * @param row the row to insert
 * @throws IOException if the heap cannot be obtained, or if the heap reports a
 *         RowSpaceExceededException (rethrown as IOException with the same message)
 */
public void insert(final String table, Row row) throws IOException {
BEncodedHeap heap = getHeap(table);
public void insert(final String table, final Row row) throws IOException {
final BEncodedHeap heap = getHeap(table);
try {
heap.insert(row.pk, row);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
throw new IOException(e.getMessage());
}
}
/**
 * Calls update on the heap of a table for the given primary key and map.
 *
 * @param table name of the table
 * @param pk primary key of the row
 * @param map the column values handed to the heap's update
 * @throws IOException if the heap cannot be obtained, or if the heap reports a
 *         RowSpaceExceededException (rethrown as IOException with the same message)
 */
public void update(final String table, byte[] pk, Map<String, byte[]> map) throws IOException {
BEncodedHeap heap = getHeap(table);
public void update(final String table, final byte[] pk, final Map<String, byte[]> map) throws IOException {
final BEncodedHeap heap = getHeap(table);
try {
heap.update(pk, map);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
throw new IOException(e.getMessage());
}
}
/**
 * Calls update on the heap of a table, using the row's own pk as key.
 *
 * @param table name of the table
 * @param row the row handed to the heap's update
 * @throws IOException if the heap cannot be obtained, or if the heap reports a
 *         RowSpaceExceededException (rethrown as IOException with the same message)
 */
public void update(final String table, Row row) throws IOException {
BEncodedHeap heap = getHeap(table);
public void update(final String table, final Row row) throws IOException {
final BEncodedHeap heap = getHeap(table);
try {
heap.update(row.pk, row);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
throw new IOException(e.getMessage());
}
}
/**
 * Creates a new row without any column values by inserting an empty map.
 *
 * @param table name of the table
 * @return the key returned by insert for the new row
 * @throws IOException passed on from insert
 * @throws RowSpaceExceededException passed on from insert
 */
public byte[] createRow(String table) throws IOException, RowSpaceExceededException {
public byte[] createRow(final String table) throws IOException, RowSpaceExceededException {
return this.insert(table, new ConcurrentHashMap<String, byte[]>());
}
/**
 * Reads the row stored under a primary key.
 *
 * @param table name of the table
 * @param pk primary key of the row
 * @return a Row built from the key and the heap's value for it,
 *         or null if the heap does not contain the key
 * @throws IOException if the heap cannot be obtained or read
 * @throws RowSpaceExceededException passed on from the heap access
 */
public Row select(final String table, byte[] pk) throws IOException, RowSpaceExceededException {
BEncodedHeap heap = getHeap(table);
public Row select(final String table, final byte[] pk) throws IOException, RowSpaceExceededException {
final BEncodedHeap heap = getHeap(table);
if (heap.containsKey(pk)) return new Row(pk, heap.get(pk));
return null;
}
/**
 * Deletes the entry stored under a primary key from the heap of a table.
 *
 * @param table name of the table
 * @param pk primary key of the row to delete
 * @throws IOException if the heap cannot be obtained or the delete fails
 */
public void delete(final String table, byte[] pk) throws IOException {
BEncodedHeap heap = getHeap(table);
public void delete(final String table, final byte[] pk) throws IOException {
final BEncodedHeap heap = getHeap(table);
heap.delete(pk);
}
/**
 * @param table name of the table
 * @param key the key to look for
 * @return true if the heap of the table contains the key
 * @throws IOException if the heap cannot be obtained
 */
public boolean has(String table, byte[] key) throws IOException {
BEncodedHeap heap = getHeap(table);
public boolean has(final String table, final byte[] key) throws IOException {
final BEncodedHeap heap = getHeap(table);
return heap.containsKey(key);
}
/**
 * @param table name of the table
 * @return the key iterator provided by the heap of the table
 * @throws IOException if the heap cannot be obtained
 */
public Iterator<byte[]> keys(String table) throws IOException {
BEncodedHeap heap = getHeap(table);
public Iterator<byte[]> keys(final String table) throws IOException {
final BEncodedHeap heap = getHeap(table);
return heap.keys();
}
/**
 * @param table name of the table
 * @return a RowIterator over the given table without any where-condition
 * @throws IOException passed on from the RowIterator constructor
 */
public Iterator<Row> iterator(String table) throws IOException {
public Iterator<Row> iterator(final String table) throws IOException {
return new RowIterator(table);
}
/**
 * @param table name of the table
 * @param whereColumn name of the column used for the where-condition
 * @param whereValue the value handed to the RowIterator as where-value
 * @return a RowIterator constructed with the given column and value
 * @throws IOException passed on from the RowIterator constructor
 */
public Iterator<Row> iterator(String table, String whereColumn, byte[] whereValue) throws IOException {
public Iterator<Row> iterator(final String table, final String whereColumn, final byte[] whereValue) throws IOException {
return new RowIterator(table, whereColumn, whereValue);
}
/**
 * @param table name of the table
 * @param whereColumn name of the column used for the where-condition
 * @param wherePattern the pattern handed to the RowIterator as where-pattern
 * @return a RowIterator constructed with the given column and pattern
 * @throws IOException passed on from the RowIterator constructor
 */
public Iterator<Row> iterator(String table, String whereColumn, Pattern wherePattern) throws IOException {
public Iterator<Row> iterator(final String table, final String whereColumn, final Pattern wherePattern) throws IOException {
return new RowIterator(table, whereColumn, wherePattern);
}
/**
 * @param table name of the table
 * @param wherePattern the pattern handed to the RowIterator; no column is given
 * @return a RowIterator constructed with the given pattern
 * @throws IOException passed on from the RowIterator constructor
 */
public Iterator<Row> iterator(String table, Pattern wherePattern) throws IOException {
public Iterator<Row> iterator(final String table, final Pattern wherePattern) throws IOException {
return new RowIterator(table, wherePattern);
}
public Collection<Row> orderByPK(Iterator<Row> rowIterator, int maxcount) {
TreeMap<String, Row> sortTree = new TreeMap<String, Row>();
public Collection<Row> orderByPK(final Iterator<Row> rowIterator, int maxcount) {
final TreeMap<String, Row> sortTree = new TreeMap<String, Row>();
Row row;
while ((maxcount < 0 || maxcount-- > 0) && rowIterator.hasNext()) {
row = rowIterator.next();
@ -271,9 +274,9 @@ public class Tables {
}
return sortTree.values();
}
public Collection<Row> orderBy(Iterator<Row> rowIterator, int maxcount, String sortColumn) {
TreeMap<String, Row> sortTree = new TreeMap<String, Row>();
public Collection<Row> orderBy(final Iterator<Row> rowIterator, int maxcount, final String sortColumn) {
final TreeMap<String, Row> sortTree = new TreeMap<String, Row>();
Row row;
byte[] r;
while ((maxcount < 0 || maxcount-- > 0) && rowIterator.hasNext()) {
@ -287,9 +290,9 @@ public class Tables {
}
return sortTree.values();
}
/**
 * Returns the column list reported by the heap that backs the given table.
 *
 * NOTE(review): the signature and heap lines appear as a without-final /
 * with-final pair (looks like diff before/after lines); confirm which is live.
 *
 * @param table name of the table; its heap is resolved through getHeap
 * @return whatever the heap's columns() call returns
 * @throws IOException declared here; presumably raised when the heap cannot be obtained or read
 */
public ArrayList<String> columns(String table) throws IOException {
BEncodedHeap heap = getHeap(table);
public ArrayList<String> columns(final String table) throws IOException {
final BEncodedHeap heap = getHeap(table);
return heap.columns();
}
@ -299,20 +302,20 @@ public class Tables {
private final byte[] whereValue;
private final Pattern wherePattern;
private final Iterator<Map.Entry<byte[], Map<String, byte[]>>> i;
/**
* Iterator that iterates all elements in the given table.
* All three filter fields are set to null, so no where-condition is configured.
*
* NOTE(review): the signature, heap and iterator lines appear as old/new
* pairs (looks like diff before/after lines); confirm which is live.
*
* @param table name of the table; its heap is resolved through getHeap
* @throws IOException declared here; presumably raised when the heap cannot be obtained or read
*/
public RowIterator(String table) throws IOException {
public RowIterator(final String table) throws IOException {
this.whereColumn = null;
this.whereValue = null;
this.wherePattern = null;
BEncodedHeap heap = getHeap(table);
i = heap.iterator();
final BEncodedHeap heap = getHeap(table);
this.i = heap.iterator();
}
/**
* iterator that iterates all elements in the given table
* where a given column is equal to a given value
@ -321,15 +324,15 @@ public class Tables {
* @param whereValue
* @throws IOException
*/
// NOTE(review): signature, heap and iterator lines appear as old/new pairs
// (looks like diff before/after lines); confirm which is live.
public RowIterator(String table, String whereColumn, byte[] whereValue) throws IOException {
public RowIterator(final String table, final String whereColumn, final byte[] whereValue) throws IOException {
// a comparison value is only accepted together with a column to compare
assert whereColumn != null || whereValue == null;
this.whereColumn = whereColumn;
this.whereValue = whereValue;
// value mode: no pattern is configured
this.wherePattern = null;
BEncodedHeap heap = getHeap(table);
i = heap.iterator();
final BEncodedHeap heap = getHeap(table);
this.i = heap.iterator();
}
/**
* iterator that iterates all elements in the given table
* where a given column matches with a given value
@ -338,14 +341,14 @@ public class Tables {
* @param wherePattern
* @throws IOException
*/
// NOTE(review): signature, heap and iterator lines appear as old/new pairs
// (looks like diff before/after lines); confirm which is live.
public RowIterator(String table, String whereColumn, Pattern wherePattern) throws IOException {
public RowIterator(final String table, final String whereColumn, final Pattern wherePattern) throws IOException {
this.whereColumn = whereColumn;
// pattern mode: no comparison value is configured
this.whereValue = null;
this.wherePattern = wherePattern;
BEncodedHeap heap = getHeap(table);
i = heap.iterator();
final BEncodedHeap heap = getHeap(table);
this.i = heap.iterator();
}
/**
* iterator that iterates all elements in the given table
* where any column matches with a given value
@ -353,18 +356,18 @@ public class Tables {
* @param pattern
* @throws IOException
*/
// NOTE(review): signature, heap and iterator lines appear as old/new pairs
// (looks like diff before/after lines); confirm which is live.
public RowIterator(String table, Pattern pattern) throws IOException {
public RowIterator(final String table, final Pattern pattern) throws IOException {
// pattern without a column name and without a comparison value
this.whereColumn = null;
this.whereValue = null;
this.wherePattern = pattern;
BEncodedHeap heap = getHeap(table);
i = heap.iterator();
final BEncodedHeap heap = getHeap(table);
this.i = heap.iterator();
}
protected Row next0() {
Row r;
while (i.hasNext()) {
r = new Row(i.next());
while (this.i.hasNext()) {
r = new Row(this.i.next());
if (this.whereValue != null) {
if (ByteBuffer.equals(r.get(this.whereColumn), this.whereValue)) return r;
} else if (this.wherePattern != null) {
@ -379,90 +382,90 @@ public class Tables {
}
} else {
return r;
}
}
}
return null;
}
}
public static class Data extends LinkedHashMap<String, byte[]> {
private static final long serialVersionUID = 978426054043749337L;
public Data() {
super();
}
/**
 * Creates a column map and copies all entries of the given map into it.
 *
 * NOTE(review): the putAll call appears twice, with and without "this."
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param map entries to copy; asserted to be non-null
 */
private Data(final Map<String, byte[]> map) {
super();
assert map != null;
this.putAll(map);
putAll(map);
}
/**
 * Stores a string value under the given column name, converted to bytes
 * with UTF8.getBytes.
 *
 * NOTE(review): the signature appears as a without-final / with-final pair
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name used as map key
 * @param value string to store
 */
public void put(String colname, String value) {
public void put(final String colname, final String value) {
super.put(colname, UTF8.getBytes(value));
}
/**
 * Stores an int under the given column name as the bytes of its decimal
 * string form (Integer.toString, then UTF8.getBytes).
 *
 * NOTE(review): the signature appears as a without-final / with-final pair
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name used as map key
 * @param value number to store
 */
public void put(String colname, int value) {
public void put(final String colname, final int value) {
super.put(colname, UTF8.getBytes(Integer.toString(value)));
}
/**
 * Stores a long under the given column name as the bytes of its decimal
 * string form (Long.toString, then UTF8.getBytes).
 *
 * NOTE(review): the signature appears as a without-final / with-final pair
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name used as map key
 * @param value number to store
 */
public void put(String colname, long value) {
public void put(final String colname, final long value) {
super.put(colname, UTF8.getBytes(Long.toString(value)));
}
/**
 * Stores a date under the given column name, formatted with
 * my_SHORT_MILSEC_FORMATTER and converted to bytes with UTF8.getBytes.
 *
 * NOTE(review): the signature appears as a without-final / with-final pair
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name used as map key
 * @param value date to store
 */
public void put(String colname, Date value) {
public void put(final String colname, final Date value) {
super.put(colname, UTF8.getBytes(my_SHORT_MILSEC_FORMATTER.format(value)));
}
/**
 * Returns the raw bytes stored under a column, or a default when the map
 * lookup yields null.
 *
 * NOTE(review): the signature and lookup lines appear as a without-final /
 * with-final pair (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name to look up
 * @param dflt value returned when nothing is stored for the column
 * @return the stored byte array itself (not a copy), or dflt
 */
public byte[] get(String colname, byte[] dflt) {
byte[] r = this.get(colname);
public byte[] get(final String colname, final byte[] dflt) {
final byte[] r = this.get(colname);
if (r == null) return dflt;
return r;
}
/**
 * Returns the value stored under a column as a string (via UTF8.String), or
 * a default when the map lookup yields null.
 *
 * NOTE(review): the signature and lookup lines appear as a without-final /
 * with-final pair (looks like diff before/after lines); confirm which is live.
 *
 * @param colname column name to look up
 * @param dflt value returned when nothing is stored for the column
 * @return the decoded string, or dflt
 */
public String get(String colname, String dflt) {
byte[] r = this.get(colname);
public String get(final String colname, final String dflt) {
final byte[] r = this.get(colname);
if (r == null) return dflt;
return UTF8.String(r);
}
/**
 * Returns the value stored under a column parsed as a decimal number and
 * cast to int, or a default when nothing is stored or parsing throws
 * NumberFormatException.
 *
 * NOTE(review): the signature, lookup and catch lines appear as a
 * without-final / with-final pair (looks like diff before/after lines);
 * confirm which is live.
 *
 * @param colname column name to look up
 * @param dflt value returned when the column is missing or not parseable
 * @return the parsed number narrowed to int, or dflt
 */
public int get(String colname, int dflt) {
byte[] r = this.get(colname);
public int get(final String colname, final int dflt) {
final byte[] r = this.get(colname);
if (r == null) return dflt;
try {
// NOTE(review): the (int) cast silently drops the high bits of a parsed
// value outside the int range; confirm stored values always fit
return (int) ByteArray.parseDecimal(r);
} catch (NumberFormatException e) {
} catch (final NumberFormatException e) {
return dflt;
}
}
/**
 * Returns the value stored under a column parsed with
 * ByteArray.parseDecimal, or a default when nothing is stored or parsing
 * throws NumberFormatException.
 *
 * NOTE(review): the signature, lookup and catch lines appear as a
 * without-final / with-final pair (looks like diff before/after lines);
 * confirm which is live.
 *
 * @param colname column name to look up
 * @param dflt value returned when the column is missing or not parseable
 * @return the parsed number, or dflt
 */
public long get(String colname, long dflt) {
byte[] r = this.get(colname);
public long get(final String colname, final long dflt) {
final byte[] r = this.get(colname);
if (r == null) return dflt;
try {
return ByteArray.parseDecimal(r);
} catch (NumberFormatException e) {
} catch (final NumberFormatException e) {
return dflt;
}
}
/**
 * Returns the value stored under a column parsed as a date with
 * my_SHORT_MILSEC_FORMATTER, or a default when nothing is stored or parsing
 * throws ParseException.
 *
 * NOTE(review): the signature, lookup and catch lines appear as a
 * without-final / with-final pair (looks like diff before/after lines);
 * confirm which is live.
 *
 * @param colname column name to look up
 * @param dflt value returned when the column is missing or not parseable
 * @return the parsed date, or dflt
 */
public Date get(String colname, Date dflt) {
byte[] r = this.get(colname);
public Date get(final String colname, final Date dflt) {
final byte[] r = this.get(colname);
if (r == null) return dflt;
try {
return my_SHORT_MILSEC_FORMATTER.parse(UTF8.String(r));
} catch (ParseException e) {
} catch (final ParseException e) {
return dflt;
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder(this.size() * 40);
final StringBuilder sb = new StringBuilder(this.size() * 40);
sb.append('{');
for (final Map.Entry<String, byte[]> entry: this.entrySet()) {
for (final Map.Entry<String, byte[]> entry: entrySet()) {
sb.append(entry.getKey()).append('=').append(UTF8.String(entry.getValue())).append(", ");
}
if (sb.length() > 1) sb.setLength(sb.length() - 2);
@ -470,13 +473,13 @@ public class Tables {
return sb.toString();
}
}
public class Row extends Data {
private static final long serialVersionUID = 978426054043749338L;
private final byte[] pk;
private Row(final Map.Entry<byte[], Map<String, byte[]>> entry) {
super(entry.getValue());
assert entry != null;
@ -484,43 +487,43 @@ public class Tables {
assert entry.getValue() != null;
this.pk = entry.getKey();
}
/**
 * Creates a row from a primary key and a map of column values.
 *
 * @param pk primary key of the row; asserted to be non-null
 * @param map column values handed to the Data constructor; asserted to be non-null
 */
private Row(final byte[] pk, final Map<String, byte[]> map) {
    super(map);
    // both arguments are required; checked only when assertions are enabled
    assert pk != null && map != null;
    this.pk = pk;
}
/**
 * Creates a row with a primary key and exactly one column entry.
 *
 * NOTE(review): the signature appears as a without-final / with-final pair
 * (looks like diff before/after lines); confirm which is live.
 *
 * @param pk primary key of the row (not asserted here, unlike k0 and v0)
 * @param k0 name of the single column; asserted to be non-null
 * @param v0 value of the single column; asserted to be non-null
 */
private Row(final byte[] pk, String k0, byte[] v0) {
private Row(final byte[] pk, final String k0, final byte[] v0) {
super();
assert k0 != null;
assert v0 != null;
this.put(k0, v0);
this.pk = pk;
}
/**
 * Returns the primary key of this row.
 *
 * @return the key array held by the row, handed out without copying
 */
public byte[] getPK() {
    return pk;
}
/**
 * Renders the row as the primary key (decoded with UTF8.String), a colon,
 * and the text produced by the superclass toString().
 *
 * NOTE(review): the builder and append lines appear as old/new pairs (looks
 * like diff before/after lines); confirm which is live.
 */
@Override
public String toString() {
// initial capacity: keymaxlen plus 20 characters per column entry
StringBuilder sb = new StringBuilder(keymaxlen + 20 * this.size());
sb.append(UTF8.String(pk)).append(":").append(super.toString());
final StringBuilder sb = new StringBuilder(Tables.this.keymaxlen + 20 * this.size());
sb.append(UTF8.String(this.pk)).append(":").append(super.toString());
return sb.toString();
}
}
public static void main(String[] args) {
public static void main(final String[] args) {
// test the class
File f = new File(new File("maptest").getAbsolutePath());
final File f = new File(new File("maptest").getAbsolutePath());
// System.out.println(f.getAbsolutePath());
// System.out.println(f.getParent());
try {
Tables map = new Tables(f.getParentFile(), 4);
final Tables map = new Tables(f.getParentFile(), 4);
// put some values into the map
Map<String, byte[]> m = new HashMap<String, byte[]>();
final Map<String, byte[]> m = new HashMap<String, byte[]>();
m.put("k", "000".getBytes());
map.update("testdao", "123".getBytes(), m);
m.put("k", "111".getBytes());
@ -528,13 +531,13 @@ public class Tables {
m.put("k", "222".getBytes());
map.update("testdao", "789".getBytes(), m);
// iterate over keys
Iterator<Row> i = map.iterator("testdao");
final Iterator<Row> i = map.iterator("testdao");
while (i.hasNext()) {
System.out.println(i.next().toString());
}
// clean up
map.close();
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}

@ -535,6 +535,7 @@ public final class FileUtils {
key = escaped_backslash.matcher(key).replaceAll("\\");
String value = escaped_newline.matcher(line.substring(pos + 1).trim()).replaceAll("\n");
value = value.replace("\\\\", "\\"); // does not work: escaped_backslashbackslash.matcher(value).replaceAll("\\");
//System.out.println("key = " + key + ", value = " + value);
props.put(key, value);
}
}

@ -91,9 +91,9 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.cora.services.federated.solr.SolrScheme;
import net.yacy.cora.services.federated.solr.SolrShardingConnector;
import net.yacy.cora.services.federated.solr.SolrShardingSelection;
import net.yacy.cora.services.federated.solr.SolrScheme;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
@ -105,6 +105,7 @@ import net.yacy.document.content.SurrogateReader;
import net.yacy.document.importer.OAIListFriendsLoader;
import net.yacy.document.parser.html.Evaluation;
import net.yacy.gui.Tray;
import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -512,7 +513,14 @@ public final class Switchboard extends serverSwitch {
// load the robots.txt db
this.log.logConfig("Initializing robots.txt DB");
this.robots = new RobotsTxt(this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME));
try {
final BEncodedHeap robotsHeap = this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME);
this.robots = new RobotsTxt(robotsHeap);
} catch (final IOException e) {
this.tables.clear(WorkTables.TABLE_ROBOTS_NAME);
final BEncodedHeap robotsHeap = this.tables.getHeap(WorkTables.TABLE_ROBOTS_NAME);
this.robots = new RobotsTxt(robotsHeap);
}
this.log.logConfig("Loaded robots.txt DB: " + this.robots.size() + " entries");
// start a cache manager

Loading…
Cancel
Save