Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 12 years ago
commit 58fe986cca

@ -25,7 +25,7 @@
<key>Java</key> <key>Java</key>
<dict> <dict>
<key>VMOptions</key> <key>VMOptions</key>
<string>-Xmx600m -Xms180m -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8</string> <string>-Xmx600m -Xms180m -Xss256k -XX:MaxPermSize=256m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8</string>
<key>WorkingDirectory</key> <key>WorkingDirectory</key>
<string>$APP_PACKAGE/Contents/Resources/Java</string> <string>$APP_PACKAGE/Contents/Resources/Java</string>
<key>MainClass</key> <key>MainClass</key>

@ -57,7 +57,7 @@ SHUTDOWN_TIMEOUT=50
# Default niceness if not set in config file # Default niceness if not set in config file
NICE_VAL=0 NICE_VAL=0
JAVA_ARGS="-server -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8" JAVA_ARGS="-server -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8"
#check if system supports large memory pages and enable it if possible #check if system supports large memory pages and enable it if possible
HUGEPAGESTOTAL="$(cat /proc/meminfo | grep HugePages_Total | sed s/[^0-9]//g)" HUGEPAGESTOTAL="$(cat /proc/meminfo | grep HugePages_Total | sed s/[^0-9]//g)"
if [ -n "$HUGEPAGESTOTAL" ] && [ $HUGEPAGESTOTAL -ne 0 ] if [ -n "$HUGEPAGESTOTAL" ] && [ $HUGEPAGESTOTAL -ne 0 ]

@ -3,7 +3,7 @@ javacSource=1.6
javacTarget=1.6 javacTarget=1.6
# Release Configuration # Release Configuration
releaseVersion=1.52 releaseVersion=1.6
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy releaseFileParentDir=yacy

@ -837,7 +837,7 @@ indexControl.gzipBody = true
indexControl.timeout = 60000 indexControl.timeout = 60000
indexDistribution.timeout = 60000 indexDistribution.timeout = 60000
indexTransfer.timeout = 120000 indexTransfer.timeout = 120000
indexTransfer.maxload = 2.5 indexTransfer.maxload = 2.2
# defining max. allowed amount of open files during index- transfer/distribution # defining max. allowed amount of open files during index- transfer/distribution
indexDistribution.maxOpenFiles = 800 indexDistribution.maxOpenFiles = 800

@ -99,4 +99,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers # greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = true greedylearning.enabled = true
greedylearning.limit.doccount = 15000 greedylearning.limit.doccount = 1000

@ -35,4 +35,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers # greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false greedylearning.enabled = false
greedylearning.limit.doccount = 15000 greedylearning.limit.doccount = 1000

@ -94,4 +94,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers # greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false greedylearning.enabled = false
greedylearning.limit.doccount = 15000 greedylearning.limit.doccount = 1000

@ -32,4 +32,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers # greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false greedylearning.enabled = false
greedylearning.limit.doccount = 15000 greedylearning.limit.doccount = 1000

@ -30,9 +30,6 @@
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File; import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -63,18 +60,13 @@ public class BlacklistCleaner_p {
private static final String BLACKLISTS = "blacklists_"; private static final String BLACKLISTS = "blacklists_";
private static final String ENTRIES = "entries_"; private static final String ENTRIES = "entries_";
private final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static final Class<?>[] supportedBLEngines = { public static final Class<?>[] supportedBLEngines = {
Blacklist.class Blacklist.class
}; };
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS"));
String blacklistToUse = null; String blacklistToUse = null;
prop.put(DISABLED+"checked", "1"); prop.put(DISABLED+"checked", "1");
@ -92,7 +84,7 @@ public class BlacklistCleaner_p {
} }
} }
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER), blacklistToUse); putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse);
if (blacklistToUse != null) { if (blacklistToUse != null) {
prop.put("results", "1"); prop.put("results", "1");
@ -125,7 +117,7 @@ public class BlacklistCleaner_p {
} }
} else { } else {
prop.put("results", "0"); prop.put("results", "0");
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER), blacklistToUse); putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse);
} }
return prop; return prop;
@ -273,26 +265,13 @@ public class BlacklistCleaner_p {
* @return Length of the list of entries to be removed. * @return Length of the list of entries to be removed.
*/ */
private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) { private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
boolean listChanged = false;
// delete the old entry from file
for (final String entry : entries) { for (final String entry : entries) {
String s = entry; String s = entry;
if (list != null){ // get rid of escape characters which make it impossible to
// properly use contains()
// get rid of escape characters which make it impossible to if (s.contains("\\\\")) {
// properly use contains() s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\"));
if (s.contains("\\\\")) {
s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\"));
}
if (list.contains(s)) {
listChanged = list.remove(s);
}
} }
// remove the entry from the running blacklist engine // remove the entry from the running blacklist engine
@ -301,7 +280,7 @@ public class BlacklistCleaner_p {
final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0)); final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0));
final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1); final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1);
try { try {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path); Switchboard.urlBlacklist.remove(supportedBlacklistType, blacklistToUse, host, path);
} catch (final RuntimeException e) { } catch (final RuntimeException e) {
ConcurrentLog.severe("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path); ConcurrentLog.severe("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path);
} }
@ -309,9 +288,6 @@ public class BlacklistCleaner_p {
} }
SearchEventCache.cleanupEvents(true); SearchEventCache.cleanupEvents(true);
} }
if (listChanged){
FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
}
return entries.length; return entries.length;
} }
@ -329,34 +305,23 @@ public class BlacklistCleaner_p {
final String[] oldEntry, final String[] oldEntry,
final String[] newEntry) { final String[] newEntry) {
removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry); removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
PrintWriter pw = null; String host, path;
try { for (final String n : newEntry) {
pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true)); final int pos = n.indexOf('/',0);
String host, path; if (pos < 0) {
for (final String n : newEntry) { host = n;
final int pos = n.indexOf('/',0); path = ".*";
if (pos < 0) { } else {
host = n; host = n.substring(0, pos);
path = ".*"; path = n.substring(pos + 1);
} else { }
host = n.substring(0, pos); for (final BlacklistType s : supportedBlacklistTypes) {
path = n.substring(pos + 1); if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
} Switchboard.urlBlacklist.add(s, blacklistToUse, host, path);
pw.println(host + "/" + path); }
for (final BlacklistType s : supportedBlacklistTypes) { }
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) { SearchEventCache.cleanupEvents(true);
Switchboard.urlBlacklist.add( }
s,
host,
path);
}
}
SearchEventCache.cleanupEvents(true);
}
pw.close();
} catch (final IOException e) {
ConcurrentLog.severe("BLACKLIST-CLEANER", "error on writing altered entries to blacklist", e);
}
return newEntry.length; return newEntry.length;
} }
} }

@ -29,12 +29,12 @@
// javac -classpath .:../classes Blacklist_p.java // javac -classpath .:../classes Blacklist_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.ListManager; import net.yacy.data.ListManager;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
@ -49,12 +49,8 @@ public class BlacklistImpExp_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, @SuppressWarnings("unused") final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, @SuppressWarnings("unused") final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// loading all blacklist files located in the directory // loading all blacklist files located in the directory
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath); final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
String blacklistToUse = null; String blacklistToUse = null;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
@ -84,7 +80,10 @@ public class BlacklistImpExp_p {
hostList.remove(peername); hostList.remove(peername);
peerCount++; peerCount++;
} }
} catch (final Exception e) {/* */} } catch (final Exception e) {
// Log exception for debug purposes ("catch-all catch")
ConcurrentLog.logException(e);
}
prop.put(DISABLED + "otherHosts", peerCount); prop.put(DISABLED + "otherHosts", peerCount);
} }
@ -93,10 +92,8 @@ public class BlacklistImpExp_p {
int count = 0; int count = 0;
for (String element : dirlist) { for (String element : dirlist) {
if (element.endsWith(".black")) { prop.putHTML("blackListNames_" + count + "_blackListName", element);
prop.putHTML("blackListNames_" + count + "_blackListName", element); count++;
count++;
}
} }
prop.put("blackListNames", count); prop.put("blackListNames", count);

@ -29,11 +29,9 @@
// javac -classpath .:../classes Blacklist_p.java // javac -classpath .:../classes Blacklist_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.ListManager;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
@ -43,11 +41,7 @@ import net.yacy.server.serverSwitch;
public class BlacklistTest_p { public class BlacklistTest_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
prop.putHTML("blacklistEngine", Blacklist.getEngineInfo()); prop.putHTML("blacklistEngine", Blacklist.getEngineInfo());

@ -57,11 +57,7 @@ public class Blacklist_p {
private final static String BLACKLIST_SHARED = "BlackLists.Shared"; private final static String BLACKLIST_SHARED = "BlackLists.Shared";
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// load all blacklist files located in the directory // load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER); List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
@ -549,14 +545,8 @@ public class Blacklist_p {
} }
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (Switchboard.urlBlacklist.getFileName(supportedBlacklistType) != blacklistToUse) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path); Switchboard.urlBlacklist.remove(supportedBlacklistType, blacklistToUse, host, path);
}
else {
Blacklist bl = new Blacklist(ListManager.listsPath);
bl.loadList(supportedBlacklistType, blacklistToUse, "/");
bl.remove(host, path);
} }
} }
@ -618,14 +608,8 @@ public class Blacklist_p {
String path = newEntry.substring(pos + 1); String path = newEntry.substring(pos + 1);
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (Switchboard.urlBlacklist.getFileName(supportedBlacklistType) == blacklistToUse) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(supportedBlacklistType, host, path); Switchboard.urlBlacklist.add(supportedBlacklistType, blacklistToUse, host, path);
}
else {
Blacklist bl = new Blacklist(ListManager.listsPath);
bl.loadList(supportedBlacklistType, blacklistToUse, "/");
bl.add(supportedBlacklistType, host, path);
} }
} }

@ -44,6 +44,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -57,7 +58,7 @@ public class ConfigAppearance_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
final String skinPath = new File(env.getDataPath(), env.getConfig("skinPath", "DATA/SKINS")).toString(); final String skinPath = new File(env.getDataPath(), env.getConfig("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT)).toString();
// Fallback // Fallback
prop.put("currentskin", ""); prop.put("currentskin", "");

@ -182,7 +182,12 @@ public class CrawlResults {
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
try { try {
urle = sb.index.fulltext().getMetadata(UTF8.getBytes(entry.getKey())); byte[] urlhash = UTF8.getBytes(entry.getKey());
urle = sb.index.fulltext().getMetadata(urlhash);
if (urle == null) {
sb.index.fulltext().commit(true);
urle = sb.index.fulltext().getMetadata(urlhash);
}
if (urle == null) { if (urle == null) {
ConcurrentLog.warn("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); ConcurrentLog.warn("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null; urlstr = null;

@ -25,10 +25,7 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -367,72 +364,58 @@ public class IndexControlRWIs_p {
URIMetadataRow.rowdef.objectOrder, URIMetadataRow.rowdef.objectOrder,
urlb.size()); urlb.size());
if ( post.containsKey("blacklisturls") ) { if ( post.containsKey("blacklisturls") ) {
PrintWriter pw; final String[] supportedBlacklistTypes =
try { env.getConfig("BlackLists.types", "").split(",");
final String[] supportedBlacklistTypes = DigestURI url;
env.getConfig("BlackLists.types", "").split(","); for ( final byte[] b : urlb ) {
pw = try {
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true)); urlHashes.put(b);
DigestURI url; } catch (final SpaceExceededException e ) {
for ( final byte[] b : urlb ) { ConcurrentLog.logException(e);
try { }
urlHashes.put(b); url = segment.fulltext().getURL(b);
} catch (final SpaceExceededException e ) { segment.fulltext().remove(b);
ConcurrentLog.logException(e); if ( url != null ) {
} for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
url = segment.fulltext().getURL(b); if ( ListManager.listSetContains(
segment.fulltext().remove(b); supportedBlacklistType + ".BlackLists",
if ( url != null ) { blacklist) ) {
pw.println(url.getHost() + "/" + url.getFile()); Switchboard.urlBlacklist.add(
for ( final String supportedBlacklistType : supportedBlacklistTypes ) { BlacklistType.valueOf(supportedBlacklistType),
if ( ListManager.listSetContains( blacklist,
supportedBlacklistType + ".BlackLists", url.getHost(),
blacklist) ) { url.getFile());
Switchboard.urlBlacklist.add( }
BlacklistType.valueOf(supportedBlacklistType), }
url.getHost(), SearchEventCache.cleanupEvents(true);
url.getFile()); }
} }
}
SearchEventCache.cleanupEvents(true);
}
}
pw.close();
} catch (final IOException e ) {
}
} }
if ( post.containsKey("blacklistdomains") ) { if ( post.containsKey("blacklistdomains") ) {
PrintWriter pw; DigestURI url;
try { for ( final byte[] b : urlb ) {
pw = try {
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true)); urlHashes.put(b);
DigestURI url; } catch (final SpaceExceededException e ) {
for ( final byte[] b : urlb ) { ConcurrentLog.logException(e);
try { }
urlHashes.put(b); url = segment.fulltext().getURL(b);
} catch (final SpaceExceededException e ) { segment.fulltext().remove(b);
ConcurrentLog.logException(e); if ( url != null ) {
} for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
url = segment.fulltext().getURL(b); if ( ListManager.listSetContains(
segment.fulltext().remove(b); supportedBlacklistType + ".BlackLists",
if ( url != null ) { blacklist) ) {
pw.println(url.getHost() + "/.*"); Switchboard.urlBlacklist.add(
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) { supportedBlacklistType,
if ( ListManager.listSetContains( blacklist,
supportedBlacklistType + ".BlackLists", url.getHost(),
blacklist) ) { ".*");
Switchboard.urlBlacklist.add( }
supportedBlacklistType, }
url.getHost(), }
".*"); }
}
}
}
}
pw.close();
} catch (final IOException e ) {
}
} }
try { try {
segment.termIndex().remove(keyhash, urlHashes); segment.termIndex().remove(keyhash, urlHashes);

@ -13,7 +13,6 @@ public class blacklists {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath); final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath);
int blacklistCount = 0; int blacklistCount = 0;

@ -5,6 +5,7 @@ import java.util.List;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.ListManager; import net.yacy.data.ListManager;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -15,8 +16,7 @@ public class blacklists_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath);
int blacklistCount=0; int blacklistCount=0;
final String blackListName = (post == null) ? "" : post.get("listname", ""); final String blackListName = (post == null) ? "" : post.get("listname", "");

@ -30,9 +30,7 @@
//if the shell's current path is HTROOT //if the shell's current path is HTROOT
import java.io.File; import java.io.File;
import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader; import java.io.StringReader;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
@ -50,6 +48,7 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
@ -69,8 +68,6 @@ public class sharedBlacklist_p {
public static final int STATUS_WRONG_INVOCATION = 5; public static final int STATUS_WRONG_INVOCATION = 5;
public static final int STATUS_PARSE_ERROR = 6; public static final int STATUS_PARSE_ERROR = 6;
private final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
// return variable that accumulates replacements // return variable that accumulates replacements
@ -89,13 +86,8 @@ public class sharedBlacklist_p {
if (post != null) { if (post != null) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// loading all blacklist files located in the directory // loading all blacklist files located in the directory
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER); final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
// List BlackLists // List BlackLists
int blacklistCount = 0; int blacklistCount = 0;
@ -210,11 +202,7 @@ public class sharedBlacklist_p {
prop.put("page", "1"); //result page prop.put("page", "1"); //result page
prop.put("status", STATUS_ENTRIES_ADDED); //list of added Entries prop.put("status", STATUS_ENTRIES_ADDED); //list of added Entries
PrintWriter pw = null;
try { try {
// open the blacklist file
pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, selectedBlacklistName), true));
// loop through the received entry list // loop through the received entry list
final int num = post.getInt("num", 0); final int num = post.getInt("num", 0);
for(int i = 0; i < num; i++){ for(int i = 0; i < num; i++){
@ -234,13 +222,10 @@ public class sharedBlacklist_p {
newItem = newItem + "/.*"; newItem = newItem + "/.*";
} }
// append the item to the file
pw.println(newItem);
if (Switchboard.urlBlacklist != null) { if (Switchboard.urlBlacklist != null) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) {
Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1)); Switchboard.urlBlacklist.add(supportedBlacklistType,selectedBlacklistName,newItem.substring(0, pos), newItem.substring(pos + 1));
} }
} }
SearchEventCache.cleanupEvents(true); SearchEventCache.cleanupEvents(true);
@ -250,8 +235,6 @@ public class sharedBlacklist_p {
} catch (final Exception e) { } catch (final Exception e) {
prop.put("status", "1"); prop.put("status", "1");
prop.putHTML("status_error", e.getLocalizedMessage()); prop.putHTML("status_error", e.getLocalizedMessage());
} finally {
if (pw != null) try { pw.close(); } catch (final Exception e){ /* */}
} }
/* unable to use prop.putHTML() or prop.putXML() here because they /* unable to use prop.putHTML() or prop.putXML() here because they

@ -40,6 +40,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceRow;
@ -105,7 +106,14 @@ public final class transferRWI {
String result = "ok"; String result = "ok";
final StringBuilder unknownURLs = new StringBuilder(6000); final StringBuilder unknownURLs = new StringBuilder(6000);
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) { double load = Memory.load();
float maxload = sb.getConfigFloat(SwitchboardConstants.INDEX_TRANSFER_MAXLOAD, 1.5f);
if (load > maxload) {
// Local load is too high. This is bad, but we must reject this transfer to protect ourselves!
sb.getLog().info("Rejecting RWIs from peer " + otherPeerName + ", system has too high load = " + load + ", maxload = " + maxload);
result = "not_granted";
pause = (int) (load * 20000);
} else if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
sb.getLog().info("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash); sb.getLog().info("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target"; result = "wrong_target";
pause = 0; pause = 0;

@ -578,7 +578,7 @@ public class Balancer {
Map.Entry<String, byte[]> hosthash; Map.Entry<String, byte[]> hosthash;
while (k.hasNext()) { while (k.hasNext()) {
hosthash = k.next(); hosthash = k.next();
if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this! //if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this!
besthost = hosthash.getKey(); besthost = hosthash.getKey();
besturlhash = hosthash.getValue(); besturlhash = hosthash.getValue();
removeHashFromDomainStacks(besthost, besturlhash); removeHashFromDomainStacks(besthost, besturlhash);

@ -37,11 +37,6 @@ public class AugmentParser extends AbstractParser implements Parser {
public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Parser.Failure, InterruptedException { public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Parser.Failure, InterruptedException {
Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source); Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source);
try {
source.reset();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
for (final Document doc : htmlDocs) { for (final Document doc : htmlDocs) {
/* analyze(doc, url, mimeType, charset); // enrich document text */ /* analyze(doc, url, mimeType, charset); // enrich document text */

@ -206,7 +206,7 @@ public class htmlParser extends AbstractParser implements Parser {
throw new Parser.Failure("IO error:" + e.getMessage(), location); throw new Parser.Failure("IO error:" + e.getMessage(), location);
} finally { } finally {
writer.flush(); writer.flush();
sourceStream.close(); //sourceStream.close(); keep open for multiple parsing (close done by caller)
writer.close(); writer.close();
} }
//OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false); //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);

@ -38,6 +38,7 @@ public class RDFaTripleImpl{
TransformerException, TransformerConfigurationException { TransformerException, TransformerConfigurationException {
BufferedReader bufReader = new BufferedReader(in); BufferedReader bufReader = new BufferedReader(in);
bufReader.mark(2048); // mark position for following reset
String readLine = bufReader.readLine(); String readLine = bufReader.readLine();
if (!readLine.toLowerCase().contains("<!doctype")){ if (!readLine.toLowerCase().contains("<!doctype")){
bufReader.reset(); bufReader.reset();

@ -251,6 +251,7 @@ public class Tables implements Iterable<String> {
try { try {
getHeap(tablename); getHeap(tablename);
} catch (final IOException e) { } catch (final IOException e) {
ConcurrentLog.logException(e);
} }
} }
} }

@ -99,7 +99,7 @@ public class migration {
* copy skins from the release to DATA/SKINS. * copy skins from the release to DATA/SKINS.
*/ */
public static void installSkins(final Switchboard sb){ public static void installSkins(final Switchboard sb){
final File skinsPath = sb.getDataPath("skinPath", "DATA/SKINS"); final File skinsPath = sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT);
final File defaultSkinsPath = new File(sb.getAppPath(), "skins"); final File defaultSkinsPath = new File(sb.getAppPath(), "skins");
if (defaultSkinsPath.exists()) { if (defaultSkinsPath.exists()) {
final List<String> skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath()); final List<String> skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath());
@ -118,7 +118,7 @@ public class migration {
if(skin.equals("")){ if(skin.equals("")){
skin="default"; skin="default";
} }
final File skinsDir=sb.getDataPath("skinPath", "DATA/SKINS"); final File skinsDir=sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT);
final File skinFile=new File(skinsDir, skin+".css"); final File skinFile=new File(skinsDir, skin+".css");
final File htdocsPath=new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT), "env"); final File htdocsPath=new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT), "env");
final File styleFile=new File(htdocsPath, "style.css"); final File styleFile=new File(htdocsPath, "style.css");
@ -149,7 +149,7 @@ public class migration {
} }
} }
public static void migrateBookmarkTagsDB(final Switchboard sb){ public static void migrateBookmarkTagsDB(final Switchboard sb){
sb.bookmarksDB.close(); if (sb.bookmarksDB != null) sb.bookmarksDB.close();
final File tagsDBFile=new File(sb.workPath, "bookmarkTags.db"); final File tagsDBFile=new File(sb.workPath, "bookmarkTags.db");
if(tagsDBFile.exists()){ if(tagsDBFile.exists()){
delete(tagsDBFile); delete(tagsDBFile);

@ -56,6 +56,7 @@ import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.SetTools; import net.yacy.kelondro.util.SetTools;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
public class Blacklist { public class Blacklist {
@ -131,7 +132,7 @@ public class Blacklist {
ConcurrentLog.fine("Blacklist", "All blacklists has been shutdown."); ConcurrentLog.fine("Blacklist", "All blacklists has been shutdown.");
} }
public final void setRootPath(final File rootPath) { private final void setRootPath(final File rootPath) {
if (rootPath == null) { if (rootPath == null) {
throw new NullPointerException("The blacklist root path must not be null."); throw new NullPointerException("The blacklist root path must not be null.");
} }
@ -153,10 +154,6 @@ public class Blacklist {
return this.cachedUrlHashs.get(blacklistType); return this.cachedUrlHashs.get(blacklistType);
} }
public final String getFileName(BlacklistType type) {
return blacklistFiles.get(type);
}
public final File getRootPath() { public final File getRootPath() {
return blacklistRootPath; return blacklistRootPath;
} }
@ -172,8 +169,6 @@ public class Blacklist {
entry.clear(); entry.clear();
} }
blacklistFiles.clear(); blacklistFiles.clear();
blacklistRootPath = null;
} }
public final int size() { public final int size() {
@ -270,16 +265,7 @@ public class Blacklist {
getBlacklistMap(blacklistType, false).remove(host); getBlacklistMap(blacklistType, false).remove(host);
} }
/** public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
* Removes entry for all blacklist types.
*/
public final void remove(final String host, final String path) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
}
}
public final void remove(final BlacklistType blacklistType, final String host, final String path) {
final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true); final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
Set<Pattern> hostList = blacklistMap.get(host); Set<Pattern> hostList = blacklistMap.get(host);
@ -300,7 +286,7 @@ public class Blacklist {
} }
// load blacklist data from file // load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, getFileName(blacklistType))); final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
// delete the old entry from file // delete the old entry from file
if (list != null) { if (list != null) {
@ -310,20 +296,11 @@ public class Blacklist {
break; break;
} }
} }
FileUtils.writeList(new File(ListManager.listsPath, getFileName(blacklistType)), list.toArray(new String[list.size()])); FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
}
}
/**
* Adds a new blacklist entry for all types.
*/
public final void add(final String host, final String path) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
add(supportedBlacklistType, host, path);
} }
} }
public final void add(final BlacklistType blacklistType, final String host, final String path) { public final void add(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
if (contains(blacklistType, host, path)) { if (contains(blacklistType, host, path)) {
return; return;
} }
@ -355,11 +332,12 @@ public class Blacklist {
// Append the line to the file. // Append the line to the file.
PrintWriter pw = null; PrintWriter pw = null;
try { try {
final String newEntry = h + "/" + pattern;
if (!blacklistFileContains(blacklistRootPath, if (!blacklistFileContains(blacklistRootPath,
getFileName(blacklistType), pattern.toString())) { blacklistToUse, newEntry)) {
pw = new PrintWriter(new FileWriter(new File(blacklistRootPath, pw = new PrintWriter(new FileWriter(new File(blacklistRootPath,
getFileName(blacklistType)), true)); blacklistToUse), true));
pw.println(pattern); pw.println(newEntry);
pw.close(); pw.close();
} }
} catch (final IOException e) { } catch (final IOException e) {
@ -370,7 +348,7 @@ public class Blacklist {
pw.close(); pw.close();
} catch (final Exception e) { } catch (final Exception e) {
ConcurrentLog.warn("Blacklist", "could not close stream to " + ConcurrentLog.warn("Blacklist", "could not close stream to " +
getFileName(blacklistType) + "! " + e.getMessage()); blacklistToUse + "! " + e.getMessage());
} }
} }
} }
@ -630,7 +608,7 @@ public class Blacklist {
} }
private static File DHTCacheFile(final BlacklistType type) { private static File DHTCacheFile(final BlacklistType type) {
String BLACKLIST_DHT_CACHEFILE_NAME = "DATA/LISTS/blacklist_" + type.name() + "_Cache.ser"; final String BLACKLIST_DHT_CACHEFILE_NAME = SwitchboardConstants.LISTS_PATH_DEFAULT + "/blacklist_" + type.name() + "_Cache.ser";
return new File(Switchboard.getSwitchboard().dataPath, BLACKLIST_DHT_CACHEFILE_NAME); return new File(Switchboard.getSwitchboard().dataPath, BLACKLIST_DHT_CACHEFILE_NAME);
} }

@ -1045,7 +1045,7 @@ public final class Switchboard extends serverSwitch {
Long.MAX_VALUE, Long.MAX_VALUE,
30000, 30000,
Long.MAX_VALUE), Long.MAX_VALUE),
8000); 10000);
deployThread( deployThread(
SwitchboardConstants.INDEX_DIST, SwitchboardConstants.INDEX_DIST,
"DHT Distribution", "DHT Distribution",
@ -2720,7 +2720,7 @@ public final class Switchboard extends serverSwitch {
return; return;
} }
if ( !profile.indexText() && !profile.indexMedia() ) { if ( profile != null && !profile.indexText() && !profile.indexMedia() ) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name()); //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
addURLtoErrorDB( addURLtoErrorDB(
url, url,
@ -3475,7 +3475,6 @@ public final class Switchboard extends serverSwitch {
} }
} }
} catch (final Throwable e) { } catch (final Throwable e) {
ConcurrentLog.logException(e);
} }
} }
}.start(); }.start();

@ -521,4 +521,9 @@ public final class SwitchboardConstants {
public static final String GREEDYLEARNING_ENABLED = "greedylearning.enabled"; public static final String GREEDYLEARNING_ENABLED = "greedylearning.enabled";
public static final String GREEDYLEARNING_LIMIT_DOCCOUNT = "greedylearning.limit.doccount"; public static final String GREEDYLEARNING_LIMIT_DOCCOUNT = "greedylearning.limit.doccount";
public static final String GREEDYLEARNING_ACTIVE = "greedylearning.active"; public static final String GREEDYLEARNING_ACTIVE = "greedylearning.active";
/*
* Skins
*/
public static final String SKINS_PATH_DEFAULT = "DATA/SKINS";
} }

@ -133,6 +133,7 @@ import org.apache.solr.common.SolrInputDocument;
} }
} }
} catch (final IOException ex) { } catch (final IOException ex) {
ConcurrentLog.warn("MIGRATION-REINDEX", "remove following query from list due to error, q=" + querylist.remove(0));
ConcurrentLog.logException(ex); ConcurrentLog.logException(ex);
} finally { } finally {
sem.release(); sem.release();

@ -201,7 +201,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
final DigestURI digestURI = md.url(); final DigestURI digestURI = md.url();
boolean allAttr = this.isEmpty(); boolean allAttr = this.isEmpty();
if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, "");
add(doc, CollectionSchema.id, ASCII.String(md.hash())); add(doc, CollectionSchema.id, ASCII.String(md.hash()));
String us = digestURI.toNormalform(true); String us = digestURI.toNormalform(true);
add(doc, CollectionSchema.sku, us); add(doc, CollectionSchema.sku, us);
@ -354,7 +353,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>(); Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
add(doc, CollectionSchema.id, id); add(doc, CollectionSchema.id, id);
if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
String docurl = digestURI.toNormalform(true); String docurl = digestURI.toNormalform(true);
add(doc, CollectionSchema.sku, docurl); add(doc, CollectionSchema.sku, docurl);
@ -1005,11 +1003,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
BlockingQueue<String> ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000); BlockingQueue<String> ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000);
String id; String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) { while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value} this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}
} }
} catch (final InterruptedException e2) { } catch (final InterruptedException e2) {
} }
this.cr_host_count = crt.size(); this.cr_host_count = this.crt.size();
double initval = 1.0d / cr_host_count; double initval = 1.0d / cr_host_count;
for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) entry.getValue()[0] = initval; for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) entry.getValue()[0] = initval;
this.internal_links_counter = new RowHandleMap(12, Base64Order.enhancedCoder, 8, 100, "internal_links_counter"); this.internal_links_counter = new RowHandleMap(12, Base64Order.enhancedCoder, 8, 100, "internal_links_counter");
@ -1019,8 +1017,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* @return * @return
*/ */
public Map<byte[], CRV> normalize() { public Map<byte[], CRV> normalize() {
TreeMap<Double, List<byte[]>> reorder = new TreeMap<Double, List<byte[]>>(); final TreeMap<Double, List<byte[]>> reorder = new TreeMap<Double, List<byte[]>>();
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) { for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
Double d = entry.getValue()[0]; Double d = entry.getValue()[0];
List<byte[]> ds = reorder.get(d); List<byte[]> ds = reorder.get(d);
if (ds == null) {ds = new ArrayList<byte[]>(); reorder.put(d, ds);} if (ds == null) {ds = new ArrayList<byte[]>(); reorder.put(d, ds);}
@ -1103,7 +1101,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
boolean convergence = true; boolean convergence = true;
double df = (1.0d - damping) / this.cr_host_count; double df = (1.0d - damping) / this.cr_host_count;
try { try {
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) { for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
byte[] id = entry.getKey(); byte[] id = entry.getKey();
ReferenceReport rr = this.rrCache.getReferenceReport(id, false); ReferenceReport rr = this.rrCache.getReferenceReport(id, false);
// sum up the cr of the internal links // sum up the cr of the internal links
@ -1112,7 +1110,14 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
for (byte[] iid: iids) { for (byte[] iid: iids) {
int ilc = getInternalLinks(iid); int ilc = getInternalLinks(iid);
if (ilc > 0) { // if (ilc == 0) then the reference report is wrong! if (ilc > 0) { // if (ilc == 0) then the reference report is wrong!
ncr += this.crt.get(iid)[0] / ilc; double[] d = this.crt.get(iid);
// d[] could be empty in some situations
if (d.length > 0) {
ncr += d[0] / ilc;
} else {
// Output a warning that d[] is empty
ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + iid);
}
} }
} }
ncr = df + damping * ncr; ncr = df + damping * ncr;
@ -1120,7 +1125,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
entry.getValue()[1] = ncr; entry.getValue()[1] = ncr;
} }
// after the loop, replace the old value with the new value in crt // after the loop, replace the old value with the new value in crt
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) { for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
entry.getValue()[0] = entry.getValue()[1]; entry.getValue()[0] = entry.getValue()[1];
} }
} catch (final IOException e) { } catch (final IOException e) {
@ -1189,7 +1194,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* @param httpstatus * @param httpstatus
* @throws IOException * @throws IOException
*/ */
public SolrInputDocument err(final DigestURI digestURI, String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException { public SolrInputDocument err(final DigestURI digestURI, final String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
final SolrInputDocument solrdoc = new SolrInputDocument(); final SolrInputDocument solrdoc = new SolrInputDocument();
add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash())); add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true)); add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true));

@ -18,7 +18,7 @@ if exist DATA\SETTINGS\httpProxy.conf GoTo :RENAMEINDEX
if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS
:STARTJAVA :STARTJAVA
set javacmd=%javacmd% -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8 set javacmd=%javacmd% -XX:-UseGCOverheadLimit -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
Rem Starting YaCy Rem Starting YaCy
Echo Generated classpath:%CLASSPATH% Echo Generated classpath:%CLASSPATH%
Echo JRE Parameters:%javacmd% Echo JRE Parameters:%javacmd%

@ -6,7 +6,7 @@ PIDFILE="yacy.pid"
OS="`uname`" OS="`uname`"
#get javastart args #get javastart args
JAVA_ARGS="-server -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8"; JAVA_ARGS="-server -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8";
#JAVA_ARGS="-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDetails $JAVA_ARGS"; #JAVA_ARGS="-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDetails $JAVA_ARGS";
#check if OS is Sun Solaris or one of the OpenSolaris distributions and use different version of id if necessary #check if OS is Sun Solaris or one of the OpenSolaris distributions and use different version of id if necessary

@ -20,7 +20,7 @@ if exist DATA\SETTINGS\httpProxy.conf GoTo :RENAMEINDEX
if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS
:STARTJAVA :STARTJAVA
set javacmd=%javacmd% -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8 set javacmd=%javacmd% -XX:-UseGCOverheadLimit -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
Rem Starting YaCy Rem Starting YaCy
Echo Generated classpath:%CLASSPATH% Echo Generated classpath:%CLASSPATH%
Echo JRE Parameters:%javacmd% Echo JRE Parameters:%javacmd%

Loading…
Cancel
Save