Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 13 years ago
commit 2fe207f813

@ -163,8 +163,8 @@ public class AccessTracker_p {
prop.put("page_list_" + m + "_dark", ((dark) ? 1 : 0) ); prop.put("page_list_" + m + "_dark", ((dark) ? 1 : 0) );
dark =! dark; dark =! dark;
prop.putHTML("page_list_" + m + "_host", query.host); prop.putHTML("page_list_" + m + "_host", query.host);
prop.put("page_list_" + m + "_date", SimpleFormatter.format(new Date(query.time.longValue()))); prop.put("page_list_" + m + "_date", SimpleFormatter.format(new Date(query.starttime)));
prop.put("page_list_" + m + "_timestamp", query.time.longValue()); prop.put("page_list_" + m + "_timestamp", query.starttime);
if (page == 2) { if (page == 2) {
// local search // local search
prop.putNum("page_list_" + m + "_offset", query.offset); prop.putNum("page_list_" + m + "_offset", query.offset);

@ -430,7 +430,7 @@ public final class search {
// update the search tracker // update the search tracker
synchronized (trackerHandles) { synchronized (trackerHandles) {
trackerHandles.add(theQuery.time); // thats the time when the handle was created trackerHandles.add(theQuery.starttime); // thats the time when the handle was created
// we don't need too much entries in the list; remove superfluous // we don't need too much entries in the list; remove superfluous
while (trackerHandles.size() > 36) if (!trackerHandles.remove(trackerHandles.first())) break; while (trackerHandles.size() > 36) if (!trackerHandles.remove(trackerHandles.first())) break;
} }

@ -930,7 +930,7 @@ public class yacysearch {
// update the search tracker // update the search tracker
try { try {
synchronized ( trackerHandles ) { synchronized ( trackerHandles ) {
trackerHandles.add(theQuery.time); trackerHandles.add(theQuery.starttime);
while ( trackerHandles.size() > 600 ) { while ( trackerHandles.size() > 600 ) {
if ( !trackerHandles.remove(trackerHandles.first()) ) { if ( !trackerHandles.remove(trackerHandles.first()) ) {
break; break;

@ -91,7 +91,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
} }
//get words from document //get words from document
final Map<String, Word> words = new Condenser(document, true, true, LibraryProvider.dymLib).words(); final Map<String, Word> words = new Condenser(document, true, true, LibraryProvider.dymLib, false).words();
// generate potential tags from document title, description and subject // generate potential tags from document title, description and subject
final int bufferSize = document.dc_title().length() + document.dc_description().length() + document.dc_subject(' ').length() + 32; final int bufferSize = document.dc_title().length() + document.dc_description().length() + document.dc_subject(' ').length() + 32;

@ -26,7 +26,6 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.text.NumberFormat;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
@ -83,7 +82,6 @@ public final class Condenser {
public static final int flag_cat_hasvideo = 22; // the page refers to (at least one) videos public static final int flag_cat_hasvideo = 22; // the page refers to (at least one) videos
public static final int flag_cat_hasapp = 23; // the page refers to (at least one) application file public static final int flag_cat_hasapp = 23; // the page refers to (at least one) application file
private final static int numlength = 5;
//private Properties analysis; //private Properties analysis;
private final Map<String, Word> words; // a string (the words) to (indexWord) - relation private final Map<String, Word> words; // a string (the words) to (indexWord) - relation
@ -96,19 +94,29 @@ public final class Condenser {
public int RESULT_DIFF_SENTENCES = -1; public int RESULT_DIFF_SENTENCES = -1;
public Bitfield RESULT_FLAGS = new Bitfield(4); public Bitfield RESULT_FLAGS = new Bitfield(4);
private final Identificator languageIdentificator; private final Identificator languageIdentificator;
private final NumberFormat intStringFormatter = NumberFormat.getIntegerInstance(); // use a new instance for each object for a better concurrency /*
private final static int numlength = 5;
private static final ThreadLocal <NumberFormat> intStringFormatter =
new ThreadLocal <NumberFormat>() {
@Override protected NumberFormat initialValue() {
NumberFormat n = NumberFormat.getIntegerInstance();
n.setMinimumIntegerDigits(numlength);
n.setMaximumIntegerDigits(numlength);
return n;
}
};
*/
public Condenser( public Condenser(
final Document document, final Document document,
final boolean indexText, final boolean indexText,
final boolean indexMedia, final boolean indexMedia,
final WordCache meaningLib final WordCache meaningLib,
final boolean doAutotagging
) { ) {
Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging
// if addMedia == true, then all the media links are also parsed and added to the words // if addMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag // added media words are flagged with the appropriate media flag
this.intStringFormatter.setMinimumIntegerDigits(numlength);
this.intStringFormatter.setMaximumIntegerDigits(numlength);
this.words = new HashMap<String, Word>(); this.words = new HashMap<String, Word>();
this.RESULT_FLAGS = new Bitfield(4); this.RESULT_FLAGS = new Bitfield(4);
@ -124,7 +132,7 @@ public final class Condenser {
Map.Entry<MultiProtocolURI, String> entry; Map.Entry<MultiProtocolURI, String> entry;
if (indexText) { if (indexText) {
createCondensement(document.getText(), meaningLib); createCondensement(document.getText(), meaningLib, doAutotagging);
// the phrase counter: // the phrase counter:
// phrase 0 are words taken from the URL // phrase 0 are words taken from the URL
// phrase 1 is the MainTitle // phrase 1 is the MainTitle
@ -262,11 +270,11 @@ public final class Condenser {
} }
} }
public Condenser(final InputStream text, final WordCache meaningLib) { public Condenser(final InputStream text, final WordCache meaningLib, boolean doAutotagging) {
this.languageIdentificator = null; // we don't need that here this.languageIdentificator = null; // we don't need that here
// analysis = new Properties(); // analysis = new Properties();
this.words = new TreeMap<String, Word>(); this.words = new TreeMap<String, Word>();
createCondensement(text, meaningLib); createCondensement(text, meaningLib, doAutotagging);
} }
public int excludeWords(final SortedSet<String> stopwords) { public int excludeWords(final SortedSet<String> stopwords) {
@ -286,7 +294,7 @@ public final class Condenser {
return this.languageIdentificator.getLanguage(); return this.languageIdentificator.getLanguage();
} }
private void createCondensement(final InputStream is, final WordCache meaningLib) { private void createCondensement(final InputStream is, final WordCache meaningLib, boolean doAutotagging) {
assert is != null; assert is != null;
final Set<String> currsentwords = new HashSet<String>(); final Set<String> currsentwords = new HashSet<String>();
String word = ""; String word = "";
@ -312,8 +320,10 @@ public final class Condenser {
if (word.length() < wordminsize) continue; if (word.length() < wordminsize) continue;
// get tags from autotagging // get tags from autotagging
if (doAutotagging) {
tag = LibraryProvider.autotagging.getPrintTagFromWord(word); tag = LibraryProvider.autotagging.getPrintTagFromWord(word);
if (tag != null) this.tags.add(tag); if (tag != null) this.tags.add(tag);
}
// distinguish punctuation and words // distinguish punctuation and words
wordlen = word.length(); wordlen = word.length();
@ -393,7 +403,7 @@ public final class Condenser {
if (text == null) return null; if (text == null) return null;
ByteArrayInputStream buffer; ByteArrayInputStream buffer;
buffer = new ByteArrayInputStream(UTF8.getBytes(text)); buffer = new ByteArrayInputStream(UTF8.getBytes(text));
return new Condenser(buffer, meaningLib).words(); return new Condenser(buffer, meaningLib, false).words();
} }
public static void main(final String[] args) { public static void main(final String[] args) {

@ -32,7 +32,6 @@ import java.io.UnsupportedEncodingException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
@ -42,19 +41,20 @@ import net.yacy.document.LibraryProvider;
import net.yacy.document.Parser; import net.yacy.document.Parser;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.BDecoder; import net.yacy.kelondro.util.BDecoder;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.BDecoder.BObject; import net.yacy.kelondro.util.BDecoder.BObject;
import net.yacy.kelondro.util.BDecoder.BType; import net.yacy.kelondro.util.BDecoder.BType;
import net.yacy.kelondro.util.FileUtils;
// a BT parser according to http://wiki.theory.org/BitTorrentSpecification // a BT parser according to http://wiki.theory.org/BitTorrentSpecification
public class torrentParser extends AbstractParser implements Parser { public class torrentParser extends AbstractParser implements Parser {
public torrentParser() { public torrentParser() {
super("Torrent Metadata Parser"); super("Torrent Metadata Parser");
SUPPORTED_EXTENSIONS.add("torrent"); this.SUPPORTED_EXTENSIONS.add("torrent");
SUPPORTED_MIME_TYPES.add("application/x-bittorrent"); this.SUPPORTED_MIME_TYPES.add("application/x-bittorrent");
} }
@Override
public Document[] parse(MultiProtocolURI location, String mimeType, String charset, InputStream source) public Document[] parse(MultiProtocolURI location, String mimeType, String charset, InputStream source)
throws Parser.Failure, InterruptedException { throws Parser.Failure, InterruptedException {
byte[] b = null; byte[] b = null;
@ -122,7 +122,7 @@ public class torrentParser extends AbstractParser implements Parser {
byte[] b = FileUtils.read(new File(args[0])); byte[] b = FileUtils.read(new File(args[0]));
torrentParser parser = new torrentParser(); torrentParser parser = new torrentParser();
Document[] d = parser.parse(new MultiProtocolURI("http://localhost/test.torrent"), null, "UTF-8", new ByteArrayInputStream(b)); Document[] d = parser.parse(new MultiProtocolURI("http://localhost/test.torrent"), null, "UTF-8", new ByteArrayInputStream(b));
Condenser c = new Condenser(d[0], true, true, LibraryProvider.dymLib); Condenser c = new Condenser(d[0], true, true, LibraryProvider.dymLib, false);
Map<String, Word> w = c.words(); Map<String, Word> w = c.words();
for (Map.Entry<String, Word> e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText); for (Map.Entry<String, Word> e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText);
} catch (IOException e) { } catch (IOException e) {

@ -36,6 +36,7 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.MicroDate; import net.yacy.kelondro.order.MicroDate;
@ -429,10 +430,11 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
// transform without concurrency to omit thread creation overhead // transform without concurrency to omit thread creation overhead
for (final Row.Entry entry: container) try { for (final Row.Entry entry: container) try {
vars.put(new WordReferenceVars(new WordReferenceRow(entry))); vars.put(new WordReferenceVars(new WordReferenceRow(entry)));
} catch (final InterruptedException e) {} } catch (final InterruptedException e) {} finally {
try { try {
vars.put(WordReferenceVars.poison); vars.put(WordReferenceVars.poison);
} catch (final InterruptedException e) {} } catch (final InterruptedException e) {}
}
return vars; return vars;
} }
final Thread distributor = new TransformDistributor(container, vars, maxtime); final Thread distributor = new TransformDistributor(container, vars, maxtime);
@ -470,13 +472,26 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
while (p > 0) { while (p > 0) {
p--; p--;
worker[p % cores0].add(this.container.get(p, false)); worker[p % cores0].add(this.container.get(p, false));
if (p % 100 == 0 && System.currentTimeMillis() > timeout) break; if (p % 100 == 0 && System.currentTimeMillis() > timeout) {
Log.logWarning("TransformDistributor", "distribution of WordReference entries to worker queues ended with timeout = " + this.maxtime);
break;
}
} }
// insert poison to stop the queues // insert poison to stop the queues
for (int i = 0; i < cores0; i++) { for (int i = 0; i < cores0; i++) {
worker[i].add(WordReferenceRow.poisonRowEntry); worker[i].add(WordReferenceRow.poisonRowEntry);
} }
// wait for the worker to terminate because we want to place a poison entry into the out queue afterwards
for (int i = 0; i < cores0; i++) {
try {
worker[i].join();
} catch (InterruptedException e) {
}
}
this.out.add(WordReferenceVars.poison);
} }
} }
@ -506,7 +521,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
try { try {
while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) { while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) {
this.out.put(new WordReferenceVars(new WordReferenceRow(entry))); this.out.put(new WordReferenceVars(new WordReferenceRow(entry)));
if (System.currentTimeMillis() > timeout) break; if (System.currentTimeMillis() > timeout) {
Log.logWarning("TransformWorker", "normalization of row entries from row to vars ended with timeout = " + this.maxtime);
break;
}
} }
} catch (final InterruptedException e) {} } catch (final InterruptedException e) {}
} }

@ -520,7 +520,9 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>,
int c0p, c1p; int c0p, c1p;
int o; int o;
final int objectsize = c0.rowdef.objectsize; final int objectsize = c0.rowdef.objectsize;
while (c0i < c0.size() && c1i < c1.size()) { final int c0s = c0.size();
final int c1s = c1.size();
while (c0i < c0s && c1i < c1s) {
c0p = c0i * objectsize; c0p = c0i * objectsize;
c1p = c1i * objectsize; c1p = c1i * objectsize;
o = c0.rowdef.objectOrder.compare( o = c0.rowdef.objectOrder.compare(

@ -137,10 +137,11 @@ public final class ConsoleOutErrHandler extends Handler {
if (record.getLevel().intValue() >= this.splitLevel.intValue()) { if (record.getLevel().intValue() >= this.splitLevel.intValue()) {
this.stdErrHandler.publish(record); this.stdErrHandler.publish(record);
this.stdErrHandler.flush();
} else { } else {
this.stdOutHandler.publish(record); this.stdOutHandler.publish(record);
this.stdOutHandler.flush();
} }
flush();
} }
@Override @Override

@ -34,6 +34,8 @@ import java.util.logging.StreamHandler;
public final class ConsoleOutHandler extends StreamHandler { public final class ConsoleOutHandler extends StreamHandler {
private static int c = 0;
public ConsoleOutHandler() { public ConsoleOutHandler() {
setLevel(Level.FINEST); setLevel(Level.FINEST);
setFormatter(new SimpleFormatter()); setFormatter(new SimpleFormatter());
@ -43,7 +45,7 @@ public final class ConsoleOutHandler extends StreamHandler {
@Override @Override
public final synchronized void publish(final LogRecord record) { public final synchronized void publish(final LogRecord record) {
super.publish(record); super.publish(record);
flush(); if (c++ % 10 == 0) flush(); // not so many flushes, makes too much IO
} }
@Override @Override

@ -40,26 +40,37 @@ import java.util.Locale;
* to the locale set for YaCy. * to the locale set for YaCy.
*/ */
public final class Formatter { public final class Formatter {
// default formatter
private static NumberFormat numForm = NumberFormat.getInstance(new Locale("en"));
// generic formatter that can be used when no localized formatting is allowed
private static final NumberFormat cleanNumForm =
new DecimalFormat("####.##", new DecimalFormatSymbols(Locale.ENGLISH));
static { // default formatter
// just initialize defaults on class load private static Locale locale = new Locale("en");
initDefaults(); /**
* use ThreadLocal to generate new formatter for each Thread since NumberFormat is not synchronized
*/
private static final ThreadLocal <NumberFormat> numForm =
new ThreadLocal <NumberFormat>() {
@Override protected NumberFormat initialValue() {
NumberFormat n = locale == null ? new DecimalFormat("####.##", new DecimalFormatSymbols(Locale.ENGLISH)) : NumberFormat.getInstance(locale);
n.setGroupingUsed(true); // always group int digits
n.setParseIntegerOnly(false); // allow int/double/float
n.setMaximumFractionDigits(2); // 2 decimal digits for float/double
return n;
} }
};
private static final ThreadLocal <NumberFormat> cleanNumForm =
new ThreadLocal <NumberFormat>() {
@Override protected NumberFormat initialValue() {
NumberFormat n = new DecimalFormat("####.##", new DecimalFormatSymbols(Locale.ENGLISH));
return n;
}
};
/** /**
* @param locale the {@link Locale} to set or <code>null</code> to set the special * @param locale the {@link Locale} to set or <code>null</code> to set the special
* empty locale to create unformatted numbers * empty locale to create unformatted numbers
*/ */
public static void setLocale(final Locale locale) { public static void setLocale(final Locale l) {
numForm = (locale == null ? cleanNumForm : NumberFormat.getInstance(locale)); locale = l;
initDefaults();
} }
/** /**
@ -67,28 +78,24 @@ public final class Formatter {
*/ */
public static void setLocale(final String lang) { public static void setLocale(final String lang) {
final String l = (lang.equalsIgnoreCase("default") ? "en" : lang.toLowerCase()); final String l = (lang.equalsIgnoreCase("default") ? "en" : lang.toLowerCase());
setLocale(l.equals("none") ? null : new Locale(l)); setLocale(l.equals("none") ? null : new Locale(l));
} }
private static void initDefaults() {
numForm.setGroupingUsed(true); // always group int digits
numForm.setParseIntegerOnly(false); // allow int/double/float
numForm.setMaximumFractionDigits(2); // 2 decimal digits for float/double
}
public static String number(final double d, final boolean localized) { public static String number(final double d, final boolean localized) {
return (localized ? number(d) : cleanNumForm.format(d)); return (localized ? numForm.get().format(d) : cleanNumForm.get().format(d));
} }
public static String number(final double d) { public static String number(final double d) {
return numForm.format(d); return numForm.get().format(d);
} }
public static String number(final long l, final boolean localized) { public static String number(final long l, final boolean localized) {
return (localized ? number(l) : cleanNumForm.format(l)); return (localized ? numForm.get().format(l) : cleanNumForm.get().format(l));
} }
public static String number(final long l) { public static String number(final long l) {
return numForm.format(l); return numForm.get().format(l);
} }
/** /**

@ -786,7 +786,7 @@ public final class Protocol
// store remote result to local result container // store remote result to local result container
// insert one container into the search result buffer // insert one container into the search result buffer
// one is enough, only the references are used, not the word // one is enough, only the references are used, not the word
containerCache.add(container[0], false, target.getName() + "/" + target.hash, result.joincount, true, 5000); containerCache.add(container[0], false, target.getName() + "/" + target.hash, result.joincount, true, time);
containerCache.addExpectedRemoteReferences(-count); containerCache.addExpectedRemoteReferences(-count);
// insert the containers to the index // insert the containers to the index

@ -93,21 +93,21 @@ public class Blacklist {
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news"; public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null; private File blacklistRootPath = null;
private final ConcurrentMap<String, HandleSet> cachedUrlHashs; private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) { public Blacklist(final File rootPath) {
setRootPath(rootPath); setRootPath(rootPath);
// prepare the data structure // prepare the data structure
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<String>>>(); this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<String>>>(); this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>(); this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) { for (final String blacklistType : BLACKLIST_TYPES) {
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>()); this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>()); this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
} }
} }
@ -126,7 +126,7 @@ public class Blacklist {
this.blacklistRootPath = rootPath; this.blacklistRootPath = rootPath;
} }
protected Map<String, List<String>> getBlacklistMap(final String blacklistType, final boolean matchable) { protected Map<String, List<Pattern>> getBlacklistMap(final String blacklistType, final boolean matchable) {
if (blacklistType == null) { if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set."); throw new IllegalArgumentException("Blacklist type not set.");
} }
@ -149,10 +149,10 @@ public class Blacklist {
} }
public void clear() { public void clear() {
for (final Map<String, List<String>> entry : this.hostpaths_matchable.values()) { for (final Map<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
entry.clear(); entry.clear();
} }
for (final Map<String, List<String>> entry : this.hostpaths_notmatchable.values()) { for (final Map<String, List<Pattern>> entry : this.hostpaths_notmatchable.values()) {
entry.clear(); entry.clear();
} }
for (final HandleSet entry : this.cachedUrlHashs.values()) { for (final HandleSet entry : this.cachedUrlHashs.values()) {
@ -163,12 +163,12 @@ public class Blacklist {
public int size() { public int size() {
int size = 0; int size = 0;
for (final String entry : this.hostpaths_matchable.keySet()) { for (final String entry : this.hostpaths_matchable.keySet()) {
for (final List<String> ientry : this.hostpaths_matchable.get(entry).values()) { for (final List<Pattern> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size(); size += ientry.size();
} }
} }
for (final String entry : this.hostpaths_notmatchable.keySet()) { for (final String entry : this.hostpaths_notmatchable.keySet()) {
for (final List<String> ientry : this.hostpaths_notmatchable.get(entry).values()) { for (final List<Pattern> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size(); size += ientry.size();
} }
} }
@ -188,12 +188,13 @@ public class Blacklist {
* @param sep * @param sep
*/ */
private void loadList(final BlacklistFile blFile, final String sep) { private void loadList(final BlacklistFile blFile, final String sep) {
final Map<String, List<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); final Map<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); final Map<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, List<String>>> loadedBlacklist; Set<Map.Entry<String, List<String>>> loadedBlacklist;
Map.Entry<String, List<String>> loadedEntry; Map.Entry<String, List<String>> loadedEntry;
List<String> paths; List<Pattern> paths;
List<String> loadedPaths; List<String> loadedPaths;
List<Pattern> loadedPathsPattern;
final Set<String> fileNames = blFile.getFileNamesUnified(); final Set<String> fileNames = blFile.getFileNamesUnified();
for (final String fileName : fileNames) { for (final String fileName : fileNames) {
@ -208,19 +209,32 @@ public class Blacklist {
for (final Iterator<Map.Entry<String, List<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) { for (final Iterator<Map.Entry<String, List<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
loadedEntry = mi.next(); loadedEntry = mi.next();
loadedPaths = loadedEntry.getValue(); loadedPaths = loadedEntry.getValue();
loadedPathsPattern = new ArrayList<Pattern>();
for (String a: loadedPaths) {
if (a.equals("*")) {
loadedPathsPattern.add(Pattern.compile(".*"));
continue;
}
if (a.indexOf("?*",0) > 0) {
// prevent "Dangling meta character '*'" exception
Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + a);
continue;
}
loadedPathsPattern.add(Pattern.compile(a));
}
// create new entry if host mask unknown, otherwise merge // create new entry if host mask unknown, otherwise merge
// existing one with path patterns from blacklist file // existing one with path patterns from blacklist file
paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey()); paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
if (paths == null) { if (paths == null) {
if (isMatchable(loadedEntry.getKey())) { if (isMatchable(loadedEntry.getKey())) {
blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths); blacklistMapMatch.put(loadedEntry.getKey(), loadedPathsPattern);
} else { } else {
blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths); blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPathsPattern);
} }
} else { } else {
// check for duplicates? (refactor List -> Set) // check for duplicates? (refactor List -> Set)
paths.addAll(new HashSet<String>(loadedPaths)); paths.addAll(new HashSet<Pattern>(loadedPathsPattern));
} }
} }
} }
@ -229,7 +243,6 @@ public class Blacklist {
public void loadList(final String blacklistType, final String fileNames, final String sep) { public void loadList(final String blacklistType, final String fileNames, final String sep) {
// method for not breaking older plasmaURLPattern interface // method for not breaking older plasmaURLPattern interface
final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType); final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType);
loadList(blFile, sep); loadList(blFile, sep);
} }
@ -240,8 +253,8 @@ public class Blacklist {
public void remove(final String blacklistType, final String host, final String path) { public void remove(final String blacklistType, final String host, final String path) {
final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, true); final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
List<String> hostList = blacklistMap.get(host); List<Pattern> hostList = blacklistMap.get(host);
if (hostList != null) { if (hostList != null) {
hostList.remove(path); hostList.remove(path);
if (hostList.isEmpty()) { if (hostList.isEmpty()) {
@ -249,7 +262,7 @@ public class Blacklist {
} }
} }
final Map<String, List<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false); final Map<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
hostList = blacklistMapNotMatch.get(host); hostList = blacklistMapNotMatch.get(host);
if (hostList != null) { if (hostList != null) {
hostList.remove(path); hostList.remove(path);
@ -268,19 +281,17 @@ public class Blacklist {
} }
final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path; final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, isMatchable(host));
final Map<String, List<String>> blacklistMap = getBlacklistMap(blacklistType, isMatchable(host));
// avoid PatternSyntaxException e // avoid PatternSyntaxException e
final String h = final String h = ((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
List<String> hostList; List<Pattern> hostList;
if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) { if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) {
blacklistMap.put(h, (hostList = new ArrayList<String>())); blacklistMap.put(h, (hostList = new ArrayList<Pattern>()));
} }
hostList.add(p); hostList.add(Pattern.compile(p));
} }
public int blacklistCacheSize() { public int blacklistCacheSize() {
@ -300,14 +311,12 @@ public class Blacklist {
boolean ret = false; boolean ret = false;
if (blacklistType != null && host != null && path != null) { if (blacklistType != null && host != null && path != null) {
final Map<String, List<String>> blacklistMap = final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, isMatchable(host));
getBlacklistMap(blacklistType, isMatchable(host));
// avoid PatternSyntaxException e // avoid PatternSyntaxException e
final String h = final String h = ((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
((!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') ? "." + host : host).toLowerCase();
final List<String> hostList = blacklistMap.get(h); final List<Pattern> hostList = blacklistMap.get(h);
if (hostList != null) { if (hostList != null) {
ret = hostList.contains(path); ret = hostList.contains(path);
} }
@ -338,13 +347,11 @@ public class Blacklist {
return true; return true;
} }
private final static Pattern m1 = Pattern.compile("^[a-z0-9.-]*$"); // simple Domain (yacy.net or www.yacy.net)
private final static Pattern m2 = Pattern.compile("^\\*\\.[a-z0-9-.]*$"); // start with *. (not .* and * must follow a dot)
private final static Pattern m3 = Pattern.compile("^[a-z0-9-.]*\\.\\*$"); // ends with .* (not *. and before * must be a dot)
public static boolean isMatchable(final String host) { public static boolean isMatchable(final String host) {
return (m1.matcher(host).matches() || m2.matcher(host).matches() || m3.matcher(host).matches());
return (
(Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
|| (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
|| (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and before * must be a dot)
);
} }
public String getEngineInfo() { public String getEngineInfo() {
@ -360,24 +367,19 @@ public class Blacklist {
} }
// getting the proper blacklist // getting the proper blacklist
final Map<String, List<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true); final Map<String, List<Pattern>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path; final String p = (path.length() > 0 && path.charAt(0) == '/') ? path.substring(1) : path;
List<String> app; List<Pattern> app;
boolean matched = false; boolean matched = false;
String pp = ""; // path-pattern Pattern pp; // path-pattern
// try to match complete domain // try to match complete domain
if (!matched && (app = blacklistMapMatched.get(hostlow)) != null) { if (!matched && (app = blacklistMapMatched.get(hostlow)) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) { for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i); pp = app.get(i);
if (pp.indexOf("?*",0) > 0) { matched |= pp.matcher(p).matches();
// prevent "Dangling meta character '*'" exception
Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + pp);
continue;
}
matched |= (("*".equals(pp)) || (p.matches(pp)));
} }
} }
// first try to match the domain with wildcard '*' // first try to match the domain with wildcard '*'
@ -387,13 +389,13 @@ public class Blacklist {
if ((app = blacklistMapMatched.get(hostlow.substring(0, index + 1) + "*")) != null) { if ((app = blacklistMapMatched.get(hostlow.substring(0, index + 1) + "*")) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) { for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i); pp = app.get(i);
matched |= (("*".equals(pp)) || (p.matches(pp))); matched |= pp.matcher(p).matches();
} }
} }
if ((app = blacklistMapMatched.get(hostlow.substring(0, index))) != null) { if ((app = blacklistMapMatched.get(hostlow.substring(0, index))) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) { for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i); pp = app.get(i);
matched |= (("*".equals(pp)) || (p.matches(pp))); matched |= pp.matcher(p).matches();
} }
} }
} }
@ -402,13 +404,13 @@ public class Blacklist {
if ((app = blacklistMapMatched.get("*" + hostlow.substring(index, hostlow.length()))) != null) { if ((app = blacklistMapMatched.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) { for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i); pp = app.get(i);
matched |= (("*".equals(pp)) || (p.matches(pp))); matched |= pp.matcher(p).matches();
} }
} }
if ((app = blacklistMapMatched.get(hostlow.substring(index + 1, hostlow.length()))) != null) { if ((app = blacklistMapMatched.get(hostlow.substring(index + 1, hostlow.length()))) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) { for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i); pp = app.get(i);
matched |= (("*".equals(pp)) || (p.matches(pp))); matched |= pp.matcher(p).matches();
} }
} }
} }
@ -416,15 +418,15 @@ public class Blacklist {
// loop over all Regexentrys // loop over all Regexentrys
if (!matched) { if (!matched) {
final Map<String, List<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false); final Map<String, List<Pattern>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
String key; String key;
for (final Entry<String, List<String>> entry : blacklistMapNotMatched.entrySet()) { for (final Entry<String, List<Pattern>> entry : blacklistMapNotMatched.entrySet()) {
key = entry.getKey(); key = entry.getKey();
try { try {
if (Pattern.matches(key, hostlow)) { if (Pattern.matches(key, hostlow)) {
app = entry.getValue(); app = entry.getValue();
for (int i = 0; i < app.size(); i++) { for (int i = 0; i < app.size(); i++) {
if (Pattern.matches(app.get(i), p)) { if (app.get(i).matcher(p).matches()) {
return true; return true;
} }
} }

@ -2504,7 +2504,7 @@ public final class Switchboard extends serverSwitch
condenser[i] = condenser[i] =
new Condenser(in.documents[i], in.queueEntry.profile().indexText(), in.queueEntry new Condenser(in.documents[i], in.queueEntry.profile().indexText(), in.queueEntry
.profile() .profile()
.indexMedia(), LibraryProvider.dymLib); .indexMedia(), LibraryProvider.dymLib, true);
// update image result list statistics // update image result list statistics
// its good to do this concurrently here, because it needs a DNS lookup // its good to do this concurrently here, because it needs a DNS lookup
@ -2776,7 +2776,7 @@ public final class Switchboard extends serverSwitch
throw new Parser.Failure("indexing is denied", url); throw new Parser.Failure("indexing is denied", url);
} }
final Condenser condenser = final Condenser condenser =
new Condenser(document, true, true, LibraryProvider.dymLib); new Condenser(document, true, true, LibraryProvider.dymLib, true);
ResultImages.registerImages(url, document, true); ResultImages.registerImages(url, document, true);
Switchboard.this.webStructure.generateCitationReference(url, document, condenser); Switchboard.this.webStructure.generateCitationReference(url, document, condenser);
storeDocumentIndex( storeDocumentIndex(

@ -158,7 +158,7 @@ public class DocumentIndex extends Segment
final URIMetadataRow[] rows = new URIMetadataRow[documents.length]; final URIMetadataRow[] rows = new URIMetadataRow[documents.length];
int c = 0; int c = 0;
for ( final Document document : documents ) { for ( final Document document : documents ) {
final Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib); final Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib, true);
rows[c++] = rows[c++] =
super.storeDocument( super.storeDocument(
url, url,

@ -474,7 +474,7 @@ public class Segment {
} }
// get the word set // get the word set
Set<String> words = null; Set<String> words = null;
words = new Condenser(document, true, true, null).words().keySet(); words = new Condenser(document, true, true, null, false).words().keySet();
// delete all word references // delete all word references
int count = 0; int count = 0;

@ -82,7 +82,7 @@ public class AccessTracker {
final long timeout = System.currentTimeMillis() - maxAge; final long timeout = System.currentTimeMillis() - maxAge;
while (list.size() > 0) { while (list.size() > 0) {
final QueryParams q = list.getFirst(); final QueryParams q = list.getFirst();
if (q.time.longValue() > timeout) break; if (q.starttime > timeout) break;
addToDump(list.removeFirst()); addToDump(list.removeFirst());
} }
} }
@ -103,7 +103,7 @@ public class AccessTracker {
//if (query.resultcount == 0) return; //if (query.resultcount == 0) return;
if (query.queryString == null || query.queryString.length() == 0) return; if (query.queryString == null || query.queryString.length() == 0) return;
final StringBuilder sb = new StringBuilder(40); final StringBuilder sb = new StringBuilder(40);
sb.append(GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date(query.time))); sb.append(GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date(query.starttime)));
sb.append(' '); sb.append(' ');
sb.append(Integer.toString(query.resultcount)); sb.append(Integer.toString(query.resultcount));
sb.append(' '); sb.append(' ');

@ -133,7 +133,7 @@ public final class QueryParams {
public final String tenant; public final String tenant;
public final Modifier modifier; public final Modifier modifier;
public Seed remotepeer; public Seed remotepeer;
public final Long time; public final long starttime, maxtime, timeout; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds)
// values that are set after a search: // values that are set after a search:
public int resultcount; // number of found results public int resultcount; // number of found results
public int transmitcount; // number of results that had been shown to the user public int transmitcount; // number of results that had been shown to the user
@ -192,7 +192,9 @@ public final class QueryParams {
this.siteexcludes = null; this.siteexcludes = null;
this.authorhash = null; this.authorhash = null;
this.remotepeer = null; this.remotepeer = null;
this.time = Long.valueOf(System.currentTimeMillis()); this.starttime = Long.valueOf(System.currentTimeMillis());
this.maxtime = 10000;
this.timeout = this.starttime + this.timeout;
this.specialRights = false; this.specialRights = false;
this.navigators = "all"; this.navigators = "all";
this.indexSegment = indexSegment; this.indexSegment = indexSegment;
@ -270,7 +272,9 @@ public final class QueryParams {
this.snippetCacheStrategy = snippetCacheStrategy; this.snippetCacheStrategy = snippetCacheStrategy;
this.host = host; this.host = host;
this.remotepeer = null; this.remotepeer = null;
this.time = Long.valueOf(System.currentTimeMillis()); this.starttime = Long.valueOf(System.currentTimeMillis());
this.maxtime = 10000;
this.timeout = this.starttime + this.timeout;
this.specialRights = specialRights; this.specialRights = specialRights;
this.indexSegment = indexSegment; this.indexSegment = indexSegment;
this.userAgent = userAgent; this.userAgent = userAgent;
@ -378,6 +382,7 @@ public final class QueryParams {
public static final boolean anymatch(final String text, final HandleSet keyhashes) { public static final boolean anymatch(final String text, final HandleSet keyhashes) {
// returns true if any of the word hashes in keyhashes appear in the String text // returns true if any of the word hashes in keyhashes appear in the String text
// to do this, all words in the string must be recognized and transcoded to word hashes // to do this, all words in the string must be recognized and transcoded to word hashes
if (keyhashes == null || keyhashes.isEmpty()) return false;
final HandleSet wordhashes = Word.words2hashesHandles(Condenser.getWords(text, null).keySet()); final HandleSet wordhashes = Word.words2hashesHandles(Condenser.getWords(text, null).keySet());
return SetTools.anymatch(wordhashes, keyhashes); return SetTools.anymatch(wordhashes, keyhashes);
} }

@ -99,6 +99,7 @@ public final class RWIProcess extends Thread
private final ReferenceOrder order; private final ReferenceOrder order;
private boolean addRunning; private boolean addRunning;
private final boolean remote; private final boolean remote;
private final long maxtime;
// navigation scores // navigation scores
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
@ -145,6 +146,7 @@ public final class RWIProcess extends Thread
this.maxExpectedRemoteReferences = new AtomicInteger(0); this.maxExpectedRemoteReferences = new AtomicInteger(0);
this.expectedRemoteReferences = new AtomicInteger(0); this.expectedRemoteReferences = new AtomicInteger(0);
this.receivedRemoteReferences = new AtomicInteger(0); this.receivedRemoteReferences = new AtomicInteger(0);
this.maxtime = query.maxtime;
} }
public void addExpectedRemoteReferences(int x) { public void addExpectedRemoteReferences(int x) {
@ -206,7 +208,7 @@ public final class RWIProcess extends Thread
System.currentTimeMillis() - timer), System.currentTimeMillis() - timer),
false); false);
if ( !index.isEmpty() ) { if ( !index.isEmpty() ) {
add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, true, 10000); add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, true, this.maxtime);
} }
} catch ( final Exception e ) { } catch ( final Exception e ) {
Log.logException(e); Log.logException(e);
@ -260,6 +262,7 @@ public final class RWIProcess extends Thread
this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts", 0) >= 0; this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts", 0) >= 0;
// apply all constraints // apply all constraints
long timeout = System.currentTimeMillis() + maxtime;
try { try {
WordReferenceVars iEntry; WordReferenceVars iEntry;
final String pattern = this.query.urlMask.pattern(); final String pattern = this.query.urlMask.pattern();
@ -269,9 +272,16 @@ public final class RWIProcess extends Thread
|| pattern.equals("ftp://.*") || pattern.equals("ftp://.*")
|| pattern.equals("smb://.*") || pattern.equals("smb://.*")
|| pattern.equals("file://.*"); || pattern.equals("file://.*");
long remaining;
pollloop: while ( true ) { pollloop: while ( true ) {
iEntry = decodedEntries.poll(1, TimeUnit.SECONDS); remaining = timeout - System.currentTimeMillis();
if ( iEntry == null || iEntry == WordReferenceVars.poison ) { if (remaining <= 0) break;
iEntry = decodedEntries.poll(remaining, TimeUnit.MILLISECONDS);
if ( iEntry == null ) {
Log.logWarning("RWIProcess", "terminated 'add' loop after poll time-out = " + remaining);
break pollloop;
}
if ( iEntry == WordReferenceVars.poison ) {
break pollloop; break pollloop;
} }
assert (iEntry.urlhash().length == index.row().primaryKeyLength); assert (iEntry.urlhash().length == index.row().primaryKeyLength);
@ -363,6 +373,7 @@ public final class RWIProcess extends Thread
//} //}
} }
} }
if (System.currentTimeMillis() >= timeout) Log.logWarning("RWIProcess", "rwi normalization ended with timeout = " + maxtime);
} catch ( final InterruptedException e ) { } catch ( final InterruptedException e ) {
} catch ( final RowSpaceExceededException e ) { } catch ( final RowSpaceExceededException e ) {
@ -602,9 +613,10 @@ public final class RWIProcess extends Thread
final String pagetitle = page.dc_title().toLowerCase(); final String pagetitle = page.dc_title().toLowerCase();
// check exclusion // check exclusion
if ( (QueryParams.anymatch(pagetitle, this.query.excludeHashes)) if ( this.query.excludeHashes != null && !this.query.excludeHashes.isEmpty() &&
((QueryParams.anymatch(pagetitle, this.query.excludeHashes))
|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.excludeHashes)) || (QueryParams.anymatch(pageurl.toLowerCase(), this.query.excludeHashes))
|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.excludeHashes)) ) { || (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.excludeHashes)))) {
this.sortout++; this.sortout++;
continue; continue;
} }
@ -634,7 +646,7 @@ public final class RWIProcess extends Thread
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) > 0.0d && (lon = page.lon()) > 0.0d) { if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) > 0.0d && (lon = page.lon()) > 0.0d) {
double latDelta = this.query.lat - lat; double latDelta = this.query.lat - lat;
double lonDelta = this.query.lon - lon; double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta) / 2; // pythagoras
if (distance > this.query.radius) { if (distance > this.query.radius) {
this.sortout++; this.sortout++;
continue; continue;

@ -365,7 +365,7 @@ public class SnippetProcess {
(this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) { (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) {
break; break;
} }
worker = new Worker(i, 10000, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults); worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults);
worker.start(); worker.start();
this.workerThreads[i] = worker; this.workerThreads[i] = worker;
if (this.rankingProcess.expectMoreRemoteReferences()) { if (this.rankingProcess.expectMoreRemoteReferences()) {
@ -387,7 +387,7 @@ public class SnippetProcess {
break; break;
} }
if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) { if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
worker = new Worker(i, 10000, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults); worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults);
worker.start(); worker.start();
this.workerThreads[i] = worker; this.workerThreads[i] = worker;
deployCount--; deployCount--;
@ -533,6 +533,9 @@ public class SnippetProcess {
SnippetProcess.this.rankingProcess.addTopics(resultEntry); SnippetProcess.this.rankingProcess.addTopics(resultEntry);
} }
} }
if (System.currentTimeMillis() >= this.timeout) {
Log.logWarning("SnippetProcess", "worker ended with timoeout");
}
//System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops); //System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
} catch (final Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);

@ -71,10 +71,6 @@ public class ReferenceOrder {
if (container.size() < 100) threads = 2; if (container.size() < 100) threads = 2;
final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime); final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime);
distributor.start(); distributor.start();
try {
distributor.join(10); // let the distributor work for at least 10 milliseconds
} catch (final InterruptedException e) {
}
// return the resulting queue while the processing queues are still working // return the resulting queue while the processing queues are still working
return out; return out;
@ -103,17 +99,21 @@ public class ReferenceOrder {
final Semaphore termination = new Semaphore(this.threads); final Semaphore termination = new Semaphore(this.threads);
final NormalizeWorker[] worker = new NormalizeWorker[this.threads]; final NormalizeWorker[] worker = new NormalizeWorker[this.threads];
for (int i = 0; i < this.threads; i++) { for (int i = 0; i < this.threads; i++) {
worker[i] = new NormalizeWorker(this.out, termination); worker[i] = new NormalizeWorker(this.out, termination, this.maxtime);
worker[i].start(); worker[i].start();
} }
// fill the queue // fill the queue
WordReferenceVars iEntry; WordReferenceVars iEntry;
int p = 0; int p = 0;
long timeout = System.currentTimeMillis() + this.maxtime;
try { try {
while ((iEntry = vars.take()) != WordReferenceVars.poison) { while ((iEntry = vars.take()) != WordReferenceVars.poison) {
worker[p % this.threads].add(iEntry); worker[p % this.threads].add(iEntry);
p++; p++;
if (System.currentTimeMillis() > timeout) {
Log.logWarning("NormalizeDistributor", "adding of decoded rows to workers ended with timeout = " + this.maxtime);
}
} }
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
} }
@ -136,11 +136,13 @@ public class ReferenceOrder {
private final BlockingQueue<WordReferenceVars> out; private final BlockingQueue<WordReferenceVars> out;
private final Semaphore termination; private final Semaphore termination;
private final BlockingQueue<WordReferenceVars> decodedEntries; private final BlockingQueue<WordReferenceVars> decodedEntries;
private final long maxtime;
/**
 * Creates a worker with its own, initially empty and unbounded input queue.
 *
 * @param out         the result queue handed in by the distributor
 * @param termination the semaphore handed in by the distributor, which creates it
 *                    with one permit per worker thread
 *                    (NOTE(review): how it is acquired/released is outside this block)
 * @param maxtime     time budget in milliseconds; the worker loop adds it to the
 *                    current time to obtain its deadline
 */
public NormalizeWorker(final BlockingQueue<WordReferenceVars> out, final Semaphore termination, final long maxtime) {
    this.out = out;
    this.termination = termination;
    // private inbox of this worker; entries are consumed with take() in the worker loop
    this.decodedEntries = new LinkedBlockingQueue<WordReferenceVars>();
    this.maxtime = maxtime;
}
public void add(final WordReferenceVars entry) { public void add(final WordReferenceVars entry) {
@ -158,6 +160,7 @@ public class ReferenceOrder {
String dom; String dom;
Integer count; Integer count;
final Integer int1 = 1; final Integer int1 = 1;
long timeout = System.currentTimeMillis() + this.maxtime;
while ((iEntry = this.decodedEntries.take()) != WordReferenceVars.poison) { while ((iEntry = this.decodedEntries.take()) != WordReferenceVars.poison) {
// find min/max // find min/max
if (ReferenceOrder.this.min == null) ReferenceOrder.this.min = iEntry.clone(); else ReferenceOrder.this.min.min(iEntry); if (ReferenceOrder.this.min == null) ReferenceOrder.this.min = iEntry.clone(); else ReferenceOrder.this.min.min(iEntry);
@ -171,6 +174,11 @@ public class ReferenceOrder {
} else { } else {
doms0.put(dom, LargeNumberCache.valueOf(count.intValue() + 1)); doms0.put(dom, LargeNumberCache.valueOf(count.intValue() + 1));
} }
if (System.currentTimeMillis() > timeout) {
Log.logWarning("NormalizeWorker", "normlization of decoded rows ended with timeout = " + this.maxtime);
break;
}
} }
// update domain score // update domain score

Loading…
Cancel
Save