performance hacks: more pre-allocated StringBuilder

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7790 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 87bd559c42
commit 7db208c992

@ -61,10 +61,9 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.rwi.ReferenceContainerArray; import net.yacy.kelondro.rwi.ReferenceContainerArray;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import de.anomic.search.MetadataRepository; import de.anomic.search.MetadataRepository;
import de.anomic.search.Segment;
import de.anomic.search.MetadataRepository.Export; import de.anomic.search.MetadataRepository.Export;
import de.anomic.search.Segment;
public class URLAnalysis { public class URLAnalysis {
@ -78,15 +77,15 @@ public class URLAnalysis {
static { static {
try { try {
poison = new DigestURI("http://poison.org/poison"); poison = new DigestURI("http://poison.org/poison");
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
poison = null; poison = null;
} }
} }
public static class splitter extends Thread { public static class splitter extends Thread {
private ArrayBlockingQueue<DigestURI> in; private final ArrayBlockingQueue<DigestURI> in;
private ConcurrentHashMap<String, Integer> out; private final ConcurrentHashMap<String, Integer> out;
public splitter(final ArrayBlockingQueue<DigestURI> in, final ConcurrentHashMap<String, Integer> out) { public splitter(final ArrayBlockingQueue<DigestURI> in, final ConcurrentHashMap<String, Integer> out) {
this.in = in; this.in = in;
@ -100,15 +99,15 @@ public class URLAnalysis {
final Pattern p = Pattern.compile("~|\\(|\\)|\\+|-|@|:|%|\\.|;|_"); final Pattern p = Pattern.compile("~|\\(|\\)|\\+|-|@|:|%|\\.|;|_");
while (true) { while (true) {
try { try {
url = in.take(); url = this.in.take();
if (url == poison) break; if (url == poison) break;
update(patternMinus.matcher(url.getHost()).replaceAll("\\.").split("\\.")); update(patternMinus.matcher(url.getHost()).replaceAll("\\.").split("\\."));
update(p.matcher(url.getPath()).replaceAll("/").split("/")); update(p.matcher(url.getPath()).replaceAll("/").split("/"));
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
} }
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
} }
@ -117,8 +116,8 @@ public class URLAnalysis {
Integer c; Integer c;
for (final String t: s) { for (final String t: s) {
if (t.length() == 0) continue; if (t.length() == 0) continue;
c = out.get(t); c = this.out.get(t);
out.put(t, (c == null) ? 1 : c.intValue() + 1); this.out.put(t, (c == null) ? 1 : c.intValue() + 1);
} }
} }
} }
@ -165,7 +164,7 @@ public class URLAnalysis {
final File outfile = new File(analysis); final File outfile = new File(analysis);
BufferedReader reader = null; BufferedReader reader = null;
long time = System.currentTimeMillis(); long time = System.currentTimeMillis();
long start = time; final long start = time;
int count = 0; int count = 0;
System.out.println("start processing"); System.out.println("start processing");
@ -178,11 +177,11 @@ public class URLAnalysis {
line = line.trim(); line = line.trim();
if (line.length() > 0) { if (line.length() > 0) {
try { try {
DigestURI url = new DigestURI(line); final DigestURI url = new DigestURI(line);
in.put(url); in.put(url);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
continue; continue;
} }
} }
@ -208,12 +207,12 @@ public class URLAnalysis {
System.out.println("stopping threads"); System.out.println("stopping threads");
for (int i = 0, available = Runtime.getRuntime().availableProcessors() + 1; i < available; i++) try { for (int i = 0, available = Runtime.getRuntime().availableProcessors() + 1; i < available; i++) try {
in.put(poison); in.put(poison);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
try { try {
spl.join(); spl.join();
} catch (InterruptedException e1) { } catch (final InterruptedException e1) {
Log.logException(e1); Log.logException(e1);
} }
@ -252,7 +251,7 @@ public class URLAnalysis {
} }
} }
os.close(); os.close();
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
@ -280,9 +279,9 @@ public class URLAnalysis {
line = line.trim(); line = line.trim();
if (line.length() > 0) { if (line.length() > 0) {
try { try {
DigestURI url = new DigestURI(line); final DigestURI url = new DigestURI(line);
hosts.add(url.getHost()); hosts.add(url.getHost());
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
continue; continue;
} }
} }
@ -324,7 +323,7 @@ public class URLAnalysis {
// write hosts // write hosts
System.out.println("start writing results"); System.out.println("start writing results");
File outfile = new File(trunk + ((gz) ? ".gz" : "")); final File outfile = new File(trunk + ((gz) ? ".gz" : ""));
long time = System.currentTimeMillis(); long time = System.currentTimeMillis();
try { try {
OutputStream os = new BufferedOutputStream(new FileOutputStream(outfile)); OutputStream os = new BufferedOutputStream(new FileOutputStream(outfile));
@ -340,7 +339,7 @@ public class URLAnalysis {
} }
} }
os.close(); os.close();
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
@ -370,9 +369,9 @@ public class URLAnalysis {
line = line.trim(); line = line.trim();
if (line.length() > 0) { if (line.length() > 0) {
try { try {
DigestURI url = new DigestURI(line); final DigestURI url = new DigestURI(line);
urls.add(url.toNormalform(true, true)); urls.add(url.toNormalform(true, true));
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
continue; continue;
} }
} }
@ -412,7 +411,7 @@ public class URLAnalysis {
idx.dump(new File(statisticPath)); idx.dump(new File(statisticPath));
System.out.println("INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath); System.out.println("INDEX REFERENCE COLLECTION finished dump, wrote " + idx.size() + " entries to " + statisticPath);
idx.close(); idx.close();
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
} }
@ -454,7 +453,7 @@ public class URLAnalysis {
final Export e = mr.export(new File(export), ".*", hs, format, false); final Export e = mr.export(new File(export), ".*", hs, format, false);
try { try {
e.join(); e.join();
} catch (InterruptedException e1) { } catch (final InterruptedException e1) {
Log.logException(e1); Log.logException(e1);
} }
System.out.println("URL EXPORT finished export, wrote " + ((hs == null) ? mr.size() : hs.size()) + " entries"); System.out.println("URL EXPORT finished export, wrote " + ((hs == null) ? mr.size() : hs.size()) + " entries");
@ -495,7 +494,7 @@ public class URLAnalysis {
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump
try { try {
diffurlcol(args[1], args[2], args[3]); diffurlcol(args[1], args[2], args[3]);
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
} else if (args[0].equals("-export") && args.length >= 4) { } else if (args[0].equals("-export") && args.length >= 4) {
@ -503,10 +502,10 @@ public class URLAnalysis {
// example: // example:
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -export DATA/INDEX/freeworld/TEXT xml urls.xml diffurlcol.dump // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -export DATA/INDEX/freeworld/TEXT xml urls.xml diffurlcol.dump
// instead of 'xml' (which is in fact a rss), the format can also be 'text' and 'html' // instead of 'xml' (which is in fact a rss), the format can also be 'text' and 'html'
int format = (args[2].equals("xml")) ? 2 : (args[2].equals("html")) ? 1 : 0; final int format = (args[2].equals("xml")) ? 2 : (args[2].equals("html")) ? 1 : 0;
try { try {
export(args[1], format, args[3], (args.length >= 5) ? args[4] : null); export(args[1], format, args[3], (args.length >= 5) ? args[4] : null);
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
} else if (args[0].equals("-delete") && args.length >= 3) { } else if (args[0].equals("-delete") && args.length >= 3) {
@ -516,7 +515,7 @@ public class URLAnalysis {
// instead of 'xml' (which is in fact a rss), the format can also be 'text' and 'html' // instead of 'xml' (which is in fact a rss), the format can also be 'text' and 'html'
try { try {
delete(args[1], args[2]); delete(args[1], args[2]);
} catch (Exception e) { } catch (final Exception e) {
Log.logException(e); Log.logException(e);
} }
} else { } else {
@ -555,7 +554,7 @@ public class URLAnalysis {
} }
private static final String num(final int i) { private static final String num(final int i) {
StringBuffer s = new StringBuffer(Integer.toString(i)); final StringBuilder s = new StringBuilder(Integer.toString(i));
while (s.length() < 9) s.insert(0, "0"); while (s.length() < 9) s.insert(0, "0");
return s.toString(); return s.toString();
} }

@ -32,13 +32,12 @@ import java.util.Iterator;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
public class YMarkCrawlStart extends HashMap<String,String>{ public class YMarkCrawlStart extends HashMap<String,String>{
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private WorkTables worktables; private final WorkTables worktables;
public YMarkCrawlStart(final WorkTables worktables) { public YMarkCrawlStart(final WorkTables worktables) {
this.worktables = worktables; this.worktables = worktables;
@ -46,13 +45,13 @@ public class YMarkCrawlStart extends HashMap<String,String>{
public YMarkCrawlStart(final WorkTables worktables, final String url) { public YMarkCrawlStart(final WorkTables worktables, final String url) {
this.worktables = worktables; this.worktables = worktables;
this.clear(); clear();
this.load(url); load(url);
} }
public void load(String url) { public void load(final String url) {
try { try {
final StringBuffer buffer = new StringBuffer(500); final StringBuilder buffer = new StringBuilder(500);
buffer.append("^.*crawlingURL=\\Q"); buffer.append("^.*crawlingURL=\\Q");
buffer.append(url); buffer.append(url);
buffer.append("\\E?.*"); buffer.append("\\E?.*");
@ -78,12 +77,12 @@ public class YMarkCrawlStart extends HashMap<String,String>{
end = buffer.length()-1; end = buffer.length()-1;
value = buffer.substring(start, end); value = buffer.substring(start, end);
start = end+1; start = end+1;
this.put(key, value); put(key, value);
} }
break; break;
} }
} }
} catch (IOException e) { } catch (final IOException e) {
// TODO Auto-generated catch block // TODO Auto-generated catch block
} }
} }

@ -48,13 +48,13 @@ public class YMarkTables {
private String basename; private String basename;
private TABLES(String b) { private TABLES(final String b) {
this.basename = b; this.basename = b;
} }
public String basename() { public String basename() {
return this.basename; return this.basename;
} }
public String tablename(String bmk_user) { public String tablename(final String bmk_user) {
return bmk_user+this.basename; return bmk_user+this.basename;
} }
} }
@ -65,13 +65,13 @@ public class YMarkTables {
private String protocol; private String protocol;
private PROTOCOLS(String s) { private PROTOCOLS(final String s) {
this.protocol = s; this.protocol = s;
} }
public String protocol() { public String protocol() {
return this.protocol; return this.protocol;
} }
public String protocol(String s) { public String protocol(final String s) {
return this.protocol+s; return this.protocol+s;
} }
} }
@ -181,7 +181,7 @@ public class YMarkTables {
public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String folder) throws IOException { public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String folder) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final StringBuffer patternBuilder = new StringBuffer(BUFFER_LENGTH); final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH);
patternBuilder.setLength(0); patternBuilder.setLength(0);
patternBuilder.append(p1); patternBuilder.append(p1);
patternBuilder.append('('); patternBuilder.append('(');
@ -196,7 +196,7 @@ public class YMarkTables {
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String[] tagArray) throws IOException { public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String[] tagArray) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final StringBuffer patternBuilder = new StringBuffer(BUFFER_LENGTH); final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH);
patternBuilder.setLength(0); patternBuilder.setLength(0);
patternBuilder.append(p1); patternBuilder.append(p1);
patternBuilder.append(p5); patternBuilder.append(p5);
@ -215,7 +215,7 @@ public class YMarkTables {
} }
public SortedSet<Row> orderBookmarksBy(final Iterator<Row> rowIterator, final String sortname, final String sortorder) { public SortedSet<Row> orderBookmarksBy(final Iterator<Row> rowIterator, final String sortname, final String sortorder) {
TreeSet<Row> sortTree = new TreeSet<Tables.Row>(new TablesRowComparator(sortname)); final TreeSet<Row> sortTree = new TreeSet<Tables.Row>(new TablesRowComparator(sortname));
Row row; Row row;
while (rowIterator.hasNext()) { while (rowIterator.hasNext()) {
row = rowIterator.next(); row = rowIterator.next();
@ -233,7 +233,7 @@ public class YMarkTables {
final YMarkEntry bmk = new YMarkEntry(false); final YMarkEntry bmk = new YMarkEntry(false);
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url); bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagString)); bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagString));
this.addBookmark(bmk_user, bmk, merge, true); addBookmark(bmk_user, bmk, merge, true);
} }
} }
@ -243,7 +243,7 @@ public class YMarkTables {
final YMarkEntry bmk = new YMarkEntry(false); final YMarkEntry bmk = new YMarkEntry(false);
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url); bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), folder); bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), folder);
this.addBookmark(bmk_user, bmk, true, true); addBookmark(bmk_user, bmk, true, true);
} }
} }
@ -252,7 +252,7 @@ public class YMarkTables {
final YMarkEntry bmk = new YMarkEntry(false); final YMarkEntry bmk = new YMarkEntry(false);
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url); bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), (new YMarkDate()).toString()); bmk.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), (new YMarkDate()).toString());
this.addBookmark(bmk_user, bmk, true, true); addBookmark(bmk_user, bmk, true, true);
} }
@ -272,7 +272,7 @@ public class YMarkTables {
HashSet<String> oldSet; HashSet<String> oldSet;
HashSet<String> newSet; HashSet<String> newSet;
for (YMarkEntry.BOOKMARK b : YMarkEntry.BOOKMARK.values()) { for (final YMarkEntry.BOOKMARK b : YMarkEntry.BOOKMARK.values()) {
switch(b) { switch(b) {
case DATE_ADDED: case DATE_ADDED:
if(!bmk_row.containsKey(b.key())) if(!bmk_row.containsKey(b.key()))

@ -1377,7 +1377,7 @@ public final class HTTPDFileHandler {
final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)"); final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)");
final Matcher m = p.matcher(sbuffer); final Matcher m = p.matcher(sbuffer);
final StringBuffer result = new StringBuffer(); final StringBuffer result = new StringBuffer(80);
while (m.find()) { while (m.find()) {
String init = null; String init = null;
if(m.group(1) != null) init = m.group(1); if(m.group(1) != null) init = m.group(1);

@ -37,12 +37,13 @@ import java.util.TreeSet;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Scanner; import net.yacy.cora.protocol.Scanner;
import net.yacy.cora.storage.ConcurrentScoreMap;
import net.yacy.cora.storage.ClusteredScoreMap; import net.yacy.cora.storage.ClusteredScoreMap;
import net.yacy.cora.storage.ConcurrentScoreMap;
import net.yacy.cora.storage.ScoreMap; import net.yacy.cora.storage.ScoreMap;
import net.yacy.cora.storage.WeakPriorityBlockingQueue; import net.yacy.cora.storage.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement; import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement;
@ -55,7 +56,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.TermSearch; import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.kelondro.util.EventTracker; import net.yacy.kelondro.util.EventTracker;
import de.anomic.yacy.graphics.ProfilingGraph; import de.anomic.yacy.graphics.ProfilingGraph;
public final class RankingProcess extends Thread { public final class RankingProcess extends Thread {
@ -132,16 +132,16 @@ public final class RankingProcess extends Thread {
// sort the local containers and truncate it to a limited count, // sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be fast // so following sortings together with the global results will be fast
try { try {
long timer = System.currentTimeMillis(); final long timer = System.currentTimeMillis();
final TermSearch<WordReference> search = this.query.getSegment().termIndex().query( final TermSearch<WordReference> search = this.query.getSegment().termIndex().query(
query.queryHashes, this.query.queryHashes,
query.excludeHashes, this.query.excludeHashes,
null, null,
Segment.wordReferenceFactory, Segment.wordReferenceFactory,
query.maxDistance); this.query.maxDistance);
this.localSearchInclusion = search.inclusion(); this.localSearchInclusion = search.inclusion();
final ReferenceContainer<WordReference> index = search.joined(); final ReferenceContainer<WordReference> index = search.joined();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.JOIN, query.queryString, index.size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(this.query.id(true), SearchEvent.Type.JOIN, this.query.queryString, index.size(), System.currentTimeMillis() - timer), false);
if (index.isEmpty()) { if (index.isEmpty()) {
return; return;
} }
@ -157,7 +157,7 @@ public final class RankingProcess extends Thread {
public void add( public void add(
final ReferenceContainer<WordReference> index, final ReferenceContainer<WordReference> index,
final boolean local, final boolean local,
String resourceName, final String resourceName,
final int fullResource, final int fullResource,
final boolean finalizeAddAtEnd) { final boolean finalizeAddAtEnd) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
@ -180,11 +180,11 @@ public final class RankingProcess extends Thread {
// normalize entries // normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index); final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.NORMALIZING, resourceName, index.size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(this.query.id(true), SearchEvent.Type.NORMALIZING, resourceName, index.size(), System.currentTimeMillis() - timer), false);
// iterate over normalized entries and select some that are better than currently stored // iterate over normalized entries and select some that are better than currently stored
timer = System.currentTimeMillis(); timer = System.currentTimeMillis();
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0; final boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
// apply all constraints // apply all constraints
try { try {
@ -197,7 +197,7 @@ public final class RankingProcess extends Thread {
// increase flag counts // increase flag counts
for (int j = 0; j < 32; j++) { for (int j = 0; j < 32; j++) {
if (iEntry.flags().get(j)) {flagcount[j]++;} if (iEntry.flags().get(j)) {this.flagcount[j]++;}
} }
// check constraints // check constraints
@ -206,11 +206,11 @@ public final class RankingProcess extends Thread {
} }
// check document domain // check document domain
if (query.contentdom != ContentDomain.TEXT) { if (this.query.contentdom != ContentDomain.TEXT) {
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue; } if ((this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { continue; }
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue; } if ((this.query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { continue; }
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue; } if ((this.query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { continue; }
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue; } if ((this.query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { continue; }
} }
// check tld domain // check tld domain
@ -226,27 +226,27 @@ public final class RankingProcess extends Thread {
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++; //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
// check site constraints // check site constraints
String hosthash = iEntry.hosthash(); final String hosthash = iEntry.hosthash();
if (query.sitehash == null) { if (this.query.sitehash == null) {
// no site constraint there; maybe collect host navigation information // no site constraint there; maybe collect host navigation information
if (nav_hosts && query.urlMask_isCatchall) { if (nav_hosts && this.query.urlMask_isCatchall) {
this.hostNavigator.inc(hosthash); this.hostNavigator.inc(hosthash);
this.hostResolver.put(hosthash, iEntry.urlhash()); this.hostResolver.put(hosthash, iEntry.urlhash());
} }
} else { } else {
if (!hosthash.equals(query.sitehash)) { if (!hosthash.equals(this.query.sitehash)) {
// filter out all domains that do not match with the site constraint // filter out all domains that do not match with the site constraint
continue; continue;
} }
} }
// finally make a double-check and insert result to stack // finally make a double-check and insert result to stack
if (urlhashes.add(iEntry.urlhash())) { if (this.urlhashes.add(iEntry.urlhash())) {
rankingtryloop: while (true) { rankingtryloop: while (true) {
try { try {
stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest) this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
break rankingtryloop; break rankingtryloop;
} catch (ArithmeticException e) { } catch (final ArithmeticException e) {
// this may happen if the concurrent normalizer changes values during cardinal computation // this may happen if the concurrent normalizer changes values during cardinal computation
continue rankingtryloop; continue rankingtryloop;
} }
@ -256,12 +256,12 @@ public final class RankingProcess extends Thread {
} }
} }
} catch (InterruptedException e) {} finally { } catch (final InterruptedException e) {} finally {
if (finalizeAddAtEnd) this.addRunning = false; if (finalizeAddAtEnd) this.addRunning = false;
} }
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(this.query.id(true), SearchEvent.Type.PRESORT, resourceName, index.size(), System.currentTimeMillis() - timer), false);
} }
/** /**
@ -281,18 +281,18 @@ public final class RankingProcess extends Thread {
} }
private boolean testFlags(final WordReference ientry) { private boolean testFlags(final WordReference ientry) {
if (query.constraint == null) return true; if (this.query.constraint == null) return true;
// test if ientry matches with filter // test if ientry matches with filter
// if all = true: let only entries pass that has all matching bits // if all = true: let only entries pass that has all matching bits
// if all = false: let all entries pass that has at least one matching bit // if all = false: let all entries pass that has at least one matching bit
if (query.allofconstraint) { if (this.query.allofconstraint) {
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
if ((query.constraint.get(i)) && (!ientry.flags().get(i))) return false; if ((this.query.constraint.get(i)) && (!ientry.flags().get(i))) return false;
} }
return true; return true;
} }
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
if ((query.constraint.get(i)) && (ientry.flags().get(i))) return true; if ((this.query.constraint.get(i)) && (ientry.flags().get(i))) return true;
} }
return false; return false;
} }
@ -300,7 +300,7 @@ public final class RankingProcess extends Thread {
protected Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() { protected Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
// direct access to the result maps is needed for abstract generation // direct access to the result maps is needed for abstract generation
// this is only available if execQuery() was called before // this is only available if execQuery() was called before
return localSearchInclusion; return this.localSearchInclusion;
} }
private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, final long waitingtime) { private WeakPriorityBlockingQueue.Element<WordReferenceVars> takeRWI(final boolean skipDoubleDom, final long waitingtime) {
@ -313,14 +313,14 @@ public final class RankingProcess extends Thread {
try { try {
//System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue()); //System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain
long timeout = System.currentTimeMillis() + waitingtime; final long timeout = System.currentTimeMillis() + waitingtime;
while (((!feedingIsFinished() && this.addRunning) || stack.sizeQueue() > 0) && while (((!feedingIsFinished() && this.addRunning) || this.stack.sizeQueue() > 0) &&
(this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage)) { (this.query.itemsPerPage < 1 || loops++ < this.query.itemsPerPage)) {
if (waitingtime <= 0) { if (waitingtime <= 0) {
rwi = stack.poll(); rwi = this.stack.poll();
} else timeoutloop:while (System.currentTimeMillis() < timeout) { } else timeoutloop:while (System.currentTimeMillis() < timeout) {
if (feedingIsFinished() && stack.sizeQueue() == 0) break timeoutloop; if (feedingIsFinished() && this.stack.sizeQueue() == 0) break timeoutloop;
rwi = stack.poll(50); rwi = this.stack.poll(50);
if (rwi != null) break timeoutloop; if (rwi != null) break timeoutloop;
} }
if (rwi == null) break; if (rwi == null) break;
@ -335,7 +335,7 @@ public final class RankingProcess extends Thread {
m = this.doubleDomCache.get(hosthash); m = this.doubleDomCache.get(hosthash);
if (m == null) { if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned // first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); m = new WeakPriorityBlockingQueue<WordReferenceVars>((this.query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(hosthash, m); this.doubleDomCache.put(hosthash, m);
return rwi; return rwi;
} }
@ -343,7 +343,7 @@ public final class RankingProcess extends Thread {
m.put(rwi); m.put(rwi);
} }
} }
} catch (InterruptedException e1) {} } catch (final InterruptedException e1) {}
if (this.doubleDomCache.isEmpty()) return null; if (this.doubleDomCache.isEmpty()) return null;
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache // no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
@ -355,7 +355,7 @@ public final class RankingProcess extends Thread {
while (i.hasNext()) { while (i.hasNext()) {
try { try {
m = i.next(); m = i.next();
} catch (ConcurrentModificationException e) { } catch (final ConcurrentModificationException e) {
Log.logException(e); Log.logException(e);
continue; // not the best solution... continue; // not the best solution...
} }
@ -400,7 +400,7 @@ public final class RankingProcess extends Thread {
if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element if (obrwi == null) return null; // all time was already wasted in takeRWI to get another element
final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi); final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi);
if (page == null) { if (page == null) {
misses.add(obrwi.getElement().urlhash()); this.misses.add(obrwi.getElement().urlhash());
continue; continue;
} }
@ -413,9 +413,9 @@ public final class RankingProcess extends Thread {
continue; // rare case where the url is corrupted continue; // rare case where the url is corrupted
} }
if (!query.urlMask_isCatchall) { if (!this.query.urlMask_isCatchall) {
// check url mask // check url mask
if (!metadata.matches(query.urlMask)) { if (!metadata.matches(this.query.urlMask)) {
this.sortout++; this.sortout++;
continue; continue;
} }
@ -439,18 +439,18 @@ public final class RankingProcess extends Thread {
final String pagetitle = metadata.dc_title().toLowerCase(); final String pagetitle = metadata.dc_title().toLowerCase();
// check exclusion // check exclusion
if ((QueryParams.anymatch(pagetitle, query.excludeHashes)) || if ((QueryParams.anymatch(pagetitle, this.query.excludeHashes)) ||
(QueryParams.anymatch(pageurl.toLowerCase(), query.excludeHashes)) || (QueryParams.anymatch(pageurl.toLowerCase(), this.query.excludeHashes)) ||
(QueryParams.anymatch(pageauthor.toLowerCase(), query.excludeHashes))) { (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.excludeHashes))) {
this.sortout++; this.sortout++;
continue; continue;
} }
// check index-of constraint // check index-of constraint
if ((query.constraint != null) && if ((this.query.constraint != null) &&
(query.constraint.get(Condenser.flag_cat_indexof)) && (this.query.constraint.get(Condenser.flag_cat_indexof)) &&
(!(pagetitle.startsWith("index of")))) { (!(pagetitle.startsWith("index of")))) {
final Iterator<byte[]> wi = query.queryHashes.iterator(); final Iterator<byte[]> wi = this.query.queryHashes.iterator();
while (wi.hasNext()) { while (wi.hasNext()) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash()); this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
} }
@ -459,18 +459,18 @@ public final class RankingProcess extends Thread {
} }
// check location constraint // check location constraint
if ((query.constraint != null) && if ((this.query.constraint != null) &&
(query.constraint.get(Condenser.flag_cat_haslocation)) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) &&
(metadata.lat() == 0.0f || metadata.lon() == 0.0f)) { (metadata.lat() == 0.0f || metadata.lon() == 0.0f)) {
this.sortout++; this.sortout++;
continue; continue;
} }
// check content domain // check content domain
if ((query.contentdom == ContentDomain.AUDIO && page.laudio() == 0) || if ((this.query.contentdom == ContentDomain.AUDIO && page.laudio() == 0) ||
(query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) || (this.query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) ||
(query.contentdom == ContentDomain.IMAGE && page.limage() == 0) || (this.query.contentdom == ContentDomain.IMAGE && page.limage() == 0) ||
(query.contentdom == ContentDomain.APP && page.lapp() == 0)) { (this.query.contentdom == ContentDomain.APP && page.lapp() == 0)) {
this.sortout++; this.sortout++;
continue; continue;
} }
@ -479,7 +479,7 @@ public final class RankingProcess extends Thread {
// author navigation: // author navigation:
if (pageauthor != null && pageauthor.length() > 0) { if (pageauthor != null && pageauthor.length() > 0) {
// add author to the author navigator // add author to the author navigator
String authorhash = ASCII.String(Word.word2hash(pageauthor)); final String authorhash = ASCII.String(Word.word2hash(pageauthor));
// check if we already are filtering for authors // check if we already are filtering for authors
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
@ -518,31 +518,31 @@ public final class RankingProcess extends Thread {
} }
public int sizeQueue() { public int sizeQueue() {
int c = stack.sizeQueue(); int c = this.stack.sizeQueue();
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) { for (final WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeQueue(); c += s.sizeQueue();
} }
return c; return c;
} }
public int sizeAvailable() { public int sizeAvailable() {
int c = stack.sizeAvailable(); int c = this.stack.sizeAvailable();
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) { for (final WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
c += s.sizeAvailable(); c += s.sizeAvailable();
} }
return c; return c;
} }
public boolean isEmpty() { public boolean isEmpty() {
if (!stack.isEmpty()) return false; if (!this.stack.isEmpty()) return false;
for (WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) { for (final WeakPriorityBlockingQueue<WordReferenceVars> s: this.doubleDomCache.values()) {
if (!s.isEmpty()) return false; if (!s.isEmpty()) return false;
} }
return true; return true;
} }
public int[] flagCount() { public int[] flagCount() {
return flagcount; return this.flagcount;
} }
// "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers." // "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
@ -591,7 +591,7 @@ public final class RankingProcess extends Thread {
} }
public ScoreMap<String> getHostNavigator() { public ScoreMap<String> getHostNavigator() {
ScoreMap<String> result = new ConcurrentScoreMap<String>(); final ScoreMap<String> result = new ConcurrentScoreMap<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result; if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
final Iterator<String> domhashs = this.hostNavigator.keys(false); final Iterator<String> domhashs = this.hostNavigator.keys(false);
@ -613,14 +613,14 @@ public final class RankingProcess extends Thread {
} }
public static final Comparator<Map.Entry<String, Integer>> mecomp = new Comparator<Map.Entry<String, Integer>>() { public static final Comparator<Map.Entry<String, Integer>> mecomp = new Comparator<Map.Entry<String, Integer>>() {
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) { public int compare(final Map.Entry<String, Integer> o1, final Map.Entry<String, Integer> o2) {
if (o1.getValue().intValue() < o2.getValue().intValue()) return 1; if (o1.getValue().intValue() < o2.getValue().intValue()) return 1;
if (o2.getValue().intValue() < o1.getValue().intValue()) return -1; if (o2.getValue().intValue() < o1.getValue().intValue()) return -1;
return 0; return 0;
} }
}; };
public ScoreMap<String> getTopicNavigator(int count) { public ScoreMap<String> getTopicNavigator(final int count) {
// create a list of words that had been computed by statistics over all // create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls // words that appeared in the url or the description of all urls
final ScoreMap<String> result = new ConcurrentScoreMap<String>(); final ScoreMap<String> result = new ConcurrentScoreMap<String>();
@ -645,23 +645,25 @@ public final class RankingProcess extends Thread {
counts.put(word, q); counts.put(word, q);
} }
} }
if (max > min) for (Map.Entry<String, Float> ce: counts.entrySet()) { if (max > min) for (final Map.Entry<String, Float> ce: counts.entrySet()) {
result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min))); result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min)));
} }
return this.ref; return this.ref;
} }
private final static Pattern lettermatch = Pattern.compile("[a-z]+");
public void addTopic(final String[] words) { public void addTopic(final String[] words) {
String word; String word;
for (final String w : words) { for (final String w : words) {
word = w.toLowerCase(); word = w.toLowerCase();
if (word.length() > 2 && if (word.length() > 2 &&
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 && "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off".indexOf(word) < 0 &&
!query.queryHashes.has(Word.word2hash(word)) && !this.query.queryHashes.has(Word.word2hash(word)) &&
word.matches("[a-z]+") && lettermatch.matcher(word).matches() &&
!Switchboard.badwords.contains(word) && !Switchboard.badwords.contains(word) &&
!Switchboard.stopwords.contains(word)) { !Switchboard.stopwords.contains(word)) {
ref.inc(word); this.ref.inc(word);
} }
} }
} }

@ -28,6 +28,7 @@ package de.anomic.yacy.dht;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
@ -41,11 +42,9 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ByteArray; import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.kelondro.workflow.WorkflowProcessor;
import de.anomic.search.Segment; import de.anomic.search.Segment;
import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
import java.util.List;
public class Dispatcher { public class Dispatcher {
@ -111,14 +110,14 @@ public class Dispatcher {
this.seeds = seeds; this.seeds = seeds;
this.log = new Log("INDEX-TRANSFER-DISPATCHER"); this.log = new Log("INDEX-TRANSFER-DISPATCHER");
this.transmission = new Transmission( this.transmission = new Transmission(
log, this.log,
segment, segment,
seeds, seeds,
gzipBody, gzipBody,
timeout); timeout);
int concurrentSender = Math.min(32, Math.max(10, WorkflowProcessor.availableCPU)); final int concurrentSender = Math.min(32, Math.max(10, WorkflowProcessor.availableCPU));
indexingTransmissionProcessor = new WorkflowProcessor<Transmission.Chunk>( this.indexingTransmissionProcessor = new WorkflowProcessor<Transmission.Chunk>(
"transferDocumentIndex", "transferDocumentIndex",
"This is the RWI transmission process", "This is the RWI transmission process",
new String[]{"RWI/Cache/Collections"}, new String[]{"RWI/Cache/Collections"},
@ -152,7 +151,7 @@ public class Dispatcher {
final int maxtime) throws IOException { final int maxtime) throws IOException {
// prefer file // prefer file
ArrayList<ReferenceContainer<WordReference>> containers = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime, false); final ArrayList<ReferenceContainer<WordReference>> containers = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime, false);
// if ram does not provide any result, take from file // if ram does not provide any result, take from file
//if (containers.isEmpty()) containers = selectContainers(hash, limitHash, maxContainerCount, maxtime, false); //if (containers.isEmpty()) containers = selectContainers(hash, limitHash, maxContainerCount, maxtime, false);
@ -193,12 +192,12 @@ public class Dispatcher {
final ArrayList<ReferenceContainer<WordReference>> rc; final ArrayList<ReferenceContainer<WordReference>> rc;
if (ram) { if (ram) {
// selection was only from ram, so we have to carefully remove only the selected entries // selection was only from ram, so we have to carefully remove only the selected entries
HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
Iterator<WordReference> it; Iterator<WordReference> it;
for (ReferenceContainer<WordReference> c: containers) { for (final ReferenceContainer<WordReference> c: containers) {
urlHashes.clear(); urlHashes.clear();
it = c.entries(); it = c.entries();
while (it.hasNext()) try { urlHashes.put(it.next().urlhash()); } catch (RowSpaceExceededException e) { Log.logException(e); } while (it.hasNext()) try { urlHashes.put(it.next().urlhash()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
if (this.log.isFine()) this.log.logFine("selected " + urlHashes.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'"); if (this.log.isFine()) this.log.logFine("selected " + urlHashes.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'");
if (!urlHashes.isEmpty()) this.segment.termIndex().remove(c.getTermHash(), urlHashes); if (!urlHashes.isEmpty()) this.segment.termIndex().remove(c.getTermHash(), urlHashes);
} }
@ -207,7 +206,7 @@ public class Dispatcher {
// selection was from whole index, so we can just delete the whole container // selection was from whole index, so we can just delete the whole container
// but to avoid race conditions return the results from the deletes // but to avoid race conditions return the results from the deletes
rc = new ArrayList<ReferenceContainer<WordReference>>(containers.size()); rc = new ArrayList<ReferenceContainer<WordReference>>(containers.size());
for (ReferenceContainer<WordReference> c: containers) { for (final ReferenceContainer<WordReference> c: containers) {
container = this.segment.termIndex().delete(c.getTermHash()); // be aware this might be null! container = this.segment.termIndex().delete(c.getTermHash()); // be aware this might be null!
if (container != null && !container.isEmpty()) { if (container != null && !container.isEmpty()) {
if (this.log.isFine()) this.log.logFine("selected " + container.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'"); if (this.log.isFine()) this.log.logFine("selected " + container.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'");
@ -229,24 +228,24 @@ public class Dispatcher {
* @throws RowSpaceExceededException * @throws RowSpaceExceededException
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private List<ReferenceContainer<WordReference>>[] splitContainers(List<ReferenceContainer<WordReference>> containers) throws RowSpaceExceededException { private List<ReferenceContainer<WordReference>>[] splitContainers(final List<ReferenceContainer<WordReference>> containers) throws RowSpaceExceededException {
// init the result vector // init the result vector
int partitionCount = this.seeds.scheme.verticalPartitions(); final int partitionCount = this.seeds.scheme.verticalPartitions();
List<ReferenceContainer<WordReference>>[] partitions = (ArrayList<ReferenceContainer<WordReference>>[]) new ArrayList[partitionCount]; final List<ReferenceContainer<WordReference>>[] partitions = new ArrayList[partitionCount];
for (int i = 0; i < partitions.length; i++) partitions[i] = new ArrayList<ReferenceContainer<WordReference>>(); for (int i = 0; i < partitions.length; i++) partitions[i] = new ArrayList<ReferenceContainer<WordReference>>();
// check all entries and split them to the partitions // check all entries and split them to the partitions
ReferenceContainer<WordReference>[] partitionBuffer = new ReferenceContainer[partitionCount]; final ReferenceContainer<WordReference>[] partitionBuffer = new ReferenceContainer[partitionCount];
WordReference re; WordReference re;
for (ReferenceContainer<WordReference> container: containers) { for (final ReferenceContainer<WordReference> container: containers) {
// init the new partitions // init the new partitions
for (int j = 0; j < partitionBuffer.length; j++) { for (int j = 0; j < partitionBuffer.length; j++) {
partitionBuffer[j] = new ReferenceContainer<WordReference>(Segment.wordReferenceFactory, container.getTermHash(), container.size() / partitionCount); partitionBuffer[j] = new ReferenceContainer<WordReference>(Segment.wordReferenceFactory, container.getTermHash(), container.size() / partitionCount);
} }
// split the container // split the container
Iterator<WordReference> i = container.entries(); final Iterator<WordReference> i = container.entries();
while (i.hasNext()) { while (i.hasNext()) {
re = i.next(); re = i.next();
if (re == null) continue; if (re == null) continue;
@ -272,7 +271,7 @@ public class Dispatcher {
* then no additional IO is necessary. * then no additional IO is necessary.
*/ */
private void enqueueContainersToCloud(final List<ReferenceContainer<WordReference>>[] containers) { private void enqueueContainersToCloud(final List<ReferenceContainer<WordReference>>[] containers) {
if (transmissionCloud == null) return; if (this.transmissionCloud == null) return;
ReferenceContainer<WordReference> lastContainer; ReferenceContainer<WordReference> lastContainer;
byte[] primaryTarget; byte[] primaryTarget;
ByteArray pTArray; ByteArray pTArray;
@ -286,13 +285,13 @@ public class Dispatcher {
// get or make a entry object // get or make a entry object
entry = this.transmissionCloud.get(pTArray); // if this is not null, the entry is extended here entry = this.transmissionCloud.get(pTArray); // if this is not null, the entry is extended here
List<yacySeed> targets = PeerSelection.getAcceptRemoteIndexSeedsList( final List<yacySeed> targets = PeerSelection.getAcceptRemoteIndexSeedsList(
seeds, this.seeds,
primaryTarget, primaryTarget,
seeds.redundancy() * 3, this.seeds.redundancy() * 3,
true); true);
this.log.logInfo("enqueueContainers: selected " + targets.size() + " targets for primary target key " + ASCII.String(primaryTarget) + "/" + vertical + " with " + containers[vertical].size() + " index containers."); this.log.logInfo("enqueueContainers: selected " + targets.size() + " targets for primary target key " + ASCII.String(primaryTarget) + "/" + vertical + " with " + containers[vertical].size() + " index containers.");
if (entry == null) entry = transmission.newChunk(primaryTarget, targets); if (entry == null) entry = this.transmission.newChunk(primaryTarget, targets);
/*/ lookup targets /*/ lookup targets
int sc = 1; int sc = 1;
@ -305,10 +304,10 @@ public class Dispatcher {
}*/ }*/
// fill the entry with the containers // fill the entry with the containers
for (ReferenceContainer<WordReference> c: containers[vertical]) { for (final ReferenceContainer<WordReference> c: containers[vertical]) {
try { try {
entry.add(c); entry.add(c);
} catch (RowSpaceExceededException e) { } catch (final RowSpaceExceededException e) {
Log.logException(e); Log.logException(e);
break; break;
} }
@ -330,7 +329,7 @@ public class Dispatcher {
List<ReferenceContainer<WordReference>> selectedContainerCache; List<ReferenceContainer<WordReference>> selectedContainerCache;
try { try {
selectedContainerCache = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime); selectedContainerCache = selectContainers(hash, limitHash, maxContainerCount, maxReferenceCount, maxtime);
} catch (IOException e) { } catch (final IOException e) {
this.log.logSevere("selectContainersEnqueueToCloud: selectedContainer failed", e); this.log.logSevere("selectContainersEnqueueToCloud: selectedContainer failed", e);
return false; return false;
} }
@ -344,7 +343,7 @@ public class Dispatcher {
List<ReferenceContainer<WordReference>>[] splitContainerCache; List<ReferenceContainer<WordReference>>[] splitContainerCache;
try { try {
splitContainerCache = splitContainers(selectedContainerCache); splitContainerCache = splitContainers(selectedContainerCache);
} catch (RowSpaceExceededException e) { } catch (final RowSpaceExceededException e) {
this.log.logSevere("selectContainersEnqueueToCloud: splitContainers failed because of too low RAM", e); this.log.logSevere("selectContainersEnqueueToCloud: splitContainers failed because of too low RAM", e);
return false; return false;
} }
@ -371,30 +370,37 @@ public class Dispatcher {
* This method returns true if a container was dequeued, false if not * This method returns true if a container was dequeued, false if not
*/ */
public boolean dequeueContainer() { public boolean dequeueContainer() {
if (transmissionCloud == null) return false; if (this.transmissionCloud == null) return false;
if (this.indexingTransmissionProcessor.queueSize() > indexingTransmissionProcessor.concurrency()) return false; if (this.indexingTransmissionProcessor.queueSize() > this.indexingTransmissionProcessor.concurrency()) return false;
ByteArray maxtarget = null; ByteArray maxtarget = null;
int maxsize = -1; int maxsize = -1;
for (Map.Entry<ByteArray, Transmission.Chunk> chunk: this.transmissionCloud.entrySet()) { for (final Map.Entry<ByteArray, Transmission.Chunk> chunk: this.transmissionCloud.entrySet()) {
if (chunk.getValue().containersSize() > maxsize) { if (chunk.getValue().containersSize() > maxsize) {
maxsize = chunk.getValue().containersSize(); maxsize = chunk.getValue().containersSize();
maxtarget = chunk.getKey(); maxtarget = chunk.getKey();
} }
} }
if (maxsize < 0) return false; if (maxsize < 0) return false;
Transmission.Chunk chunk = this.transmissionCloud.remove(maxtarget); final Transmission.Chunk chunk = this.transmissionCloud.remove(maxtarget);
try { try {
this.indexingTransmissionProcessor.enQueue(chunk); this.indexingTransmissionProcessor.enQueue(chunk);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
} }
return true; return true;
} }
public Transmission.Chunk transferDocumentIndex(Transmission.Chunk chunk) { /**
* transfer job: this method is called using reflection from the switchboard
* the method is called as a Workflow process. That means it is always called whenever
* a job is placed in the workflow queue. This happens in dequeueContainer()
* @param chunk
* @return
*/
public Transmission.Chunk transferDocumentIndex(final Transmission.Chunk chunk) {
// do the transmission // do the transmission
boolean success = chunk.transmit(); final boolean success = chunk.transmit();
if (success && chunk.isFinished()) { if (success && chunk.isFinished()) {
// finished with this queue! // finished with this queue!
@ -407,7 +413,7 @@ public class Dispatcher {
if (chunk.canFinish()) { if (chunk.canFinish()) {
try { try {
if (this.indexingTransmissionProcessor != null) this.indexingTransmissionProcessor.enQueue(chunk); if (this.indexingTransmissionProcessor != null) this.indexingTransmissionProcessor.enQueue(chunk);
} catch (InterruptedException e) { } catch (final InterruptedException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
@ -420,12 +426,12 @@ public class Dispatcher {
public void close() { public void close() {
// removes all entries from the dispatcher and puts them back to a RAMRI // removes all entries from the dispatcher and puts them back to a RAMRI
if (indexingTransmissionProcessor != null) this.indexingTransmissionProcessor.announceShutdown(); if (this.indexingTransmissionProcessor != null) this.indexingTransmissionProcessor.announceShutdown();
if (this.transmissionCloud != null) { if (this.transmissionCloud != null) {
outerLoop: for (Map.Entry<ByteArray, Transmission.Chunk> e : this.transmissionCloud.entrySet()) { outerLoop: for (final Map.Entry<ByteArray, Transmission.Chunk> e : this.transmissionCloud.entrySet()) {
for (ReferenceContainer<WordReference> i : e.getValue()) try { for (final ReferenceContainer<WordReference> i : e.getValue()) try {
this.segment.termIndex().add(i); this.segment.termIndex().add(i);
} catch (Exception e1) { } catch (final Exception e1) {
Log.logException(e1); Log.logException(e1);
break outerLoop; break outerLoop;
} }
@ -433,7 +439,7 @@ public class Dispatcher {
this.transmissionCloud.clear(); this.transmissionCloud.clear();
} }
this.transmissionCloud = null; this.transmissionCloud = null;
if (indexingTransmissionProcessor != null) { if (this.indexingTransmissionProcessor != null) {
this.indexingTransmissionProcessor.awaitShutdown(10000); this.indexingTransmissionProcessor.awaitShutdown(10000);
this.indexingTransmissionProcessor.clear(); this.indexingTransmissionProcessor.clear();
} }

@ -464,17 +464,17 @@ public class Table implements Index, Iterable<Row.Entry> {
final int i = (int) this.index.get(key); final int i = (int) this.index.get(key);
if (i == -1) return null; if (i == -1) return null;
final byte[] b = new byte[this.rowdef.objectsize]; final byte[] b = new byte[this.rowdef.objectsize];
if (this.table == null) { final Row.Entry cacherow;
if (this.table == null || (cacherow = this.table.get(i, false)) == null) {
// read row from the file // read row from the file
this.file.get(i, b, 0); this.file.get(i, b, 0);
} else { } else {
// construct the row using the copy in RAM // construct the row using the copy in RAM
final Row.Entry v = this.table.get(i, false); assert cacherow != null;
assert v != null; if (cacherow == null) return null;
if (v == null) return null;
assert key.length == this.rowdef.primaryKeyLength; assert key.length == this.rowdef.primaryKeyLength;
System.arraycopy(key, 0, b, 0, key.length); System.arraycopy(key, 0, b, 0, key.length);
System.arraycopy(v.bytes(), 0, b, this.rowdef.primaryKeyLength, this.rowdef.objectsize - this.rowdef.primaryKeyLength); System.arraycopy(cacherow.bytes(), 0, b, this.rowdef.primaryKeyLength, this.rowdef.objectsize - this.rowdef.primaryKeyLength);
} }
return this.rowdef.newEntry(b); return this.rowdef.newEntry(b);
} }
@ -503,7 +503,7 @@ public class Table implements Index, Iterable<Row.Entry> {
assert this.table == null || this.table.size() == this.index.size() : "table.size() = " + this.table.size() + ", index.size() = " + this.index.size(); assert this.table == null || this.table.size() == this.index.size() : "table.size() = " + this.table.size() + ", index.size() = " + this.index.size();
assert row != null; assert row != null;
assert row.bytes() != null; assert row.bytes() != null;
if ((row == null) || (row.bytes() == null)) return null; if (row == null || row.bytes() == null) return null;
final int i = (int) this.index.get(row.getPrimaryKeyBytes()); final int i = (int) this.index.get(row.getPrimaryKeyBytes());
if (i == -1) { if (i == -1) {
try { try {
@ -517,17 +517,17 @@ public class Table implements Index, Iterable<Row.Entry> {
} }
final byte[] b = new byte[this.rowdef.objectsize]; final byte[] b = new byte[this.rowdef.objectsize];
if (this.table == null) { Row.Entry cacherow;
if (this.table == null || (cacherow = this.table.get(i, false)) == null) {
// read old value // read old value
this.file.get(i, b, 0); this.file.get(i, b, 0);
// write new value // write new value
this.file.put(i, row.bytes(), 0); this.file.put(i, row.bytes(), 0);
} else { } else {
// read old value // read old value
final Row.Entry v = this.table.get(i, false); assert cacherow != null;
assert v != null;
System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, this.rowdef.primaryKeyLength); System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, this.rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, this.rowdef.primaryKeyLength, this.rowdef.objectsize - this.rowdef.primaryKeyLength); System.arraycopy(cacherow.bytes(), 0, b, this.rowdef.primaryKeyLength, this.rowdef.objectsize - this.rowdef.primaryKeyLength);
// write new value // write new value
try { try {
this.table.set(i, this.taildef.newEntry(row.bytes(), this.rowdef.primaryKeyLength, true)); this.table.set(i, this.taildef.newEntry(row.bytes(), this.rowdef.primaryKeyLength, true));
@ -573,13 +573,12 @@ public class Table implements Index, Iterable<Row.Entry> {
this.file.put(i, row.bytes(), 0); this.file.put(i, row.bytes(), 0);
} else { } else {
// write new value // write new value
try { this.file.put(i, row.bytes(), 0);
if (abandonTable()) this.table = null; else try {
this.table.set(i, this.taildef.newEntry(row.bytes(), this.rowdef.primaryKeyLength, true)); this.table.set(i, this.taildef.newEntry(row.bytes(), this.rowdef.primaryKeyLength, true));
} catch (final RowSpaceExceededException e) { } catch (final RowSpaceExceededException e) {
this.table = null; this.table = null;
} }
if (abandonTable()) this.table = null;
this.file.put(i, row.bytes(), 0);
} }
assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size(); assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size();
assert this.table == null || this.table.size() == this.index.size() : "table.size() = " + this.table.size() + ", index.size() = " + this.index.size(); assert this.table == null || this.table.size() == this.index.size() : "table.size() = " + this.table.size() + ", index.size() = " + this.index.size();
@ -665,7 +664,8 @@ public class Table implements Index, Iterable<Row.Entry> {
final int sb = this.index.size(); final int sb = this.index.size();
int ix; int ix;
assert i < this.index.size(); assert i < this.index.size();
if (this.table == null) { final Row.Entry cacherow;
if (this.table == null || (cacherow = this.table.get(i, false)) == null) {
if (i == this.index.size() - 1) { if (i == this.index.size() - 1) {
// element is at last entry position // element is at last entry position
ix = (int) this.index.remove(key); ix = (int) this.index.remove(key);
@ -697,9 +697,8 @@ public class Table implements Index, Iterable<Row.Entry> {
assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size(); assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size();
} else { } else {
// get result value from the table copy, so we don't need to read it from the file // get result value from the table copy, so we don't need to read it from the file
final Row.Entry v = this.table.get(i, false);
System.arraycopy(key, 0, b, 0, key.length); System.arraycopy(key, 0, b, 0, key.length);
System.arraycopy(v.bytes(), 0, b, this.rowdef.primaryKeyLength, this.taildef.objectsize); System.arraycopy(cacherow.bytes(), 0, b, this.rowdef.primaryKeyLength, this.taildef.objectsize);
if (i == this.index.size() - 1) { if (i == this.index.size() - 1) {
// special handling if the entry is the last entry in the file // special handling if the entry is the last entry in the file
@ -911,7 +910,8 @@ public class Table implements Index, Iterable<Row.Entry> {
this.c = (int) Table.this.index.get(k); this.c = (int) Table.this.index.get(k);
if (this.c < 0) throw new ConcurrentModificationException(); // this should only happen if the table was modified during the iteration if (this.c < 0) throw new ConcurrentModificationException(); // this should only happen if the table was modified during the iteration
final byte[] b = new byte[Table.this.rowdef.objectsize]; final byte[] b = new byte[Table.this.rowdef.objectsize];
if (Table.this.table == null) { final Row.Entry cacherow;
if (Table.this.table == null || (cacherow = Table.this.table.get(this.c, false)) == null) {
// read from file // read from file
try { try {
Table.this.file.get(this.c, b, 0); Table.this.file.get(this.c, b, 0);
@ -921,11 +921,10 @@ public class Table implements Index, Iterable<Row.Entry> {
} }
} else { } else {
// compose from table and key // compose from table and key
final Row.Entry v = Table.this.table.get(this.c, false); assert cacherow != null;
assert v != null; if (cacherow == null) return null;
if (v == null) return null;
System.arraycopy(k, 0, b, 0, Table.this.rowdef.primaryKeyLength); System.arraycopy(k, 0, b, 0, Table.this.rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, Table.this.rowdef.primaryKeyLength, Table.this.taildef.objectsize); System.arraycopy(cacherow.bytes(), 0, b, Table.this.rowdef.primaryKeyLength, Table.this.taildef.objectsize);
} }
return Table.this.rowdef.newEntry(b); return Table.this.rowdef.newEntry(b);
} }

Loading…
Cancel
Save