some performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6488 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 36fbfdcb21
commit 2d8f3ee301

@ -23,6 +23,8 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.ArrayList;
import net.yacy.kelondro.data.meta.DigestURI;
import de.anomic.crawler.ZURL;
import de.anomic.http.server.RequestHeader;
@ -71,22 +73,25 @@ public class IndexCreateParserErrors_p {
String initiatorHash, executorHash;
yacySeed initiatorSeed, executorSeed;
int j=0;
for (ZURL.Entry entry: sb.crawlQueues.errorURL) {
if (entry == null) continue;
url = entry.url();
if (url == null) continue;
initiatorHash = entry.initiator();
executorHash = entry.executor();
initiatorSeed = sb.peers.getConnected(initiatorHash);
executorSeed = sb.peers.getConnected(executorHash);
prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true));
prop.putHTML("rejected_list_"+j+"_failreason", entry.anycause());
prop.put("rejected_list_"+j+"_dark", dark ? "1" : "0");
dark = !dark;
j++;
ArrayList<ZURL.Entry> l = sb.crawlQueues.errorURL.list(100);
ZURL.Entry entry;
for (int i = l.size() - 1; i >= 0; i--) {
entry = l.get(i);
if (entry == null) continue;
url = entry.url();
if (url == null) continue;
initiatorHash = entry.initiator();
executorHash = entry.executor();
initiatorSeed = sb.peers.getConnected(initiatorHash);
executorSeed = sb.peers.getConnected(executorHash);
prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true));
prop.putHTML("rejected_list_"+j+"_failreason", entry.anycause());
prop.put("rejected_list_"+j+"_dark", dark ? "1" : "0");
dark = !dark;
j++;
}
prop.put("rejected_list", j);
}

@ -92,7 +92,7 @@ public final class transferRWI {
// response values
int pause = 0;
String result = "ok";
final StringBuilder unknownURLs = new StringBuilder();
final StringBuilder unknownURLs = new StringBuilder(6000);
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);

@ -109,7 +109,7 @@ public class RobotsEntry {
if ((allowPathList != null)&&(allowPathList.size()>0)) {
this.allowPathList.addAll(allowPathList);
final StringBuilder pathListStr = new StringBuilder();
final StringBuilder pathListStr = new StringBuilder(allowPathList.size() * 30);
for (int i=0; i<allowPathList.size();i++) {
pathListStr.append(allowPathList.get(i))
.append(ROBOTS_DB_PATH_SEPARATOR);
@ -120,7 +120,7 @@ public class RobotsEntry {
if ((disallowPathList != null)&&(disallowPathList.size()>0)) {
this.denyPathList.addAll(disallowPathList);
final StringBuilder pathListStr = new StringBuilder();
final StringBuilder pathListStr = new StringBuilder(disallowPathList.size() * 30);
for (int i=0; i<disallowPathList.size();i++) {
pathListStr.append(disallowPathList.get(i))
.append(ROBOTS_DB_PATH_SEPARATOR);
@ -130,7 +130,7 @@ public class RobotsEntry {
}
public String toString() {
final StringBuilder str = new StringBuilder();
final StringBuilder str = new StringBuilder(6000);
str.append((this.hostName==null)?"null":this.hostName)
.append(": ");

@ -28,6 +28,7 @@ package de.anomic.crawler;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.concurrent.ConcurrentLinkedQueue;
@ -61,8 +62,8 @@ public class ZURL implements Iterable<ZURL.Entry> {
);
// the class object
private final ObjectIndex urlIndex;
private final ConcurrentLinkedQueue<String> stack;
protected final ObjectIndex urlIndex;
protected final ConcurrentLinkedQueue<String> stack;
public ZURL(
final File cachePath,
@ -133,6 +134,15 @@ public class ZURL implements Iterable<ZURL.Entry> {
return new EntryIterator();
}
/**
 * Collects entries from this object's iterator into a list that acts as a
 * sliding window: every entry is appended, and once {@code max} entries have
 * already been appended, each further append is followed by dropping the
 * element at index 0. The result therefore holds the most recently iterated
 * entries, in iteration order.
 *
 * @param max number of entries that may be appended before the oldest one
 *            starts being dropped on every further append
 * @return a newly created list with the retained entries
 */
public ArrayList<ZURL.Entry> list(int max) {
    final ArrayList<ZURL.Entry> window = new ArrayList<ZURL.Entry>();
    int budget = max;
    for (final ZURL.Entry current : this) {
        window.add(current);
        // evaluate the budget before decrementing it (post-decrement semantics)
        final boolean exhausted = budget <= 0;
        budget--;
        if (exhausted) {
            // window is full: discard the oldest element
            window.remove(0);
        }
    }
    return window;
}
private class EntryIterator implements Iterator<ZURL.Entry> {
private Iterator<String> hi;
public EntryIterator() {
@ -186,7 +196,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
private final String anycause; // string describing reason for load fail
private boolean stored;
private Entry(
protected Entry(
final Request bentry,
final String executor,
final Date workdate,
@ -203,7 +213,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
stored = false;
}
private Entry(final Row.Entry entry) throws IOException {
protected Entry(final Row.Entry entry) throws IOException {
assert (entry != null);
this.executor = entry.getColString(1, "UTF-8");
this.workdate = new Date(entry.getColLong(2));

@ -144,7 +144,7 @@ public final class HTTPLoader {
String supportError = TextParser.supports(request.url(), res.getResponseHeader().mime());
if (supportError != null) {
sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash, new Date(), 1, supportError);
throw new IOException("REJECTED WRONG MIME TYPE: " + supportError);
throw new IOException("REJECTED WRONG MIME TYPE, mime = " + res.getResponseHeader().mime() + ": " + supportError);
}
}

@ -595,10 +595,7 @@ public class bookmarksDB {
}
public Iterator<Tag> getTagIterator(final boolean priv, final int c) {
Comparator<Tag> comp;
if (c == SORT_SIZE) comp = new tagSizeComparator();
else comp = new tagComparator();
final TreeSet<Tag> set=new TreeSet<Tag>(comp);
final TreeSet<Tag> set=new TreeSet<Tag>((c == SORT_SIZE) ? tagSizeComparator : tagComparator);
final Iterator<Tag> it = tagIterator(true);
Tag tag;
while(it.hasNext()){
@ -614,10 +611,7 @@ public class bookmarksDB {
if (max==SHOW_ALL)
return getTagIterator(priv, comp);
final Iterator<Tag> it = getTagIterator(priv, SORT_SIZE);
Comparator<Tag> c;
if (comp == SORT_SIZE) c = new tagSizeComparator();
else c = new tagComparator();
final TreeSet<Tag> set=new TreeSet<Tag>(c);
final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
int count = 0;
while (it.hasNext() && count<=max) {
set.add(it.next());
@ -631,10 +625,7 @@ public class bookmarksDB {
}
*/
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp){
Comparator<Tag> c;
if (comp == SORT_SIZE) c = new tagSizeComparator();
else c = new tagComparator();
final TreeSet<Tag> set=new TreeSet<Tag>(c);
final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
Iterator<String> it=null;
final Iterator<String> bit=getBookmarksIterator(tagName, priv);
Bookmark bm;
@ -657,11 +648,8 @@ public class bookmarksDB {
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp, final int max){
if (max==SHOW_ALL)
return getTagIterator(priv, comp);
final Iterator<Tag> it = getTagIterator(tagName, priv, SORT_SIZE);
Comparator<Tag> c;
if (comp == SORT_SIZE) c = new tagSizeComparator();
else c = new tagComparator();
final TreeSet<Tag> set=new TreeSet<Tag>(c);
final Iterator<Tag> it = getTagIterator(tagName, priv, SORT_SIZE);
final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
int count = 0;
while (it.hasNext() && count<=max) {
set.add(it.next());
@ -1410,10 +1398,13 @@ public class bookmarksDB {
}
}
public static final TagComparator tagComparator = new TagComparator();
public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
/**
* Comparator to sort objects of type Tag according to their names
*/
public static class tagComparator implements Comparator<Tag>, Serializable {
public static class TagComparator implements Comparator<Tag>, Serializable {
/**
* generated serial
@ -1426,7 +1417,7 @@ public class bookmarksDB {
}
public static class tagSizeComparator implements Comparator<Tag>, Serializable {
public static class TagSizeComparator implements Comparator<Tag>, Serializable {
/**
* generated serial

@ -87,7 +87,7 @@ public class LinkToken extends AbstractToken {
}
protected void parse() throws wikiParserException {
final StringBuilder stringBuilder = new StringBuilder();
final StringBuilder stringBuilder = new StringBuilder(6000);
if (this.patternNr < 0 || this.patternNr >= patterns.length) {
throw new wikiParserException("patternNr was not set correctly: " + this.patternNr);
@ -179,7 +179,7 @@ public class LinkToken extends AbstractToken {
@Override
public String toString() {
final StringBuilder stringBuilder = new StringBuilder();
final StringBuilder stringBuilder = new StringBuilder(300);
stringBuilder.append("<a href=\"").append(this.href).append("\"");
if (this.title != null) stringBuilder.append(" title=\"").append(this.title).append("\"");
stringBuilder.append(">");

@ -1194,7 +1194,7 @@ public final class HTTPDProxyHandler {
if (myAddress != null) {
// getting header set by other proxies in the chain
final StringBuilder viaValue = new StringBuilder();
final StringBuilder viaValue = new StringBuilder(80);
if (header.containsKey(HeaderFramework.VIA)) viaValue.append(header.get(HeaderFramework.VIA));
if (viaValue.length() > 0) viaValue.append(", ");

@ -406,7 +406,7 @@ public class HeaderFramework extends TreeMap<String, String> implements Map<Stri
final int httpStatusCode,
final String httpStatusText) {
// creating a new buffer to store the header as string
final StringBuilder theHeader = new StringBuilder();
final StringBuilder theHeader = new StringBuilder(180);
// generating the header string
this.toHeaderString(httpVersion,httpStatusCode,httpStatusText,theHeader);

@ -92,7 +92,7 @@ public final class RobotsTxtConfig {
@Override
public String toString() {
if (this.allDisallowed) return ALL;
final StringBuilder sb = new StringBuilder();
final StringBuilder sb = new StringBuilder(200);
if (this.blogDisallowed) sb.append(BLOG).append(",");
if (this.bookmarksDisallowed) sb.append(BOOKMARKS).append(",");
if (this.dirsDisallowed) sb.append(DIRS).append(",");

@ -58,6 +58,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// all these tags must be given in lowercase, because the tags from the files are compared in lowercase
static {
linkTags0.add("html"); // scraped as tag 0 to get attached properties like 'lang'
linkTags0.add("img");
linkTags0.add("base");
linkTags0.add("frame");

@ -64,11 +64,10 @@ public class rssParser extends AbstractParser implements Idiom {
public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static {
SUPPORTED_EXTENSIONS.add("xml");
SUPPORTED_EXTENSIONS.add("rss");
SUPPORTED_EXTENSIONS.add("rdf");
SUPPORTED_EXTENSIONS.add("xml");
SUPPORTED_MIME_TYPES.add("XML");
SUPPORTED_MIME_TYPES.add("text/rss");
SUPPORTED_MIME_TYPES.add("application/rdf+xml");
SUPPORTED_MIME_TYPES.add("application/rss+xml");
SUPPORTED_MIME_TYPES.add("application/atom+xml");
}

Loading…
Cancel
Save