- performance hacks (should affect the crawl balancer and reduce CPU load during crawl stack re-fill)

- this may also have (good) performance side effects on other parts of YaCy


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7982 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 9c131adeb6
commit 035ebfbf3b

@ -45,7 +45,6 @@ import net.yacy.peers.yacyNewsPool;
import net.yacy.peers.yacySeed;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -128,14 +127,14 @@ public class Supporter {
row = Supporter.get(urlhash);
if (row == null) continue;
url = row.getColString(0);
url = row.getPrimaryKeyUTF8();
try {
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch(final MalformedURLException e) {continue;}
title = row.getColString(1);
description = row.getColString(2);
title = row.getColUTF8(1);
description = row.getColUTF8(2);
if ((url == null) || (title == null) || (description == null)) continue;
refid = row.getColString(3);
refid = row.getColUTF8(3);
voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) ||
(sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null);
prop.put("supporter_results_" + i + "_authorized", authenticated ? "1" : "0");

@ -44,7 +44,6 @@ import net.yacy.peers.yacyNewsPool;
import net.yacy.peers.yacySeed;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -135,15 +134,15 @@ public class Surftips {
row = surftips.get(urlhash);
if (row == null) continue;
url = row.getColString(0);
url = row.getPrimaryKeyUTF8();
try{
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url)))
continue;
}catch(final MalformedURLException e){continue;};
title = row.getColString(1);
description = row.getColString(2);
title = row.getColUTF8(1);
description = row.getColUTF8(2);
if ((url == null) || (title == null) || (description == null)) continue;
refid = row.getColString(3);
refid = row.getColUTF8(3);
voted = (sb.peers.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null) ||
(sb.peers.newsPool.getSpecific(yacyNewsPool.PUBLISHED_DB, yacyNewsPool.CATEGORY_SURFTIPP_VOTE_ADD, "refid", refid) != null);
prop.put("surftips_results_" + i + "_authorized", (authenticated) ? "1" : "0");

@ -278,7 +278,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
this.executor = entry.getColBytes(1, true);
this.workdate = new Date(entry.getColLong(2));
this.workcount = (int) entry.getColLong(3);
this.anycause = entry.getColString(4);
this.anycause = entry.getColUTF8(4);
this.bentry = new Request(Request.rowdef.newEntry(entry.getColBytes(5, false)));
assert (Base64Order.enhancedCoder.equal(entry.getPrimaryKeyBytes(), this.bentry.url().hash()));
this.stored = true;
@ -337,7 +337,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
try {
return new Entry(e);
} catch (final IOException ex) {
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getColString(0));
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getPrimaryKeyASCII());
}
}

@ -136,15 +136,15 @@ public class Request extends WorkflowJob {
}
private void insertEntry(final Row.Entry entry) throws IOException {
final String urlstring = entry.getColString(2);
final String urlstring = entry.getColUTF8(2);
if (urlstring == null) throw new IOException ("url string is null");
this.initiator = entry.getColBytes(1, true);
this.initiator = (initiator == null) ? null : ((initiator.length == 0) ? null : initiator);
this.initiator = (this.initiator == null) ? null : ((this.initiator.length == 0) ? null : this.initiator);
this.url = new DigestURI(urlstring, entry.getPrimaryKeyBytes());
this.refhash = (entry.empty(3)) ? null : entry.getColBytes(3, true);
this.name = (entry.empty(4)) ? "" : entry.getColString(4).trim();
this.name = (entry.empty(4)) ? "" : entry.getColUTF8(4).trim();
this.appdate = entry.getColLong(5);
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6).trim();
this.profileHandle = (entry.empty(6)) ? null : entry.getColASCII(6).trim();
this.depth = (int) entry.getColLong(7);
this.anchors = (int) entry.getColLong(8);
this.forkfactor = (int) entry.getColLong(9);
@ -153,7 +153,7 @@ public class Request extends WorkflowJob {
//this.lastmodified = entry.getColLong(13);
this.size = entry.getColLong(14);
this.statusMessage = "loaded(kelondroRow.Entry)";
this.initialHash = url.hashCode();
this.initialHash = this.url.hashCode();
return;
}
@ -162,7 +162,7 @@ public class Request extends WorkflowJob {
return this.initialHash;
}
public void setStatus(final String s, int code) {
public void setStatus(final String s, final int code) {
//System.out.println("***DEBUG*** crawler status " + s + ", " + code + " for " + this.url.toNormalform(true, false));
this.statusMessage = s;
this.status = code;
@ -173,15 +173,15 @@ public class Request extends WorkflowJob {
}
public Row.Entry toRow() {
final byte[] appdatestr = NaturalOrder.encodeLong(appdate, rowdef.width(5));
final byte[] appdatestr = NaturalOrder.encodeLong(this.appdate, rowdef.width(5));
final byte[] loaddatestr = NaturalOrder.encodeLong(0 /*loaddate*/, rowdef.width(12));
final byte[] serverdatestr = NaturalOrder.encodeLong(0 /*lastmodified*/, rowdef.width(13));
final byte[] sizestr = NaturalOrder.encodeLong(this.size, rowdef.width(14));
// store the hash in the hash cache
byte[] namebytes = UTF8.getBytes(this.name);
final byte[] namebytes = UTF8.getBytes(this.name);
final byte[][] entry = new byte[][] {
this.url.hash(),
initiator,
this.initiator,
this.url.toString().getBytes(),
this.refhash,
namebytes,
@ -200,7 +200,7 @@ public class Request extends WorkflowJob {
public DigestURI url() {
// the url
return url;
return this.url;
}
public void redirectURL(final DigestURI redirectedURL) {
@ -215,7 +215,7 @@ public class Request extends WorkflowJob {
public byte[] initiator() {
// returns the hash of the initiating peer
return initiator;
return this.initiator;
}
public boolean proxy() {
@ -255,7 +255,7 @@ public class Request extends WorkflowJob {
public String profileHandle() {
// the handle of the crawl profile
assert profileHandle.length() == Word.commonHashLength : profileHandle + " != " + Word.commonHashLength;
assert this.profileHandle.length() == Word.commonHashLength : this.profileHandle + " != " + Word.commonHashLength;
return this.profileHandle;
}

@ -402,7 +402,7 @@ public class dbtest {
Row.Entry row;
while (i.hasNext()) {
row = i.next();
for (int j = 0; j < row.columns(); j++) System.out.print(row.getColString(j) + ",");
for (int j = 0; j < row.columns(); j++) System.out.print(row.getColUTF8(j) + ",");
System.out.println();
}
}

@ -106,12 +106,12 @@ public final class NavigationReferenceRow extends AbstractReference implements N
@Override
public NavigationReferenceRow clone() {
final byte[] b = new byte[navEntryRow.objectsize];
System.arraycopy(entry.bytes(), 0, b, 0, navEntryRow.objectsize);
System.arraycopy(this.entry.bytes(), 0, b, 0, navEntryRow.objectsize);
return new NavigationReferenceRow(b);
}
public String toPropertyForm() {
return entry.toPropertyForm('=', true, true, false, false);
return this.entry.toPropertyForm('=', true, true, false, false);
}
public Entry toKelondroEntry() {
@ -119,7 +119,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N
}
public String navigationHash() {
return this.entry.getColString(col_navhash);
return this.entry.getColASCII(col_navhash);
}
public byte[] urlhash() {
@ -150,7 +150,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N
@Override
public int hashCode() {
return this.navigationHash().hashCode();
return navigationHash().hashCode();
}
@Override
@ -158,8 +158,8 @@ public final class NavigationReferenceRow extends AbstractReference implements N
if (this == obj) return true;
if (obj == null) return false;
if (!(obj instanceof NavigationReferenceRow)) return false;
NavigationReferenceRow other = (NavigationReferenceRow) obj;
return this.navigationHash().equals(other.navigationHash());
final NavigationReferenceRow other = (NavigationReferenceRow) obj;
return navigationHash().equals(other.navigationHash());
}
public boolean isOlder(final Reference other) {

@ -36,6 +36,7 @@ import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.ranking.AbstractOrder;
import net.yacy.cora.ranking.Order;
@ -475,15 +476,57 @@ public final class Row {
throw new kelondroException("ROW", "addCol did not find appropriate encoding");
}
public final String getColString(final int column) {
/**
 * Returns the primary key of this entry as a byte array.
 * <p>
 * The key is read from {@code rowinstance}, starting at {@code offset} and spanning
 * {@code Row.this.primaryKeyLength} bytes.
 *
 * @return {@code null} if the first key byte is 0; otherwise the key bytes.
 *         NOTE: in the single-column fast path below the backing array itself is
 *         returned (no copy), so the caller then holds a reference to this entry's own data.
 */
public final byte[] getPrimaryKeyBytes() {
// a leading zero byte is treated as "no key present"
if (this.rowinstance[this.offset] == 0) return null;
if (Row.this.row.length == 1 && this.offset == 0 && this.rowinstance.length == Row.this.primaryKeyLength) {
// avoid memory allocation in case the row consists of only the primary key
return this.rowinstance;
}
// general case: copy primaryKeyLength bytes, beginning at offset, into a fresh array
final byte[] c = new byte[Row.this.primaryKeyLength];
System.arraycopy(this.rowinstance, this.offset, c, 0, Row.this.primaryKeyLength);
return c;
}
/**
 * Returns the primary key of this entry as a String built by {@code UTF8.String}.
 * <p>
 * NOTE(review): {@code UTF8.String} is not visible here; presumably it decodes the bytes
 * as UTF-8 — confirm against net.yacy.cora.document.UTF8.
 *
 * @return {@code null} if the first key byte is 0; otherwise the string built from the
 *         {@code Row.this.primaryKeyLength} key bytes that start at {@code offset}
 */
public final String getPrimaryKeyUTF8() {
// a leading zero byte is treated as "no key present"
if (this.rowinstance[this.offset] == 0) return null;
if (Row.this.row.length == 1 && this.offset == 0 && this.rowinstance.length == Row.this.primaryKeyLength) {
// the backing array holds nothing but the key: hand the whole array to the one-argument overload
return UTF8.String(this.rowinstance);
}
// general case: convert only the key range of the backing array
return UTF8.String(this.rowinstance, this.offset, Row.this.primaryKeyLength);
}
/**
 * Returns the primary key of this entry as a String built by {@code ASCII.String}.
 * <p>
 * NOTE(review): {@code ASCII.String} is not visible here; presumably it maps each byte to
 * one character and is only appropriate for keys known to be plain ASCII — confirm
 * against net.yacy.cora.document.ASCII.
 *
 * @return {@code null} if the first key byte is 0; otherwise the string built from the
 *         {@code Row.this.primaryKeyLength} key bytes that start at {@code offset}
 */
public final String getPrimaryKeyASCII() {
// a leading zero byte is treated as "no key present"
if (this.rowinstance[this.offset] == 0) return null;
if (Row.this.row.length == 1 && this.offset == 0 && this.rowinstance.length == Row.this.primaryKeyLength) {
// the backing array holds nothing but the key: hand the whole array to the one-argument overload
return ASCII.String(this.rowinstance);
}
// general case: convert only the key range of the backing array
return ASCII.String(this.rowinstance, this.offset, Row.this.primaryKeyLength);
}
public final String getColUTF8(final int column) {
final int clstrt = Row.this.colstart[column];
int length = Row.this.row[column].cellwidth;
if (this.rowinstance[this.offset + clstrt] == 0) return null;
final int length = getColLength(column, clstrt);
if (length == 0) return null;
return UTF8.String(this.rowinstance, this.offset + clstrt, length);
}
/**
 * Returns the content of the given column as a String built by {@code ASCII.String}.
 *
 * @param column index of the column; used to look up {@code Row.this.colstart[column]}
 * @return {@code null} if the first byte of the cell is 0 or if {@code getColLength}
 *         reports a length of 0; otherwise the string built from that many bytes of the cell
 */
public final String getColASCII(final int column) {
// start of the cell, relative to this entry's offset in the backing array
final int clstrt = Row.this.colstart[column];
// a leading zero byte is treated as an empty cell
if (this.rowinstance[this.offset + clstrt] == 0) return null;
// NOTE(review): getColLength appears to strip trailing zero bytes from the cell — confirm
final int length = getColLength(column, clstrt);
if (length == 0) return null;
return ASCII.String(this.rowinstance, this.offset + clstrt, length);
}
private final int getColLength(final int column, final int clstrt) {
int length = Row.this.row[column].cellwidth;
assert length <= this.rowinstance.length - this.offset - clstrt;
if (length > this.rowinstance.length - this.offset - clstrt) length = this.rowinstance.length - this.offset - clstrt;
while ((length > 0) && (this.rowinstance[this.offset + clstrt + length - 1] == 0)) length--;
if (length == 0) return null;
return UTF8.String(this.rowinstance, this.offset + clstrt, length);
return length;
}
public final long getColLong(final int column) {
@ -517,16 +560,6 @@ public final class Row {
return this.rowinstance[this.offset + Row.this.colstart[column]];
}
public final byte[] getPrimaryKeyBytes() {
if (Row.this.columns() == 1 && this.offset == 0 && this.rowinstance.length == Row.this.primaryKeyLength) {
// avoid memory allocation in case that the row consists in only the primary key
return this.rowinstance;
}
final byte[] c = new byte[Row.this.primaryKeyLength];
System.arraycopy(this.rowinstance, this.offset, c, 0, Row.this.primaryKeyLength);
return c;
}
/**
 * Returns the primary key length of the enclosing row definition.
 *
 * @return the value of {@code Row.this.primaryKeyLength}
 */
public final int getPrimaryKeyLength() {
return Row.this.primaryKeyLength;
}

@ -105,7 +105,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
this.chunkcount = chunkcachelength / rowdef.objectsize; // patch problem
}
this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day;
final String sortOrderKey = exportedCollection.getColString(exp_order_type);
final String sortOrderKey = exportedCollection.getColASCII(exp_order_type);
ByteOrder oldOrder = null;
if ((sortOrderKey == null) || (sortOrderKey.equals("__"))) {
oldOrder = null;

@ -583,7 +583,7 @@ public class RowSet extends RowCollection implements Index, Iterable<Row.Entry>
Row.Entry entry;
while (ii.hasNext()) {
entry = ii.next();
s = ASCII.String(entry.getPrimaryKeyBytes()).trim();
s = entry.getPrimaryKeyASCII().trim();
System.out.print(s + ", ");
if (s.equals("drei")) ii.remove();
}

@ -219,7 +219,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery);
sqlStatement.setString(1, row.getColString(0));
sqlStatement.setString(1, row.getPrimaryKeyASCII());
sqlStatement.setBytes(2, row.bytes());
sqlStatement.execute();
@ -240,7 +240,7 @@ public class SQLTable implements Index, Iterable<Row.Entry> {
final PreparedStatement sqlStatement = this.theDBConnection.prepareStatement(sqlQuery);
sqlStatement.setString(1, row.getColString(0));
sqlStatement.setString(1, row.getPrimaryKeyASCII());
sqlStatement.setBytes(2, row.bytes());
sqlStatement.execute();

@ -181,7 +181,7 @@ public final class FileUtils {
* @see #copy(File source, File dest)
*/
public static void copy(final InputStream source, final File dest, final long count) throws IOException {
String path = dest.getParent();
final String path = dest.getParent();
if (path != null && path.length() > 0) new File(path).mkdirs();
FileOutputStream fos = null;
try {
@ -274,11 +274,11 @@ public final class FileUtils {
public static byte[] read(final InputStream source, final int count) throws IOException {
if (count > 0) {
byte[] b = new byte[count];
int c = source.read(b, 0, count);
final byte[] b = new byte[count];
final int c = source.read(b, 0, count);
assert c == count: "count = " + count + ", c = " + c;
if (c != count) {
byte[] bb = new byte[c];
final byte[] bb = new byte[c];
System.arraycopy(b, 0, bb, 0, c);
return bb;
}
@ -470,8 +470,8 @@ public final class FileUtils {
os = zos;
}
if(os != null) {
for (final Iterator<byte[]> i = set.iterator(); i.hasNext(); ) {
os.write(i.next());
for (final byte[] b : set) {
os.write(b);
if (sep != null) os.write(UTF8.getBytes(sep));
}
os.close();
@ -495,23 +495,20 @@ public final class FileUtils {
}
if (os != null) {
final Iterator<Row.Entry> i = set.iterator();
String key;
if (i.hasNext()) {
key = UTF8.String(i.next().getPrimaryKeyBytes());
os.write(UTF8.getBytes(key));
os.write(i.next().getPrimaryKeyBytes());
}
while (i.hasNext()) {
key = UTF8.String(i.next().getPrimaryKeyBytes());
if (sep != null) os.write(UTF8.getBytes(sep));
os.write(UTF8.getBytes(key));
os.write(i.next().getPrimaryKeyBytes());
}
os.close();
}
forceMove(tf, file);
}
public static ConcurrentHashMap<String, String> table(Reader r) {
BufferedReader br = new BufferedReader(r);
public static ConcurrentHashMap<String, String> table(final Reader r) {
final BufferedReader br = new BufferedReader(r);
return table(new StringsIterator(br));
}
@ -519,7 +516,7 @@ public final class FileUtils {
//private final static Pattern escaped_equal = Pattern.compile("\\=");
//private final static Pattern escaped_newline = Pattern.compile("\\n");
//private final static Pattern escaped_backslash = Pattern.compile("\\");
public static ConcurrentHashMap<String, String> table(Iterator<String> li) {
public static ConcurrentHashMap<String, String> table(final Iterator<String> li) {
String line;
final ConcurrentHashMap<String, String> props = new ConcurrentHashMap<String, String>();
while (li.hasNext()) {
@ -532,8 +529,8 @@ public final class FileUtils {
} while ( pos > 0 && line.charAt(pos-1) == '\\');
if (pos > 0) {
//String key = escaped_equal.matcher(line.substring(0, pos).trim()).replaceAll("=");
String key = line.substring(0, pos).trim().replace("\\=", "=").replace("\\n", "\n").replace("\\", "\\");
String value = line.substring(pos + 1).trim().replace("\\n", "\n").replace("\\\\", "\\");
final String key = line.substring(0, pos).trim().replace("\\=", "=").replace("\\n", "\n").replace("\\", "\\");
final String value = line.substring(pos + 1).trim().replace("\\n", "\n").replace("\\\\", "\\");
props.put(key, value);
}
}
@ -544,11 +541,11 @@ public final class FileUtils {
return table(strings(a));
}
public static Iterator<String> strings(byte[] a) {
public static Iterator<String> strings(final byte[] a) {
if (a == null) return new ArrayList<String>().iterator();
try {
return new StringsIterator(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(a), "UTF-8")));
} catch (UnsupportedEncodingException e) {
} catch (final UnsupportedEncodingException e) {
return null;
}
}
@ -682,7 +679,7 @@ public final class FileUtils {
* @return array of file names
*/
public static List<String> getDirListing(final File dir, final String filter){
List<String> ret = new LinkedList<String>();
final List<String> ret = new LinkedList<String>();
File[] fileList;
if (dir != null ) {
if (!dir.exists()) {
@ -738,8 +735,8 @@ public final class FileUtils {
*/
public static boolean writeList(final File listFile, final String[] list){
final StringBuilder out = new StringBuilder(list.length * 40 + 1);
for(int i=0;i < list.length; i++){
out.append(list[i]).append(CR).append(LF);
for (final String element : list) {
out.append(element).append(CR).append(LF);
}
return FileUtils.writeList(listFile, new String(out)); //(File, String)
}
@ -753,21 +750,21 @@ public final class FileUtils {
next();
}
public boolean hasNext() {
return nextLine != null;
return this.nextLine != null;
}
public String next() {
String line = nextLine;
final String line = this.nextLine;
try {
while ((nextLine = reader.readLine()) != null) {
nextLine = nextLine.trim();
if (nextLine.length() > 0) break;
while ((this.nextLine = this.reader.readLine()) != null) {
this.nextLine = this.nextLine.trim();
if (this.nextLine.length() > 0) break;
}
} catch (IOException e) {
nextLine = null;
} catch (OutOfMemoryError e) {
} catch (final IOException e) {
this.nextLine = null;
} catch (final OutOfMemoryError e) {
Log.logException(e);
nextLine = null;
this.nextLine = null;
}
return line;
}
@ -825,7 +822,7 @@ public final class FileUtils {
}
}
public static final File createTempFile(Class<?> classObj, final String name) throws IOException {
public static final File createTempFile(final Class<?> classObj, final String name) throws IOException {
String parserClassName = classObj.getName();
int idx = parserClassName.lastIndexOf('.');
if (idx != -1) {
@ -943,7 +940,7 @@ public final class FileUtils {
if (path.isDirectory()) {
final String[] list = path.list();
if (list != null) {
for (String s: list) deletedelete(new File(path, s));
for (final String s: list) deletedelete(new File(path, s));
}
}
@ -954,28 +951,28 @@ public final class FileUtils {
// some OS may be slow when giving up file pointer
//System.runFinalization();
//System.gc();
try { Thread.sleep(200); } catch (InterruptedException e) { break; }
try { Thread.sleep(200); } catch (final InterruptedException e) { break; }
}
if (path.exists()) {
path.deleteOnExit();
String p = "";
try {
p = path.getCanonicalPath();
} catch (IOException e1) {
} catch (final IOException e1) {
Log.logException(e1);
}
if (System.getProperties().getProperty("os.name","").toLowerCase().startsWith("windows")) {
// deleting files on windows sometimes does not work with java
try {
String command = "cmd /C del /F /Q \"" + p + "\"";
Process r = Runtime.getRuntime().exec(command);
final String command = "cmd /C del /F /Q \"" + p + "\"";
final Process r = Runtime.getRuntime().exec(command);
if (r == null) {
Log.logSevere("FileUtils", "cannot execute command: " + command);
} else {
byte[] response = read(r.getInputStream());
final byte[] response = read(r.getInputStream());
Log.logInfo("FileUtils", "deletedelete: " + UTF8.String(response));
}
} catch (IOException e) {
} catch (final IOException e) {
Log.logException(e);
}
}

@ -164,11 +164,11 @@ public class yacyNewsDB {
private Record b2r(final Row.Entry b) {
if (b == null) return null;
return new yacyNewsDB.Record(
b.getColString(0),
b.getColString(1),
(b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColString(2), GenericFormatter.UTCDiffString()),
b.getPrimaryKeyASCII(),
b.getColUTF8(1),
(b.empty(2)) ? null : my_SHORT_SECOND_FORMATTER.parse(b.getColASCII(2), GenericFormatter.UTCDiffString()),
(int) b.getColLong(3),
MapTools.string2map(b.getColString(4), ",")
MapTools.string2map(b.getColUTF8(4), ",")
);
}

@ -159,7 +159,7 @@ public class yacyNewsQueue {
yacyNewsDB.Record b2r(final Row.Entry b) throws IOException {
if (b == null) return null;
final String id = b.getColString(0);
final String id = b.getPrimaryKeyASCII();
//Date touched = yacyCore.parseUniversalDate(UTF8.String(b[1]));
return this.newsDB.get(id);
}

@ -302,7 +302,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
final Row.Entry entry = this.urlIndexFile.get(urlHashBytes, true);
// getting the wrong url string
oldUrlStr = entry.getColString(1).trim();
oldUrlStr = entry.getColUTF8(1).trim();
int pos = -1;
if ((pos = oldUrlStr.indexOf("://")) != -1) {

Loading…
Cancel
Save