diff --git a/doc/Links.html b/doc/Links.html
index ed9ea054f..c9335f42b 100644
--- a/doc/Links.html
+++ b/doc/Links.html
@@ -34,12 +34,21 @@ globalheader();
Publications about YaCy
+- Article in Datenschleuder #086,
+ magazine of the German 'chaos computer association' (Chaos Computer Club, CCC), in German:
+ "YaCy -- Peer-to-Peer Web-Suchmaschine"
+- Cited as world-wide unique proof-of-concept for a distributed p2p search engine
+ in the German issue of the MIT's Magazine of Innovation
+ Technology Review in an
+ article by Wolfgang Sander-Beuermann;
+ issue 02/2005, page 29
+- Article in the German computer magazine c't,
+ issue c't 2/2005,
+ page 40 (German): "Suchmaschine sucht Tauschpartner"
+- "Jedermanns Suchmaschine" (German)
+ - press release in the Hannoversche Allgemeine Zeitung; issue #289, Dec 9th 2004, page 23
- News-Feed on golem.de
- Report and Interview on netzkritik.de
-- Cited as world-wide unique proof-of-concept for a distributed p2p search engine
- in the German issue of the MIT's Magazine of Innovation Technology Review in an article by Wolfgang Sander-Beuermann; issue 02/2005, page 29
-- Article in the german computer magazine c't 2/2005, page 40: "Suchmaschine sucht Tauschpartner"
-- "Jedermanns Suchmaschine" (german) - press release in the Hannoversche Allgemeine Zeitung; issue #289, Dec 9th 2004, page 23
Partner Sites
diff --git a/doc/News.html b/doc/News.html
index 8bd73b959..3961c8615 100644
--- a/doc/News.html
+++ b/doc/News.html
@@ -39,8 +39,11 @@ globalheader();
v0.37
-- Check on new peer names: must not occur already and may only contain letters, numbers and '_' or '-'.
-- New ThreadPool and performance enhancements from Martin Thelian
+- YaCy's source code is now hosted in a Subversion version control system on BerliOS: yacy@berlios.de
+
+ - Check on new peer names: must not occur already and may only contain letters, numbers and '_' or '-'.
+ - New ThreadPool and performance enhancements from Martin Thelian
+
diff --git a/makerelease.sh b/makerelease.sh
index 1081afd6e..b4d8ebb39 100755
--- a/makerelease.sh
+++ b/makerelease.sh
@@ -45,7 +45,7 @@
# Contributions and changes to the program code must be marked as such.
# define variables
-version='0.368'
+version='0.3681'
datestr=`date +%Y%m%d`
#release='yacy_v'$version'_'$datestr
release='yacy_dev_v'$version'_'$datestr
diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java
index 3999168a5..4321750a4 100644
--- a/source/de/anomic/kelondro/kelondroRecords.java
+++ b/source/de/anomic/kelondro/kelondroRecords.java
@@ -334,8 +334,7 @@ public class kelondroRecords {
protected void deleteNode(Handle handle) throws IOException {
if (cachesize != 0) {
- Node n = (Node) cache.get(handle);
- if (n != null) synchronized (cache) {
+ if (cache.get(handle) != null) synchronized (cache) {
cacheScore.deleteScore(handle);
cache.remove(handle);
}
@@ -439,60 +438,68 @@ public class kelondroRecords {
if (this.handle.index == NUL) throw new kelondroException(filename, "the entry has no index assigned");
return new Handle(this.handle.index);
}
- protected synchronized void setOHByte(byte[] b) throws IOException {
+ protected void setOHByte(byte[] b) throws IOException { // copies b (exactly OHBYTEC bytes) into ohBytes and writes it to entryFile at seekpos(this.handle)
if (b == null) throw new IllegalArgumentException("setOHByte: setting null value does not make any sense");
if (b.length != OHBYTEC) throw new IllegalArgumentException("setOHByte: wrong array size");
if (this.handle.index == NUL) throw new kelondroException(filename, "setOHByte: no handle assigned");
if (this.ohBytes == null) this.ohBytes = new byte[OHBYTEC];
- entryFile.seek(seekpos(this.handle));
- for (int j = 0; j < ohBytes.length; j++) {
- ohBytes[j] = b[j];
- entryFile.writeByte(b[j]);
+ synchronized (entryFile) { // seek and the writes that follow run as one unit on entryFile; NOTE(review): the ohBytes allocation above is no longer under any lock - confirm a node is never shared between threads
+ entryFile.seek(seekpos(this.handle));
+ for (int j = 0; j < ohBytes.length; j++) {
+ ohBytes[j] = b[j];
+ entryFile.writeByte(b[j]);
+ }
}
updateNode();
}
- protected synchronized void setOHHandle(Handle[] i) throws IOException {
+ protected void setOHHandle(Handle[] i) throws IOException { // copies i (exactly OHHANDLEC handles) into ohHandle and writes their indexes to entryFile, OHBYTEC bytes past seekpos(this.handle)
if (i == null) throw new IllegalArgumentException("setOHint: setting null value does not make any sense");
if (i.length != OHHANDLEC) throw new IllegalArgumentException("setOHHandle: wrong array size");
if (this.handle.index == NUL) throw new kelondroException(filename, "setOHHandle: no handle assigned");
if (this.ohHandle == null) this.ohHandle = new Handle[OHHANDLEC];
- entryFile.seek(seekpos(this.handle) + OHBYTEC);
- for (int j = 0; j < ohHandle.length; j++) {
- ohHandle[j] = i[j];
- if (i[j] == null)
- entryFile.writeInt(NUL);
- else
- entryFile.writeInt(i[j].index);
+ synchronized (entryFile) { // seek and the writes that follow run as one unit on entryFile
+ entryFile.seek(seekpos(this.handle) + OHBYTEC);
+ for (int j = 0; j < ohHandle.length; j++) {
+ ohHandle[j] = i[j];
+ if (i[j] == null)
+ entryFile.writeInt(NUL); // a null handle is stored as NUL
+ else
+ entryFile.writeInt(i[j].index);
+ }
}
updateNode();
}
- protected synchronized byte[] getOHByte() throws IOException {
+ protected byte[] getOHByte() throws IOException { // returns ohBytes; if not loaded yet, reads OHBYTEC bytes from entryFile at seekpos(this.handle) first
if (ohBytes == null) {
if (this.handle.index == NUL) throw new kelondroException(filename, "Cannot load OH values");
ohBytes = new byte[OHBYTEC];
- entryFile.seek(seekpos(this.handle));
- for (int j = 0; j < ohBytes.length; j++) {
- ohBytes[j] = entryFile.readByte();
+ synchronized (entryFile) { // seek and the reads that follow run as one unit on entryFile; NOTE(review): ohBytes is assigned before it is filled and outside this lock - confirm no second thread can see the empty array
+ entryFile.seek(seekpos(this.handle));
+ for (int j = 0; j < ohBytes.length; j++) {
+ ohBytes[j] = entryFile.readByte();
+ }
}
updateNode();
}
return ohBytes;
}
- protected synchronized Handle[] getOHHandle() throws IOException {
+ protected Handle[] getOHHandle() throws IOException { // returns ohHandle; if not loaded yet, reads OHHANDLEC ints from entryFile, OHBYTEC bytes past seekpos(this.handle)
if (ohHandle == null) {
if (this.handle.index == NUL) throw new kelondroException(filename, "Cannot load OH values");
ohHandle = new Handle[OHHANDLEC];
- entryFile.seek(seekpos(this.handle) + OHBYTEC);
- int i;
- for (int j = 0; j < ohHandle.length; j++) {
- i = entryFile.readInt();
- ohHandle[j] = (i == NUL) ? null : new Handle(i);
+ synchronized (entryFile) { // seek and the reads that follow run as one unit on entryFile
+ entryFile.seek(seekpos(this.handle) + OHBYTEC);
+ int i;
+ for (int j = 0; j < ohHandle.length; j++) {
+ i = entryFile.readInt();
+ ohHandle[j] = (i == NUL) ? null : new Handle(i); // a stored NUL becomes a null handle
+ }
}
updateNode();
}
return ohHandle;
}
- public synchronized byte[][] setValues(byte[][] row) throws IOException {
+ public byte[][] setValues(byte[][] row) throws IOException {
// if the index is defined, then write values directly to the file, else only to the object
byte[][] result = getValues(); // previous value (this loads the values if not already happened)
if (this.values == null) this.values = new byte[COLWIDTHS.length][];
@@ -501,18 +508,22 @@ public class kelondroRecords {
}
if (this.handle.index != NUL) {
// store data directly to database
- long seek = seekpos(this.handle) + overhead;
- for (int i = 0; i < values.length; i++) {
- entryFile.seek(seek);
- if (values[i] == null) {
- for (int j = 0; j < COLWIDTHS[i]; j++) entryFile.writeByte(0);
- } else if (values[i].length >= COLWIDTHS[i]) {
- entryFile.write(values[i], 0 , COLWIDTHS[i]);
- } else {
- entryFile.write(values[i]);
- for (int j = values[i].length; j < COLWIDTHS[i]; j++) entryFile.writeByte(0);
+ synchronized (entryFile) {
+ long seek = seekpos(this.handle) + overhead;
+ for (int i = 0; i < values.length; i++) {
+ entryFile.seek(seek);
+ if (values[i] == null) {
+ for (int j = 0; j < COLWIDTHS[i]; j++)
+ entryFile.writeByte(0);
+ } else if (values[i].length >= COLWIDTHS[i]) {
+ entryFile.write(values[i], 0 , COLWIDTHS[i]);
+ } else {
+ entryFile.write(values[i]);
+ for (int j = values[i].length; j < COLWIDTHS[i]; j++)
+ entryFile.writeByte(0);
+ }
+ seek = seek + COLWIDTHS[i];
}
- seek = seek + COLWIDTHS[i];
}
}
//System.out.print("setValues result: "); for (int i = 0; i < values.length; i++) System.out.print(new String(result[i]) + " "); System.out.println(".");
@@ -520,16 +531,18 @@ public class kelondroRecords {
return result; // return previous value
}
- public synchronized byte[] getKey() throws IOException {
+ public byte[] getKey() throws IOException {
if ((values == null) || (values[0] == null)) {
// load from database, but ONLY the key!
if (this.handle.index == NUL) {
throw new kelondroException(filename, "Cannot load Key");
} else {
values = new byte[COLWIDTHS.length][];
- entryFile.seek(seekpos(this.handle) + overhead);
values[0] = new byte[COLWIDTHS[0]];
- entryFile.read(values[0], 0, values[0].length);
+ synchronized (entryFile) {
+ entryFile.seek(seekpos(this.handle) + overhead);
+ entryFile.read(values[0], 0, values[0].length);
+ }
for (int i = 1; i < COLWIDTHS.length; i++) values[i] = null;
updateNode();
return values[0];
@@ -539,31 +552,35 @@ public class kelondroRecords {
}
}
- public synchronized byte[][] getValues() throws IOException {
+ public byte[][] getValues() throws IOException {
if ((values == null) || (values[0] == null)) {
// load ALL values from database
if (this.handle.index == NUL) {
throw new kelondroException(filename, "Cannot load values");
} else {
values = new byte[COLWIDTHS.length][];
- long seek = seekpos(this.handle) + overhead;
- for (int i = 0; i < COLWIDTHS.length; i++) {
- entryFile.seek(seek);
- values[i] = new byte[COLWIDTHS[i]];
- entryFile.read(values[i], 0, values[i].length);
- seek = seek + COLWIDTHS[i];
+ synchronized (entryFile) {
+ long seek = seekpos(this.handle) + overhead;
+ for (int i = 0; i < COLWIDTHS.length; i++) {
+ entryFile.seek(seek);
+ values[i] = new byte[COLWIDTHS[i]];
+ entryFile.read(values[i], 0, values[i].length);
+ seek = seek + COLWIDTHS[i];
+ }
}
updateNode();
return values;
}
} else if ((values.length > 1) && (values[1] == null)) {
// only the key has been read; load the remaining
- long seek = seekpos(this.handle) + overhead + COLWIDTHS[0];
- for (int i = 1; i < COLWIDTHS.length; i++) {
- entryFile.seek(seek);
- values[i] = new byte[COLWIDTHS[i]];
- entryFile.read(values[i], 0, values[i].length);
- seek = seek + COLWIDTHS[i];
+ synchronized (entryFile) {
+ long seek = seekpos(this.handle) + overhead + COLWIDTHS[0];
+ for (int i = 1; i < COLWIDTHS.length; i++) {
+ entryFile.seek(seek);
+ values[i] = new byte[COLWIDTHS[i]];
+ entryFile.read(values[i], 0, values[i].length);
+ seek = seek + COLWIDTHS[i];
+ }
}
updateNode();
return values;
@@ -679,17 +696,17 @@ public class kelondroRecords {
}
// Removes all mappings from this map (optional operation).
- public synchronized void clear() {
+ public void clear() { // always throws UnsupportedOperationException; it touches no state, so no lock is needed
throw new UnsupportedOperationException("clear not supported");
}
// Returns true if this map contains no key-value mappings.
- public synchronized boolean isEmpty() {
+ public boolean isEmpty() { // true when USEDC is 0; NOTE(review): USEDC is read without a lock here but changed under synchronized (entryFile) elsewhere in this class - confirm a stale value is acceptable
return (USEDC == 0);
}
// Returns the number of key-value mappings in this map.
- public synchronized int size() {
+ public int size() { // returns USEDC; NOTE(review): USEDC is read without a lock here but changed under synchronized (entryFile) elsewhere in this class - confirm a stale value is acceptable
return this.USEDC;
}
@@ -701,22 +718,24 @@ public class kelondroRecords {
// delete element with handle h
// this element is then connected to the deleted-chain and can be re-used
// change counter
- USEDC--; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC);
- FREEC++; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC);
- // change pointer
- if (this.FREEH.index == NUL) {
- // the first entry
- entryFile.seek(seekpos(h)); entryFile.writeInt(NUL); // write null link at end of free-list
- } else {
- // another entry
- entryFile.seek(seekpos(h)); entryFile.writeInt(this.FREEH.index); // extend free-list
+ synchronized (entryFile) {
+ USEDC--; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC);
+ FREEC++; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC);
+ // change pointer
+ if (this.FREEH.index == NUL) {
+ // the first entry
+ entryFile.seek(seekpos(h)); entryFile.writeInt(NUL); // write null link at end of free-list
+ } else {
+ // another entry
+ entryFile.seek(seekpos(h)); entryFile.writeInt(this.FREEH.index); // extend free-list
+ }
+ // write new FREEH Handle link
+ this.FREEH = h;
+ entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index);
}
- // write new FREEH Handle link
- this.FREEH = h;
- entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index);
}
- public synchronized void close() throws IOException {
+ public void close() throws IOException { // closes entryFile if it is set, then sets the field to null; NOTE(review): this runs without the entryFile lock the read/write methods now take, and after it they would hit synchronized (null) - confirm close cannot overlap with node access
if (this.entryFile != null) this.entryFile.close();
this.entryFile = null;
}
@@ -758,7 +777,7 @@ public class kelondroRecords {
return x;
}
- public synchronized void print(boolean records) {
+ public void print(boolean records) {
System.out.println("REPORT FOR FILE '" + this.filename + "':");
System.out.println("--");
System.out.println("CONTROL DATA");
@@ -856,5 +875,4 @@ public class kelondroRecords {
}
}
-
}
diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java
index 382f7bd76..823868128 100644
--- a/source/de/anomic/plasma/plasmaCondenser.java
+++ b/source/de/anomic/plasma/plasmaCondenser.java
@@ -139,6 +139,7 @@ public class plasmaCondenser {
sievedWordsEnum wordenum = new sievedWordsEnum(is, wordminsize);
while (wordenum.hasMoreElements()) {
word = ((String) wordenum.nextElement()).toLowerCase();
+ //System.out.println("PARSED-WORD " + word);
wordlen = word.length();
if ((wordlen == 1) && (punctuation(word.charAt(0)))) {
// store sentence
@@ -489,6 +490,7 @@ public class plasmaCondenser {
else s = s + r.charAt(i);
}
s = s.trim();
+ //System.out.println("PARSING-LINE '" + r + "'->'" + s + "'");
} else {
return null;
}
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java
index 69e7dc13d..6c76599df 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlLURL.java
@@ -432,7 +432,7 @@ public class plasmaCrawlLURL extends plasmaURL {
};
urlHashCache.put(entry);
} catch (Exception e) {
- System.out.println("INTERNAL ERROR AT plasmaStore:url2hash:" + e.toString());
+ System.out.println("INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString());
e.printStackTrace();
}
}
diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java
index ab6b51e96..638735925 100644
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@@ -295,7 +295,7 @@ public class plasmaCrawlNURL extends plasmaURL {
};
urlHashCache.put(entry);
} catch (IOException e) {
- System.out.println("INTERNAL ERROR AT plasmaNURL:url2hash:" + e.toString());
+ System.out.println("INTERNAL ERROR AT plasmaCrawlNURL:store:" + e.toString()); // names the actual class and method so the message can be traced to its origin
} catch (kelondroException e) {
serverLog.logError("PLASMA", "plasmaCrawlNURL.store failed: " + e.getMessage());
}
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index a0c2ad416..d59a77ee9 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -55,14 +55,19 @@ import de.anomic.server.serverFileUtils;
import de.anomic.htmlFilter.*;
public final class plasmaParser {
-
- public static String mediaExt =
- "swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
- "sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj";
-
+
private final Properties parserList;
+ private final plasmaParserPool theParserPool;
- private final plasmaParserPool theParserPool;
+ public static HashSet mediaExtSet = new HashSet(); // lower-case file extensions (without the dot) that mark a link as a media link
+ public static void initMediaExt(String mediaExtString) { // (re)initializes mediaExtSet from a comma-separated list of extensions
+ mediaExtSet.clear(); // replace rather than extend: a configured list must override the built-in defaults, as the former String assignment did
+ String[] xs = mediaExtString.split(",");
+ for (int i = 0; i < xs.length; i++) if (xs[i].trim().length() > 0) mediaExtSet.add(xs[i].trim().toLowerCase()); // skip empty entries; callers look up lower-cased extensions
+ }
+ static { // built-in defaults, in effect until initMediaExt is called again
+ initMediaExt("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar,sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj");
+ }
public plasmaParser(File parserDispatcherPropertyFile) {
diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java
index 17156c03f..26692f0ee 100644
--- a/source/de/anomic/plasma/plasmaParserDocument.java
+++ b/source/de/anomic/plasma/plasmaParserDocument.java
@@ -166,7 +166,7 @@ public class plasmaParserDocument {
ext = url.substring(extpos).toLowerCase();
normal = plasmaParser.urlNormalform(url);
if (normal != null) {
- if (plasmaParser.mediaExt.indexOf(ext.substring(1)) >= 0) {
+ if (plasmaParser.mediaExtSet.contains(ext.substring(1))) {
// this is not an normal anchor, its a media link
medialinks.put(normal, entry.getValue());
} else {
@@ -198,4 +198,4 @@ public class plasmaParserDocument {
hyperlinks.putAll(plasmaParser.allSubpaths(medialinks));
}
-}
\ No newline at end of file
+}
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 702e376c5..5684f7952 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -405,8 +405,11 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public boolean deQueue() {
// work off fresh entries from the proxy or from the crawler
- if (processStack.size() == 0) return false; // nothing to do
-
+ if (processStack.size() == 0) {
+ log.logDebug("DEQUEUE: queue is empty");
+ return false; // nothing to do
+ }
+
// in case that the server is very busy we do not work off the queue too fast
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {}
@@ -454,7 +457,10 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
}
public boolean localCrawlJob() {
- if (noticeURL.localStackSize() == 0) return false;
+ if (noticeURL.localStackSize() == 0) {
+ log.logDebug("LocalCrawl: queue is empty");
+ return false;
+ }
if (processStack.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
@@ -484,7 +490,10 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// do nothing if either there are private processes to be done
// or there is no global crawl on the stack
- if (noticeURL.remoteStackSize() == 0) return false;
+ if (noticeURL.remoteStackSize() == 0) {
+ log.logDebug("GlobalCrawl: queue is empty");
+ return false;
+ }
if (processStack.size() > 0) {
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
@@ -505,7 +514,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
return true;
}
- private synchronized void processResourceStack(plasmaHTCache.Entry entry) {
+ private void processResourceStack(plasmaHTCache.Entry entry) {
// work off one stack entry with a fresh resource (scraped web page)
try {
// we must distinguish the following cases: resource-load was initiated by
@@ -796,7 +805,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: enqueed for load " + urlEntry.url());
}
- private synchronized boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) {
+ private boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) {
if (urlEntry == null) {
log.logInfo("GLOBALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
return false;
diff --git a/source/yacy.java b/source/yacy.java
index 66fb40c6a..97c995d50 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -208,7 +208,7 @@ public final class yacy {
}
// init parser
- de.anomic.plasma.plasmaParser.mediaExt = sb.getConfig("mediaExt","");
+ de.anomic.plasma.plasmaParser.initMediaExt(sb.getConfig("mediaExt",""));
// start main threads
try {
diff --git a/yacy.init b/yacy.init
index 0f30feecc..8605f8ae7 100644
--- a/yacy.init
+++ b/yacy.init
@@ -374,13 +374,13 @@ xpstopw=true
30_peerping_busysleep=120000
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
-50_localcrawl_idlesleep=5000
+50_localcrawl_idlesleep=15000
50_localcrawl_busysleep=0
-60_globalcrawl_idlesleep=60000
+60_globalcrawl_idlesleep=30000
60_globalcrawl_busysleep=3000
70_cachemanager_idlesleep=10000
70_cachemanager_busysleep=0
-80_dequeue_idlesleep=4000
+80_dequeue_idlesleep=10000
80_dequeue_busysleep=0
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000