several bugfixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@71 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 52abc456fb
commit 995673d795

@ -34,12 +34,21 @@ globalheader();
<p>Publications about YaCy
<ul>
<li>Article in <a href="http://www.ccc.de/updates/2005/ds86">Datenschleuder #086</a>,
magazine of the German 'chaos computer association' (<b>Chaos Computer Club</b>, <b>CCC</b>), in German:
<a href="http://www.yacy.net/yacy/material/YaCy-Datenschleuder086.pdf"><b>"YaCy -- Peer-to-Peer Web-Suchmaschine"</b></a></li>
<li>Cited as <b>world-wide unique proof-of-concept for a distributed p2p search engine</b>
in the German issue of the MIT's Magazine of Innovation
<a href="http://www.technologyreview.de"><b>Technology Review</b></a> in an
<a href="http://suma-ev.de/tech-rev1.html">article by Wolfgang Sander-Beuermann</a>;
<a href="http://www.heise.de/tr/inhalt/2005/02/">issue 02/2005</a>, page 29</li>
<li>Article in the German computer magazine <a href="http://www.heise.de/ct/"><b>c't</b></a>,
issue <a href="http://www.heise.de/ct/05/02/urls/">c't 2/2005</a>,
page 40 (German): "<b>Suchmaschine sucht Tauschpartner</b>"</li>
<li><a href="http://suma-ev.de/downloads/yacy-haz.pdf"><b>"Jedermanns Suchmaschine"</b> (German)</a>
- press release in the <a href="http://www.haz.de">Hannoversche Allgemeine Zeitung</a>; issue #289, Dec 9th 2004, page 23</li>
<li>News-Feed on <a href="http://www.golem.de/0502/36260.html">golem.de</a></li>
<li>Report and Interview on <a href="http://www.netzkritik.de/art/288.shtml">netzkritik.de</a></li>
<li>Cited as <b>world-wide unique proof-of-concept for a distributed p2p search engine</b>
in the German issue of the MIT's Magazine of Innovation <a href="http://www.technologyreview.de"><b>Technology Review</b></a> in an <a href="http://suma-ev.de/tech-rev1.html">article by Wolfgang Sander-Beuermann</a>; issue 02/2005, page 29</li>
<li>Article in the German computer magazine <a href="http://www.heise.de/ct/inhalt.shtml"><b>c't</b></a> 2/2005, page 40: "<b>Suchmaschine sucht Tauschpartner</b>"</li>
<li><a href="http://suma-ev.de/downloads/yacy-haz.pdf"><b>"Jedermanns Suchmaschine"</b> (German)</a> - press release in the <a href="http://www.haz.de">Hannoversche Allgemeine Zeitung</a>; issue #289, Dec 9th 2004, page 23</li>
</ul></p><br>
<p>Partner Sites

@ -39,8 +39,11 @@ globalheader();
<br><p>v0.37
<ul>
<li>Check on new peer names: must not occur already and may only contain letters, numbers and '_' or '-'.</li>
<li>New ThreadPool and performance enhancements from Martin Thelian</li>
<li>YaCy's source code is now hosted in a Subversion version control system on BerliOS: <a href="http://developer.berlios.de/projects/yacy/">yacy@berlios.de</a></li>
<ul>
<li>Check on new peer names: must not occur already and may only contain letters, numbers and '_' or '-'.</li>
<li>New ThreadPool and performance enhancements from Martin Thelian</li>
</ul>
<li></li>
<li></li>
<li></li>

@ -45,7 +45,7 @@
# Contributions and changes to the program code must be marked as such.
# define variables
version='0.368'
version='0.3681'
datestr=`date +%Y%m%d`
#release='yacy_v'$version'_'$datestr
release='yacy_dev_v'$version'_'$datestr

@ -334,8 +334,7 @@ public class kelondroRecords {
protected void deleteNode(Handle handle) throws IOException {
if (cachesize != 0) {
Node n = (Node) cache.get(handle);
if (n != null) synchronized (cache) {
if (cache.get(handle) != null) synchronized (cache) {
cacheScore.deleteScore(handle);
cache.remove(handle);
}
@ -439,60 +438,68 @@ public class kelondroRecords {
if (this.handle.index == NUL) throw new kelondroException(filename, "the entry has no index assigned");
return new Handle(this.handle.index);
}
protected synchronized void setOHByte(byte[] b) throws IOException {
protected void setOHByte(byte[] b) throws IOException {
if (b == null) throw new IllegalArgumentException("setOHByte: setting null value does not make any sense");
if (b.length != OHBYTEC) throw new IllegalArgumentException("setOHByte: wrong array size");
if (this.handle.index == NUL) throw new kelondroException(filename, "setOHByte: no handle assigned");
if (this.ohBytes == null) this.ohBytes = new byte[OHBYTEC];
entryFile.seek(seekpos(this.handle));
for (int j = 0; j < ohBytes.length; j++) {
ohBytes[j] = b[j];
entryFile.writeByte(b[j]);
synchronized (entryFile) {
entryFile.seek(seekpos(this.handle));
for (int j = 0; j < ohBytes.length; j++) {
ohBytes[j] = b[j];
entryFile.writeByte(b[j]);
}
}
updateNode();
}
protected synchronized void setOHHandle(Handle[] i) throws IOException {
protected void setOHHandle(Handle[] i) throws IOException {
if (i == null) throw new IllegalArgumentException("setOHint: setting null value does not make any sense");
if (i.length != OHHANDLEC) throw new IllegalArgumentException("setOHHandle: wrong array size");
if (this.handle.index == NUL) throw new kelondroException(filename, "setOHHandle: no handle assigned");
if (this.ohHandle == null) this.ohHandle = new Handle[OHHANDLEC];
entryFile.seek(seekpos(this.handle) + OHBYTEC);
for (int j = 0; j < ohHandle.length; j++) {
ohHandle[j] = i[j];
if (i[j] == null)
entryFile.writeInt(NUL);
else
entryFile.writeInt(i[j].index);
synchronized (entryFile) {
entryFile.seek(seekpos(this.handle) + OHBYTEC);
for (int j = 0; j < ohHandle.length; j++) {
ohHandle[j] = i[j];
if (i[j] == null)
entryFile.writeInt(NUL);
else
entryFile.writeInt(i[j].index);
}
}
updateNode();
}
protected synchronized byte[] getOHByte() throws IOException {
protected byte[] getOHByte() throws IOException {
if (ohBytes == null) {
if (this.handle.index == NUL) throw new kelondroException(filename, "Cannot load OH values");
ohBytes = new byte[OHBYTEC];
entryFile.seek(seekpos(this.handle));
for (int j = 0; j < ohBytes.length; j++) {
ohBytes[j] = entryFile.readByte();
synchronized (entryFile) {
entryFile.seek(seekpos(this.handle));
for (int j = 0; j < ohBytes.length; j++) {
ohBytes[j] = entryFile.readByte();
}
}
updateNode();
}
return ohBytes;
}
protected synchronized Handle[] getOHHandle() throws IOException {
protected Handle[] getOHHandle() throws IOException {
if (ohHandle == null) {
if (this.handle.index == NUL) throw new kelondroException(filename, "Cannot load OH values");
ohHandle = new Handle[OHHANDLEC];
entryFile.seek(seekpos(this.handle) + OHBYTEC);
int i;
for (int j = 0; j < ohHandle.length; j++) {
i = entryFile.readInt();
ohHandle[j] = (i == NUL) ? null : new Handle(i);
synchronized (entryFile) {
entryFile.seek(seekpos(this.handle) + OHBYTEC);
int i;
for (int j = 0; j < ohHandle.length; j++) {
i = entryFile.readInt();
ohHandle[j] = (i == NUL) ? null : new Handle(i);
}
}
updateNode();
}
return ohHandle;
}
public synchronized byte[][] setValues(byte[][] row) throws IOException {
public byte[][] setValues(byte[][] row) throws IOException {
// if the index is defined, then write values directly to the file, else only to the object
byte[][] result = getValues(); // previous value (this loads the values if not already happened)
if (this.values == null) this.values = new byte[COLWIDTHS.length][];
@ -501,18 +508,22 @@ public class kelondroRecords {
}
if (this.handle.index != NUL) {
// store data directly to database
long seek = seekpos(this.handle) + overhead;
for (int i = 0; i < values.length; i++) {
entryFile.seek(seek);
if (values[i] == null) {
for (int j = 0; j < COLWIDTHS[i]; j++) entryFile.writeByte(0);
} else if (values[i].length >= COLWIDTHS[i]) {
entryFile.write(values[i], 0 , COLWIDTHS[i]);
} else {
entryFile.write(values[i]);
for (int j = values[i].length; j < COLWIDTHS[i]; j++) entryFile.writeByte(0);
synchronized (entryFile) {
long seek = seekpos(this.handle) + overhead;
for (int i = 0; i < values.length; i++) {
entryFile.seek(seek);
if (values[i] == null) {
for (int j = 0; j < COLWIDTHS[i]; j++)
entryFile.writeByte(0);
} else if (values[i].length >= COLWIDTHS[i]) {
entryFile.write(values[i], 0 , COLWIDTHS[i]);
} else {
entryFile.write(values[i]);
for (int j = values[i].length; j < COLWIDTHS[i]; j++)
entryFile.writeByte(0);
}
seek = seek + COLWIDTHS[i];
}
seek = seek + COLWIDTHS[i];
}
}
//System.out.print("setValues result: "); for (int i = 0; i < values.length; i++) System.out.print(new String(result[i]) + " "); System.out.println(".");
@ -520,16 +531,18 @@ public class kelondroRecords {
return result; // return previous value
}
public synchronized byte[] getKey() throws IOException {
public byte[] getKey() throws IOException {
if ((values == null) || (values[0] == null)) {
// load from database, but ONLY the key!
if (this.handle.index == NUL) {
throw new kelondroException(filename, "Cannot load Key");
} else {
values = new byte[COLWIDTHS.length][];
entryFile.seek(seekpos(this.handle) + overhead);
values[0] = new byte[COLWIDTHS[0]];
entryFile.read(values[0], 0, values[0].length);
synchronized (entryFile) {
entryFile.seek(seekpos(this.handle) + overhead);
entryFile.read(values[0], 0, values[0].length);
}
for (int i = 1; i < COLWIDTHS.length; i++) values[i] = null;
updateNode();
return values[0];
@ -539,31 +552,35 @@ public class kelondroRecords {
}
}
public synchronized byte[][] getValues() throws IOException {
public byte[][] getValues() throws IOException {
if ((values == null) || (values[0] == null)) {
// load ALL values from database
if (this.handle.index == NUL) {
throw new kelondroException(filename, "Cannot load values");
} else {
values = new byte[COLWIDTHS.length][];
long seek = seekpos(this.handle) + overhead;
for (int i = 0; i < COLWIDTHS.length; i++) {
entryFile.seek(seek);
values[i] = new byte[COLWIDTHS[i]];
entryFile.read(values[i], 0, values[i].length);
seek = seek + COLWIDTHS[i];
synchronized (entryFile) {
long seek = seekpos(this.handle) + overhead;
for (int i = 0; i < COLWIDTHS.length; i++) {
entryFile.seek(seek);
values[i] = new byte[COLWIDTHS[i]];
entryFile.read(values[i], 0, values[i].length);
seek = seek + COLWIDTHS[i];
}
}
updateNode();
return values;
}
} else if ((values.length > 1) && (values[1] == null)) {
// only the key has been read; load the remaining
long seek = seekpos(this.handle) + overhead + COLWIDTHS[0];
for (int i = 1; i < COLWIDTHS.length; i++) {
entryFile.seek(seek);
values[i] = new byte[COLWIDTHS[i]];
entryFile.read(values[i], 0, values[i].length);
seek = seek + COLWIDTHS[i];
synchronized (entryFile) {
long seek = seekpos(this.handle) + overhead + COLWIDTHS[0];
for (int i = 1; i < COLWIDTHS.length; i++) {
entryFile.seek(seek);
values[i] = new byte[COLWIDTHS[i]];
entryFile.read(values[i], 0, values[i].length);
seek = seek + COLWIDTHS[i];
}
}
updateNode();
return values;
@ -679,17 +696,17 @@ public class kelondroRecords {
}
// Removes all mappings from this map (optional operation).
public synchronized void clear() {
public void clear() {
throw new UnsupportedOperationException("clear not supported");
}
// Returns true if this map contains no key-value mappings.
public synchronized boolean isEmpty() {
public boolean isEmpty() {
return (USEDC == 0);
}
// Returns the number of key-value mappings in this map.
public synchronized int size() {
public int size() {
return this.USEDC;
}
@ -701,22 +718,24 @@ public class kelondroRecords {
// delete element with handle h
// this element is then connected to the deleted-chain and can be re-used
// change counter
USEDC--; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC);
FREEC++; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC);
// change pointer
if (this.FREEH.index == NUL) {
// the first entry
entryFile.seek(seekpos(h)); entryFile.writeInt(NUL); // write null link at end of free-list
} else {
// another entry
entryFile.seek(seekpos(h)); entryFile.writeInt(this.FREEH.index); // extend free-list
synchronized (entryFile) {
USEDC--; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC);
FREEC++; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC);
// change pointer
if (this.FREEH.index == NUL) {
// the first entry
entryFile.seek(seekpos(h)); entryFile.writeInt(NUL); // write null link at end of free-list
} else {
// another entry
entryFile.seek(seekpos(h)); entryFile.writeInt(this.FREEH.index); // extend free-list
}
// write new FREEH Handle link
this.FREEH = h;
entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index);
}
// write new FREEH Handle link
this.FREEH = h;
entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index);
}
public synchronized void close() throws IOException {
public void close() throws IOException {
if (this.entryFile != null) this.entryFile.close();
this.entryFile = null;
}
@ -758,7 +777,7 @@ public class kelondroRecords {
return x;
}
public synchronized void print(boolean records) {
public void print(boolean records) {
System.out.println("REPORT FOR FILE '" + this.filename + "':");
System.out.println("--");
System.out.println("CONTROL DATA");
@ -856,5 +875,4 @@ public class kelondroRecords {
}
}
}

@ -139,6 +139,7 @@ public class plasmaCondenser {
sievedWordsEnum wordenum = new sievedWordsEnum(is, wordminsize);
while (wordenum.hasMoreElements()) {
word = ((String) wordenum.nextElement()).toLowerCase();
//System.out.println("PARSED-WORD " + word);
wordlen = word.length();
if ((wordlen == 1) && (punctuation(word.charAt(0)))) {
// store sentence
@ -489,6 +490,7 @@ public class plasmaCondenser {
else s = s + r.charAt(i);
}
s = s.trim();
//System.out.println("PARSING-LINE '" + r + "'->'" + s + "'");
} else {
return null;
}

@ -432,7 +432,7 @@ public class plasmaCrawlLURL extends plasmaURL {
};
urlHashCache.put(entry);
} catch (Exception e) {
System.out.println("INTERNAL ERROR AT plasmaStore:url2hash:" + e.toString());
System.out.println("INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString());
e.printStackTrace();
}
}

@ -295,7 +295,7 @@ public class plasmaCrawlNURL extends plasmaURL {
};
urlHashCache.put(entry);
} catch (IOException e) {
System.out.println("INTERNAL ERROR AT plasmaNURL:url2hash:" + e.toString());
System.out.println("INTERNAL ERROR AT plasmaNURL:store:" + e.toString());
} catch (kelondroException e) {
serverLog.logError("PLASMA", "plasmaCrawlNURL.store failed: " + e.getMessage());
}

@ -55,14 +55,19 @@ import de.anomic.server.serverFileUtils;
import de.anomic.htmlFilter.*;
public final class plasmaParser {
public static String mediaExt =
"swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
"sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj";
private final Properties parserList;
private final plasmaParserPool theParserPool;
private final plasmaParserPool theParserPool;
public static HashSet mediaExtSet = new HashSet();
/**
 * Registers file extensions that mark a link as a media link.
 *
 * <p>Each comma-separated entry is trimmed, lower-cased and added to
 * {@link #mediaExtSet}. Entries already in the set are kept, so repeated
 * calls accumulate. Blank entries are ignored: {@code "".split(",")} yields
 * one empty string, which would otherwise put "" into the set.
 *
 * @param mediaExtString comma-separated extensions without the leading dot,
 *        e.g. {@code "jpg,png,zip"}; {@code null} or an empty string adds nothing
 */
public static void initMediaExt(String mediaExtString) {
    if (mediaExtString == null) return;
    String[] xs = mediaExtString.split(",");
    for (int i = 0; i < xs.length; i++) {
        // normalise the entry so that a lookup with a lower-cased extension matches
        String ext = xs[i].trim().toLowerCase();
        if (ext.length() > 0) mediaExtSet.add(ext);
    }
}
static {
initMediaExt("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
"sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj");
}
public plasmaParser(File parserDispatcherPropertyFile) {

@ -166,7 +166,7 @@ public class plasmaParserDocument {
ext = url.substring(extpos).toLowerCase();
normal = plasmaParser.urlNormalform(url);
if (normal != null) {
if (plasmaParser.mediaExt.indexOf(ext.substring(1)) >= 0) {
if (plasmaParser.mediaExtSet.contains(ext.substring(1))) {
// this is not a normal anchor, it's a media link
medialinks.put(normal, entry.getValue());
} else {
@ -198,4 +198,4 @@ public class plasmaParserDocument {
hyperlinks.putAll(plasmaParser.allSubpaths(medialinks));
}
}
}

@ -405,8 +405,11 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public boolean deQueue() {
// work off fresh entries from the proxy or from the crawler
if (processStack.size() == 0) return false; // nothing to do
if (processStack.size() == 0) {
log.logDebug("DEQUEUE: queue is empty");
return false; // nothing to do
}
// in case that the server is very busy we do not work off the queue too fast
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {}
@ -454,7 +457,10 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
}
public boolean localCrawlJob() {
if (noticeURL.localStackSize() == 0) return false;
if (noticeURL.localStackSize() == 0) {
log.logDebug("LocalCrawl: queue is empty");
return false;
}
if (processStack.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
@ -484,7 +490,10 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// do nothing if either there are private processes to be done
// or there is no global crawl on the stack
if (noticeURL.remoteStackSize() == 0) return false;
if (noticeURL.remoteStackSize() == 0) {
log.logDebug("GlobalCrawl: queue is empty");
return false;
}
if (processStack.size() > 0) {
log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
@ -505,7 +514,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
return true;
}
private synchronized void processResourceStack(plasmaHTCache.Entry entry) {
private void processResourceStack(plasmaHTCache.Entry entry) {
// work off one stack entry with a fresh resource (scraped web page)
try {
// we must distinguish the following cases: resource-load was initiated by
@ -796,7 +805,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: enqueed for load " + urlEntry.url());
}
private synchronized boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) {
private boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) {
if (urlEntry == null) {
log.logInfo("GLOBALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
return false;

@ -208,7 +208,7 @@ public final class yacy {
}
// init parser
de.anomic.plasma.plasmaParser.mediaExt = sb.getConfig("mediaExt","");
de.anomic.plasma.plasmaParser.initMediaExt(sb.getConfig("mediaExt",""));
// start main threads
try {

@ -374,13 +374,13 @@ xpstopw=true
30_peerping_busysleep=120000
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
50_localcrawl_idlesleep=5000
50_localcrawl_idlesleep=15000
50_localcrawl_busysleep=0
60_globalcrawl_idlesleep=60000
60_globalcrawl_idlesleep=30000
60_globalcrawl_busysleep=3000
70_cachemanager_idlesleep=10000
70_cachemanager_busysleep=0
80_dequeue_idlesleep=4000
80_dequeue_idlesleep=10000
80_dequeue_busysleep=0
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000

Loading…
Cancel
Save