Fixed 100% CPU usage bug in plasmaCondenser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@72 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 995673d795
commit 48650c082c

@ -39,6 +39,8 @@
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
// compile with javac -sourcepath source source/de/anomic/plasma/plasmaCondenser.java
// execute with java -cp source de.anomic.plasma.plasmaCondenser
package de.anomic.plasma;
@ -446,7 +448,6 @@ public class plasmaCondenser {
c = r.charAt(i);
if (!(((c >= 'a') && (c <= 'z')) ||
((c >= '0') && (c <= '9')))) continue loop; // go to next while loop
//if ((c < 'a') || (c > 'z')) continue loop; // go to next while loop
}
return s;
}
@ -480,16 +481,21 @@ public class plasmaCondenser {
private Object nextElement0() {
String r;
StringBuffer sb;
char c;
while (s.length() == 0) {
if (e.hasMoreElements()) {
r = ((String) e.nextElement()).trim();
s = "";
r = (String) e.nextElement();
if (r == null) return null;
r = r.trim();
sb = new StringBuffer(r.length() * 2);
for (int i = 0; i < r.length(); i++) {
if (invisible(r.charAt(i))) s = s + " ";
else if (punctuation(r.charAt(i))) s = s + " " + r.charAt(i) + " ";
else s = s + r.charAt(i);
c = r.charAt(i);
if (invisible(c)) sb = sb.append(' ');
else if (punctuation(c)) sb = sb.append(' ').append(c).append(' ');
else sb = sb.append(c);
}
s = s.trim();
s = sb.toString().trim();
//System.out.println("PARSING-LINE '" + r + "'->'" + s + "'");
} else {
return null;
@ -636,5 +642,4 @@ public class plasmaCondenser {
}
}
}

@ -39,6 +39,8 @@
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
// compile: javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
package de.anomic.plasma;
@ -307,17 +309,23 @@ public final class plasmaParser {
}
public static void main(String[] args) {
try {
plasmaParser theParser = new plasmaParser(new File("yacy.parser"));
FileInputStream theInput = new FileInputStream(new File("Y:/public_html/test.pdf"));
ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
//javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
//java -cp source:lib/commons-collections.jar:lib/commons-pool-1.2.jar de.anomic.plasma.plasmaParser bug.html bug.out
try {
File in = new File(args[0]);
File out = new File(args[1]);
plasmaParser theParser = new plasmaParser(new File("yacy.parser"));
FileInputStream theInput = new FileInputStream(in);
ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
serverFileUtils.copy(theInput, theOutput);
theParser.parseSource(new URL("http://brain"),"application/pdf",theOutput.toByteArray());
plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "text/html", theOutput.toByteArray());
//plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray());
byte[] theText = document.getText();
serverFileUtils.write(theText, out);
} catch (Exception e) {
e.printStackTrace();
}
}
}
}

@ -406,7 +406,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// work off fresh entries from the proxy or from the crawler
if (processStack.size() == 0) {
log.logDebug("DEQUEUE: queue is empty");
//log.logDebug("DEQUEUE: queue is empty");
return false; // nothing to do
}
@ -458,7 +458,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public boolean localCrawlJob() {
if (noticeURL.localStackSize() == 0) {
log.logDebug("LocalCrawl: queue is empty");
//log.logDebug("LocalCrawl: queue is empty");
return false;
}
if (processStack.size() >= crawlSlots) {
@ -491,7 +491,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// do nothing if either there are private processes to be done
// or there is no global crawl on the stack
if (noticeURL.remoteStackSize() == 0) {
log.logDebug("GlobalCrawl: queue is empty");
//log.logDebug("GlobalCrawl: queue is empty");
return false;
}
if (processStack.size() > 0) {

Loading…
Cancel
Save