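Fixed 100% CPU bug in plasmaCondenser: nextElement0 now builds each line in a StringBuffer instead of repeated String concatenation, and returns null for a null element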

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@72 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 995673d795
commit 48650c082c

@ -39,6 +39,8 @@
// the intact and unchanged copyright notice. // the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such. // Contributions and changes to the program code must be marked as such.
// compile with javac -sourcepath source source/de/anomic/plasma/plasmaCondenser.java
// execute with java -cp source de.anomic.plasma.plasmaCondenser
package de.anomic.plasma; package de.anomic.plasma;
@ -446,7 +448,6 @@ public class plasmaCondenser {
c = r.charAt(i); c = r.charAt(i);
if (!(((c >= 'a') && (c <= 'z')) || if (!(((c >= 'a') && (c <= 'z')) ||
((c >= '0') && (c <= '9')))) continue loop; // go to next while loop ((c >= '0') && (c <= '9')))) continue loop; // go to next while loop
//if ((c < 'a') || (c > 'z')) continue loop; // go to next while loop
} }
return s; return s;
} }
@ -480,16 +481,21 @@ public class plasmaCondenser {
private Object nextElement0() { private Object nextElement0() {
String r; String r;
StringBuffer sb;
char c;
while (s.length() == 0) { while (s.length() == 0) {
if (e.hasMoreElements()) { if (e.hasMoreElements()) {
r = ((String) e.nextElement()).trim(); r = (String) e.nextElement();
s = ""; if (r == null) return null;
r = r.trim();
sb = new StringBuffer(r.length() * 2);
for (int i = 0; i < r.length(); i++) { for (int i = 0; i < r.length(); i++) {
if (invisible(r.charAt(i))) s = s + " "; c = r.charAt(i);
else if (punctuation(r.charAt(i))) s = s + " " + r.charAt(i) + " "; if (invisible(c)) sb = sb.append(' ');
else s = s + r.charAt(i); else if (punctuation(c)) sb = sb.append(' ').append(c).append(' ');
else sb = sb.append(c);
} }
s = s.trim(); s = sb.toString().trim();
//System.out.println("PARSING-LINE '" + r + "'->'" + s + "'"); //System.out.println("PARSING-LINE '" + r + "'->'" + s + "'");
} else { } else {
return null; return null;
@ -636,5 +642,4 @@ public class plasmaCondenser {
} }
} }
} }

@ -39,6 +39,8 @@
// the intact and unchanged copyright notice. // the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such. // Contributions and changes to the program code must be marked as such.
// compile: javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
package de.anomic.plasma; package de.anomic.plasma;
@ -307,17 +309,23 @@ public final class plasmaParser {
} }
public static void main(String[] args) { public static void main(String[] args) {
try { //javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
plasmaParser theParser = new plasmaParser(new File("yacy.parser")); //java -cp source:lib/commons-collections.jar:lib/commons-pool-1.2.jar de.anomic.plasma.plasmaParser bug.html bug.out
FileInputStream theInput = new FileInputStream(new File("Y:/public_html/test.pdf")); try {
ByteArrayOutputStream theOutput = new ByteArrayOutputStream(); File in = new File(args[0]);
File out = new File(args[1]);
plasmaParser theParser = new plasmaParser(new File("yacy.parser"));
FileInputStream theInput = new FileInputStream(in);
ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
serverFileUtils.copy(theInput, theOutput); serverFileUtils.copy(theInput, theOutput);
plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "text/html", theOutput.toByteArray());
theParser.parseSource(new URL("http://brain"),"application/pdf",theOutput.toByteArray()); //plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray());
byte[] theText = document.getText();
serverFileUtils.write(theText, out);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
} }

@ -406,7 +406,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// work off fresh entries from the proxy or from the crawler // work off fresh entries from the proxy or from the crawler
if (processStack.size() == 0) { if (processStack.size() == 0) {
log.logDebug("DEQUEUE: queue is empty"); //log.logDebug("DEQUEUE: queue is empty");
return false; // nothing to do return false; // nothing to do
} }
@ -458,7 +458,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public boolean localCrawlJob() { public boolean localCrawlJob() {
if (noticeURL.localStackSize() == 0) { if (noticeURL.localStackSize() == 0) {
log.logDebug("LocalCrawl: queue is empty"); //log.logDebug("LocalCrawl: queue is empty");
return false; return false;
} }
if (processStack.size() >= crawlSlots) { if (processStack.size() >= crawlSlots) {
@ -491,7 +491,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
// do nothing if either there are private processes to be done // do nothing if either there are private processes to be done
// or there is no global crawl on the stack // or there is no global crawl on the stack
if (noticeURL.remoteStackSize() == 0) { if (noticeURL.remoteStackSize() == 0) {
log.logDebug("GlobalCrawl: queue is empty"); //log.logDebug("GlobalCrawl: queue is empty");
return false; return false;
} }
if (processStack.size() > 0) { if (processStack.size() > 0) {

Loading…
Cancel
Save