From abba8fe61b1ea9febb43c2fc0b9a3558f1600089 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 26 Jun 2005 22:40:50 +0000 Subject: [PATCH] fixed utf-8 decoding in htmlFilterAbstractScraper and removed httpd timing git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@323 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../htmlFilter/htmlFilterAbstractScraper.java | 25 ++- .../anomic/server/serverAbstractThread.java | 2 +- source/de/anomic/server/serverCore.java | 193 +++++++++--------- 3 files changed, 117 insertions(+), 103 deletions(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java index 17b1a3ffd..a91936ea9 100644 --- a/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java @@ -357,12 +357,27 @@ public abstract class htmlFilterAbstractScraper implements htmlFilterScraper { public static serverByteBuffer convertUmlaute(serverByteBuffer bb) { serverByteBuffer t = new serverByteBuffer(bb.length() + 20); - byte b; + int b0, b1, b2; String z; - for (int i = 0; i < bb.length(); i++) { - b = bb.byteAt(i); - z = code_iso8859s(b & 0xff); - if (z == null) t.append(b); else t.append(z); + int i = 0; + while (i < bb.length()) { + b0 = bb.byteAt(i) & 0xff; + // check utf-8 encoding + if (b0 < 128) { + t.append(b0); + i++; + } else { + b1 = bb.byteAt(i + 1) & 0xff; + if ((b0 > 0xbf) && (b0 < 0xe0)) { + z = code_iso8859s(((b0 & 0x1f) << 0x6) | (b1 & 0x3f)); + i += 2; + } else { + b2 = bb.byteAt(i + 2) & 0xff; + z = code_iso8859s(((b0 & 0xf) << 0xc) | ((b1 & 0x3f) << 0x6) | (b2 & 0x3f)); + i += 3; + } + if (z == null) t.append(b0); else t.append(z); + } } return t; } diff --git a/source/de/anomic/server/serverAbstractThread.java b/source/de/anomic/server/serverAbstractThread.java index abeb5847e..c553312e9 100644 --- a/source/de/anomic/server/serverAbstractThread.java +++ b/source/de/anomic/server/serverAbstractThread.java @@ -81,7 +81,7 @@ public abstract class serverAbstractThread extends Thread implements serverThrea protected final void announceMoreExecTime(long millis) { this.busytime += millis; } - + protected final void announceMoreSleepTime(long millis) { this.idletime += millis; } diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index f86b256a4..b7b681204 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -637,6 +637,9 @@ public final class serverCore extends serverAbstractThread implements serverThre public Session(ThreadGroup theThreadGroup) { super(theThreadGroup,"Session"); + + // setting the session startup time + this.start = System.currentTimeMillis(); } public void setStopped(boolean stopped) { @@ -724,56 +727,50 @@ public final class serverCore extends serverAbstractThread implements serverThre this.running = true; // The thread keeps running. - while (!this.stopped && !Thread.interrupted()) { - if (this.done) { - // We are waiting for a task now. + while (!this.stopped && !Thread.interrupted()) { + if (this.done) { + // We are waiting for a task now. synchronized (this) { - try { - this.wait(); //Wait until we get a request to process. - } - catch (InterruptedException e) { - this.stopped = true; - // log.error("", e); - } + try { + this.wait(); //Wait until we get a request to process. + } catch (InterruptedException e) { + this.stopped = true; + // log.error("", e); + } } - } - else - { + } else { //There is a task....let us execute it. try { - execute(); - if (this.syncObject != null) { - synchronized (this.syncObject) { - //Notify the completion. - this.syncObject.notifyAll(); - } - } + execute(); + if (this.syncObject != null) { + synchronized (this.syncObject) { + //Notify the completion. + this.syncObject.notifyAll(); + } + } } catch (Exception e) { // log.error("", e); - } - finally { + } finally { reset(); if (!this.stopped && !this.isInterrupted()) { try { this.setName("Session_inPool"); serverCore.this.theSessionPool.returnObject(this); - } - catch (Exception e1) { + } catch (Exception e1) { // e1.printStackTrace(); this.stopped = true; } } } - } - } - } + } + } + } private void execute() throws InterruptedException { try { - // setting the session startup time - this.start = System.currentTimeMillis(); + // settin the session identity this.identity = "-"; @@ -820,53 +817,52 @@ public final class serverCore extends serverAbstractThread implements serverThre } //log.logDebug("* session " + handle + " completed. time = " + (System.currentTimeMillis() - handle)); - announceMoreExecTime(System.currentTimeMillis() - this.start); } - private void listen() { - try { - // set up some reflection - Class[] stringType = {"".getClass()}; - Class[] exceptionType = {Class.forName("java.lang.Throwable")}; - - // send greeting - Object result = commandObj.greeting(); - if (result != null) { - if ((result instanceof String) && (((String) result).length() > 0)) writeLine((String) result); - } - - // start dialog - byte[] requestBytes = null; - boolean terminate = false; - int pos; - String cmd; - String tmp; - Object[] stringParameter = new String[1]; - while ((this.in != null) && ((requestBytes = readLine()) != null)) { - this.commandCounter++; + private void listen() { + try { + // set up some reflection + Class[] stringType = {"".getClass()}; + Class[] exceptionType = {Class.forName("java.lang.Throwable")}; + + // send greeting + Object result = commandObj.greeting(); + if (result != null) { + if ((result instanceof String) && (((String) result).length() > 0)) writeLine((String) result); + } + + // start dialog + byte[] requestBytes = null; + boolean terminate = false; + int pos; + String cmd; + String tmp; + Object[] stringParameter = new String[1]; + while ((this.in != null) && ((requestBytes = readLine()) != null)) { + this.commandCounter++; this.setName("Session_" + this.userAddress.getHostAddress() + ":" + this.controlSocket.getPort() + "#" + commandCounter); - this.request = new String(requestBytes); - //log.logDebug("* session " + handle + " received command '" + request + "'. time = " + (System.currentTimeMillis() - handle)); - log(false, this.request); - try { + this.request = new String(requestBytes); + //log.logDebug("* session " + handle + " received command '" + request + "'. time = " + (System.currentTimeMillis() - handle)); + log(false, this.request); + try { // if we can not determine the proper command string we try to call function emptyRequest // of the commandObject if (this.request.trim().length() == 0) this.request = "EMPTY"; - pos = this.request.indexOf(' '); - if (pos < 0) { - cmd = this.request.trim().toUpperCase(); - stringParameter[0] = ""; - } else { - cmd = this.request.substring(0, pos).trim().toUpperCase(); - stringParameter[0] = this.request.substring(pos).trim(); - } - + pos = this.request.indexOf(' '); + if (pos < 0) { + cmd = this.request.trim().toUpperCase(); + stringParameter[0] = ""; + } else { + cmd = this.request.substring(0, pos).trim().toUpperCase(); + stringParameter[0] = this.request.substring(pos).trim(); + } + // setting the socket timeout for reading of the request content this.controlSocket.setSoTimeout(this.socketTimeout); - // exec command and return value + // exec command and return value Object commandMethod = this.commandObjMethodCache.get(cmd); if (commandMethod == null) { try { @@ -877,38 +873,40 @@ public final class serverCore extends serverAbstractThread implements serverThre stringParameter[0] = this.request.trim(); } } - result = ((Method)commandMethod).invoke(this.commandObj, stringParameter); - //log.logDebug("* session " + handle + " completed command '" + request + "'. time = " + (System.currentTimeMillis() - handle)); + //long commandStart = System.currentTimeMillis(); + result = ((Method)commandMethod).invoke(this.commandObj, stringParameter); + //announceMoreExecTime(commandStart - System.currentTimeMillis()); // shall be negative! + //log.logDebug("* session " + handle + " completed command '" + request + "'. time = " + (System.currentTimeMillis() - handle)); this.out.flush(); - if (result == null) { - /* - log(2, true, "(NULL RETURNED/STREAM PASSED)"); - */ - } else if (result instanceof Boolean) { - if (((Boolean) result).equals(TERMINATE_CONNECTION)) break; + if (result == null) { + /* + log(2, true, "(NULL RETURNED/STREAM PASSED)"); + */ + } else if (result instanceof Boolean) { + if (((Boolean) result).equals(TERMINATE_CONNECTION)) break; // deactivating timeout. this is needed because of persistent connections this.controlSocket.setSoTimeout(0); - } else if (result instanceof String) { - if (((String) result).startsWith("!")) { - result = ((String) result).substring(1); - terminate = true; - } - writeLine((String) result); - } else if (result instanceof InputStream) { - tmp = send(out, (InputStream) result); - if ((tmp.length() > 4) && (tmp.toUpperCase().startsWith("PASS"))) { + } else if (result instanceof String) { + if (((String) result).startsWith("!")) { + result = ((String) result).substring(1); + terminate = true; + } + writeLine((String) result); + } else if (result instanceof InputStream) { + tmp = send(out, (InputStream) result); + if ((tmp.length() > 4) && (tmp.toUpperCase().startsWith("PASS"))) { log(true, "PASS ********"); - } else { + } else { log(true, tmp); - } - tmp = null; - } + } + tmp = null; + } if (terminate) break; - + } catch (InvocationTargetException ite) { - System.out.println("ERROR A " + userAddress.getHostAddress()); - // we extract a target exception and let the thread survive - writeLine((String) commandObj.error(ite.getTargetException())); + System.out.println("ERROR A " + userAddress.getHostAddress()); + // we extract a target exception and let the thread survive + writeLine((String) commandObj.error(ite.getTargetException())); } catch (NoSuchMethodException nsme) { System.out.println("ERROR B " + userAddress.getHostAddress()); if (isNotLocal(userAddress.getHostAddress().toString())) { @@ -933,14 +931,15 @@ public final class serverCore extends serverAbstractThread implements serverThre writeLine("UNKNOWN REASON:" + (String) commandObj.error(e)); } } // end of while - } catch (java.lang.ClassNotFoundException e) { - System.out.println("Internal Error: wrapper class not found: " + e.getMessage()); - System.exit(0); - } catch (java.io.IOException e) { + } catch (java.lang.ClassNotFoundException e) { + System.out.println("Internal Error: wrapper class not found: " + e.getMessage()); + System.exit(0); + } catch (java.io.IOException e) { // connection interruption: more or less normal - } - } - + } + //announceMoreExecTime(System.currentTimeMillis() - this.start); + } + } public static byte[] receive(PushbackInputStream pbis, serverByteBuffer readLineBuffer, int maxSize, boolean logerr) {