From a603c4d5e8cf52f1dbc518c06d113864ed108e1b Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 6 Dec 2006 13:13:55 +0000 Subject: [PATCH] more code simplifications git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3052 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/htdocsdefault/dir.java | 9 ++++----- source/de/anomic/plasma/plasmaCondenser.java | 13 ++++++------- source/de/anomic/plasma/plasmaSwitchboard.java | 13 +++++++------ source/de/anomic/plasma/plasmaWordIndex.java | 17 +---------------- 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java index 5fe394a42..fe86b967f 100644 --- a/htroot/htdocsdefault/dir.java +++ b/htroot/htdocsdefault/dir.java @@ -55,7 +55,6 @@ import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; import java.util.Iterator; -import java.util.Map; import de.anomic.data.userDB; import de.anomic.http.httpHeader; @@ -395,11 +394,11 @@ public class dir { public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { try { final String urlhash = plasmaURL.urlHash(new URL(urlstring)); - final Iterator words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes("UTF-8"), "UTF-8"); - Map.Entry entry; + final Iterator words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes("UTF-8"), "UTF-8").keySet().iterator(); + String word; while (words.hasNext()) { - entry = (Map.Entry) words.next(); - switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash); + word = (String) words.next(); + switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash); } switchboard.wordIndex.loadedURL.remove(urlhash); } catch (Exception e) { diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index ee7274c82..76d57dfe8 100644 --- 
a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -173,10 +173,9 @@ public final class plasmaCondenser { return oldsize - words.size(); } - public Iterator words() { - // returns an entry set iterator - // key is a String (the word), value is a wordStatProp Object - return words.entrySet().iterator(); + public Map words() { + // returns the words as word/wordStatProp relation map + return words; } public static class wordStatProp { @@ -772,13 +771,13 @@ public final class plasmaCondenser { } - public static Iterator getWords(InputStream input, String charset) throws UnsupportedEncodingException { + public static Map getWords(InputStream input, String charset) throws UnsupportedEncodingException { if (input == null) return null; plasmaCondenser condenser = new plasmaCondenser(input, charset); - return condenser.words(); + return condenser.words; } - public static Iterator getWords(byte[] text, String charset) throws UnsupportedEncodingException { + public static Map getWords(byte[] text, String charset) throws UnsupportedEncodingException { if (text == null) return null; ByteArrayInputStream buffer = new ByteArrayInputStream(text); return getWords(buffer, charset); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 8deab46c9..fa1c008bc 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -119,6 +119,7 @@ import java.util.HashMap; import java.util.Hashtable; import java.util.Iterator; import java.util.Map; +import java.util.Set; import java.util.TreeSet; import de.anomic.data.blogBoard; @@ -1681,7 +1682,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()).length; // iterate over all words - Iterator i = condenser.words(); + Iterator i = condenser.words().entrySet().iterator(); Map.Entry 
wentry; plasmaCondenser.wordStatProp wordStat; while (i.hasNext()) { @@ -2110,7 +2111,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser filename = comp.url().getFile(); if ((seed == null) || ((address = seed.getAddress()) == null)) { // seed is not known from here - wordIndex.removeReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"), urlentry.hash()); + wordIndex.removeReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8").keySet(), urlentry.hash()); wordIndex.loadedURL.remove(urlentry.hash()); // clean up continue; // next result } @@ -2249,17 +2250,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // parse the resource plasmaParserDocument document = snippetCache.parseDocument(comp.url(), resourceContentLength.longValue(), resourceContent); - // getting word iterator - Iterator witer = null; + // get the word set + Set words = null; try { - witer = new plasmaCondenser(document).words(); + words = new plasmaCondenser(document).words().keySet(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } // delete all word references int count = 0; - if (witer != null) count = wordIndex.removeReferences(witer, urlhash); + if (words != null) count = wordIndex.removeReferences(words, urlhash); // finally delete the url entry itself wordIndex.loadedURL.remove(urlhash); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 97e15ea35..f2ee11cbb 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -252,7 +252,7 @@ public final class plasmaWordIndex implements indexRI { // use all the words in one condenser object to simultanous create index entries // iterate over all words - Iterator i = condenser.words(); + Iterator i = 
condenser.words().entrySet().iterator(); Map.Entry wentry; String word; indexRWIEntry ientry; @@ -405,21 +405,6 @@ public final class plasmaWordIndex implements indexRI { } return count; } - - public int removeReferences(Iterator wordStatPropIterator, String urlhash) { - // sequentially delete all word references - // returns number of deletions - Map.Entry entry; - String word; - int count = 0; - while (wordStatPropIterator.hasNext()) { - entry = (Map.Entry) wordStatPropIterator.next(); - word = (String) entry.getKey(); - // delete the URL reference in this word index - if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++; - } - return count; - } public int tryRemoveURLs(String urlHash) { // this tries to delete an index from the cache that has this