added stringbuffer in condenser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@782 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent c42a543bc3
commit 68d5ff2ef1

@ -165,7 +165,7 @@ public class CacheAdmin_p {
} catch (Exception e) { } catch (Exception e) {
info.append("- This file is not cached -"); info.append("- This file is not cached -");
info.append(e.toString()); info.append(e.toString());
// e.printStackTrace(); e.printStackTrace();
} }
} }

@ -138,7 +138,7 @@ public final class plasmaCondenser {
words = new TreeMap(kelondroMSetTools.fastStringComparator); words = new TreeMap(kelondroMSetTools.fastStringComparator);
sentences = new HashMap(); sentences = new HashMap();
HashSet currsentwords = new HashSet(); HashSet currsentwords = new HashSet();
String sentence = ""; StringBuffer sentence = new StringBuffer(100);
String word = ""; String word = "";
String k; String k;
int wordlen; int wordlen;
@ -163,7 +163,7 @@ public final class plasmaCondenser {
if (sentence.length() > 0) { if (sentence.length() > 0) {
// we store the punctuation symbol as first element of the sentence vector // we store the punctuation symbol as first element of the sentence vector
allsentencecounter++; allsentencecounter++;
sentence = word + sentence; sentence.insert(0, word); // prepend: insert at beginning
if (sentences.containsKey(sentence)) { if (sentences.containsKey(sentence)) {
// sentence already exists // sentence already exists
sp = (statProp) sentences.get(sentence); sp = (statProp) sentences.get(sentence);
@ -184,7 +184,7 @@ public final class plasmaCondenser {
words.put(k,sp); words.put(k,sp);
} }
} }
sentence = ""; sentence = new StringBuffer(100);
currsentwords.clear(); currsentwords.clear();
} else { } else {
// store word // store word
@ -202,13 +202,13 @@ public final class plasmaCondenser {
} }
words.put(word, sp); words.put(word, sp);
// we now have the unique handle of the word, put it into the sentence: // we now have the unique handle of the word, put it into the sentence:
sentence = sentence + intString(wordHandle, numlength); // thread hang error here sentence.append(intString(wordHandle, numlength));
} }
} }
// finish last sentence // finish last sentence
if (sentence.length() > 0) { if (sentence.length() > 0) {
allsentencecounter++; allsentencecounter++;
sentence = "." + sentence; sentence.insert(0, "."); // prepend: insert at beginning
if (sentences.containsKey(sentence)) { if (sentences.containsKey(sentence)) {
sp = (statProp) sentences.get(sentence); sp = (statProp) sentences.get(sentence);
sp.inc(); sp.inc();
@ -226,20 +226,24 @@ public final class plasmaCondenser {
Object[] orderedSentences = new Object[sentenceHandleCount]; Object[] orderedSentences = new Object[sentenceHandleCount];
String[] s; String[] s;
int wc; int wc;
Object o;
it = sentences.keySet().iterator(); it = sentences.keySet().iterator();
while (it.hasNext()) { while (it.hasNext()) {
sentence = (String) it.next(); o = it.next();
wc = (sentence.length() - 1) / numlength; if (o != null) {
s = new String[wc + 2]; sentence = (StringBuffer) o;
sp = (statProp) sentences.get(sentence); wc = (sentence.length() - 1) / numlength;
s[0] = intString(sp.count, numlength); // number of occurrences of this sentence s = new String[wc + 2];
s[1] = sentence.substring(0,1); // the termination symbol of this sentence sp = (statProp) sentences.get(sentence);
for (int i = 0; i < wc; i++) { s[0] = intString(sp.count, numlength); // number of occurrences of this sentence
k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1); s[1] = sentence.substring(0,1); // the termination symbol of this sentence
s[i + 2] = k; for (int i = 0; i < wc; i++) {
} k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1);
orderedSentences[sp.handle] = s; s[i + 2] = k;
} }
orderedSentences[sp.handle] = s;
}
}
Map.Entry entry; Map.Entry entry;
// we search for similar words and reorganize the corresponding sentences // we search for similar words and reorganize the corresponding sentences
@ -280,10 +284,12 @@ public final class plasmaCondenser {
// depending on the orderedSentences structure, we rebuild the sentence HashMap to // depending on the orderedSentences structure, we rebuild the sentence HashMap to
// eliminate double occurring sentences // eliminate double occurring sentences
sentences = new HashMap(); sentences = new HashMap();
int le;
for (int i = 0; i < orderedSentences.length; i++) { for (int i = 0; i < orderedSentences.length; i++) {
sentence = ""; le = ((String[]) orderedSentences[i]).length;
for (int j = 1; j < ((String[]) orderedSentences[i]).length; j++) sentence = sentence + ((String[]) orderedSentences[i])[j]; sentence = new StringBuffer(le * 10);
if (sentences.containsKey(sentence)) { for (int j = 1; j < le; j++) sentence.append(((String[]) orderedSentences[i])[j]);
if (sentences.containsKey(sentence)) {
// add sentence counter to counter of found sentence // add sentence counter to counter of found sentence
sp = (statProp) sentences.get(sentence); sp = (statProp) sentences.get(sentence);
sp.count = sp.count + Integer.parseInt(((String[]) orderedSentences[i])[0]); sp.count = sp.count + Integer.parseInt(((String[]) orderedSentences[i])[0]);
@ -374,12 +380,12 @@ public final class plasmaCondenser {
Iterator it; Iterator it;
statProp sp; statProp sp;
String[] s; String[] s;
String sentence; StringBuffer sentence;
Object[] orderedSentences = new Object[sentences.size()]; Object[] orderedSentences = new Object[sentences.size()];
for (int i = 0; i < sentences.size(); i++) orderedSentences[i] = null; // this array must be initialized for (int i = 0; i < sentences.size(); i++) orderedSentences[i] = null; // this array must be initialized
it = sentences.keySet().iterator(); it = sentences.keySet().iterator();
while (it.hasNext()) { while (it.hasNext()) {
sentence = (String) it.next(); sentence = (StringBuffer) it.next();
wc = (sentence.length() - 1) / numlength; wc = (sentence.length() - 1) / numlength;
s = new String[wc + 2]; s = new String[wc + 2];
sp = (statProp) sentences.get(sentence); sp = (statProp) sentences.get(sentence);

@ -292,7 +292,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String cache = getConfig("proxyCache", "DATA/HTCACHE"); String cache = getConfig("proxyCache", "DATA/HTCACHE");
cache = cache.replace('\\', '/'); cache = cache.replace('\\', '/');
if (cache.endsWith("/")) { cache = cache.substring(0, cache.length() - 1); } if (cache.endsWith("/")) { cache = cache.substring(0, cache.length() - 1); }
File htCachePath = new File(cache); File htCachePath = new File(rootPath, cache);
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP); this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP);
@ -936,7 +936,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private void processResourceStack(plasmaSwitchboardQueue.Entry entry) { private void processResourceStack(plasmaSwitchboardQueue.Entry entry) {
// work off one stack entry with a fresh resource // work off one stack entry with a fresh resource
try { try {
long parsingStartTime = 0, parsingEndTime = 0, indexingStartTime = 0, indexingEndTime; long stackStartTime = 0, stackEndTime = 0, parsingStartTime = 0, parsingEndTime = 0, indexingStartTime = 0, indexingEndTime;
// we must distinguish the following cases: resource-load was initiated by // we must distinguish the following cases: resource-load was initiated by
// 1) global crawling: the index is extern, not here (not possible here) // 1) global crawling: the index is extern, not here (not possible here)
@ -999,6 +999,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (loadDate == null) loadDate = new Date(); if (loadDate == null) loadDate = new Date();
// put anchors on crawl stack // put anchors on crawl stack
stackStartTime = System.currentTimeMillis();
if (((processCase == 4) || (processCase == 5)) && if (((processCase == 4) || (processCase == 5)) &&
((entry.profile() == null) || (entry.depth() < entry.profile().generalDepth()))) { ((entry.profile() == null) || (entry.depth() < entry.profile().generalDepth()))) {
Map hl = document.getHyperlinks(); Map hl = document.getHyperlinks();
@ -1020,8 +1021,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
} }
log.logInfo("CRAWL: ADDED " + c + " LINKS FROM " + entry.normalizedURLString() + log.logInfo("CRAWL: ADDED " + c + " LINKS FROM " + entry.normalizedURLString() +
", " + (hl.size() - c) + " LINKS DOUBLE" +
", NEW CRAWL STACK SIZE IS " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE)); ", NEW CRAWL STACK SIZE IS " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE));
} }
stackEndTime = System.currentTimeMillis();
// create index // create index
String descr = document.getMainLongTitle(); String descr = document.getMainLongTitle();
@ -1085,6 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"\n\tDescription: " + descr + "\n\t" + "\n\tDescription: " + descr + "\n\t" +
"MimeType: " + document.getMimeType() + " | " + "MimeType: " + document.getMimeType() + " | " +
"Size: " + document.text.length + " bytes | " + "Size: " + document.text.length + " bytes | " +
"StackingTime: " + (stackEndTime-stackStartTime) + " ms | " +
"ParsingTime: " + (parsingEndTime-parsingStartTime) + " ms | " + "ParsingTime: " + (parsingEndTime-parsingStartTime) + " ms | " +
"IndexingTime: " + (indexingEndTime-indexingStartTime) + " ms"); "IndexingTime: " + (indexingEndTime-indexingStartTime) + " ms");

Loading…
Cancel
Save