Added StringBuffer for sentence assembly in plasmaCondenser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@782 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent c42a543bc3
commit 68d5ff2ef1

@ -165,7 +165,7 @@ public class CacheAdmin_p {
} catch (Exception e) {
info.append("- This file is not cached -");
info.append(e.toString());
// e.printStackTrace();
e.printStackTrace();
}
}

@ -138,7 +138,7 @@ public final class plasmaCondenser {
words = new TreeMap(kelondroMSetTools.fastStringComparator);
sentences = new HashMap();
HashSet currsentwords = new HashSet();
String sentence = "";
StringBuffer sentence = new StringBuffer(100);
String word = "";
String k;
int wordlen;
@ -163,7 +163,7 @@ public final class plasmaCondenser {
if (sentence.length() > 0) {
// we store the punctuation symbol as first element of the sentence vector
allsentencecounter++;
sentence = word + sentence;
sentence.insert(0, word); // append at beginning
if (sentences.containsKey(sentence)) {
// sentence already exists
sp = (statProp) sentences.get(sentence);
@ -184,7 +184,7 @@ public final class plasmaCondenser {
words.put(k,sp);
}
}
sentence = "";
sentence = new StringBuffer(100);
currsentwords.clear();
} else {
// store word
@ -202,13 +202,13 @@ public final class plasmaCondenser {
}
words.put(word, sp);
// we now have the unique handle of the word, put it into the sentence:
sentence = sentence + intString(wordHandle, numlength); // thread hang error here
sentence.append(intString(wordHandle, numlength));
}
}
// finish last sentence
if (sentence.length() > 0) {
allsentencecounter++;
sentence = "." + sentence;
sentence.insert(0, "."); // append at beginning
if (sentences.containsKey(sentence)) {
sp = (statProp) sentences.get(sentence);
sp.inc();
@ -226,20 +226,24 @@ public final class plasmaCondenser {
Object[] orderedSentences = new Object[sentenceHandleCount];
String[] s;
int wc;
Object o;
it = sentences.keySet().iterator();
while (it.hasNext()) {
sentence = (String) it.next();
wc = (sentence.length() - 1) / numlength;
s = new String[wc + 2];
sp = (statProp) sentences.get(sentence);
s[0] = intString(sp.count, numlength); // number of occurrences of this sentence
s[1] = sentence.substring(0,1); // the termination symbol of this sentence
for (int i = 0; i < wc; i++) {
k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1);
s[i + 2] = k;
}
orderedSentences[sp.handle] = s;
}
while (it.hasNext()) {
o = it.next();
if (o != null) {
sentence = (StringBuffer) o;
wc = (sentence.length() - 1) / numlength;
s = new String[wc + 2];
sp = (statProp) sentences.get(sentence);
s[0] = intString(sp.count, numlength); // number of occurrences of this sentence
s[1] = sentence.substring(0,1); // the termination symbol of this sentence
for (int i = 0; i < wc; i++) {
k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1);
s[i + 2] = k;
}
orderedSentences[sp.handle] = s;
}
}
Map.Entry entry;
// we search for similar words and reorganize the corresponding sentences
@ -280,10 +284,12 @@ public final class plasmaCondenser {
// depending on the orderedSentences structure, we rebuild the sentence HashMap to
// eliminate duplicate sentences
sentences = new HashMap();
int le;
for (int i = 0; i < orderedSentences.length; i++) {
sentence = "";
for (int j = 1; j < ((String[]) orderedSentences[i]).length; j++) sentence = sentence + ((String[]) orderedSentences[i])[j];
if (sentences.containsKey(sentence)) {
le = ((String[]) orderedSentences[i]).length;
sentence = new StringBuffer(le * 10);
for (int j = 1; j < le; j++) sentence.append(((String[]) orderedSentences[i])[j]);
if (sentences.containsKey(sentence)) {
// add sentence counter to counter of found sentence
sp = (statProp) sentences.get(sentence);
sp.count = sp.count + Integer.parseInt(((String[]) orderedSentences[i])[0]);
@ -374,12 +380,12 @@ public final class plasmaCondenser {
Iterator it;
statProp sp;
String[] s;
String sentence;
StringBuffer sentence;
Object[] orderedSentences = new Object[sentences.size()];
for (int i = 0; i < sentences.size(); i++) orderedSentences[i] = null; // this array must be initialized
it = sentences.keySet().iterator();
while (it.hasNext()) {
sentence = (String) it.next();
sentence = (StringBuffer) it.next();
wc = (sentence.length() - 1) / numlength;
s = new String[wc + 2];
sp = (statProp) sentences.get(sentence);

@ -292,7 +292,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String cache = getConfig("proxyCache", "DATA/HTCACHE");
cache = cache.replace('\\', '/');
if (cache.endsWith("/")) { cache = cache.substring(0, cache.length() - 1); }
File htCachePath = new File(cache);
File htCachePath = new File(rootPath, cache);
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP);
@ -936,7 +936,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private void processResourceStack(plasmaSwitchboardQueue.Entry entry) {
// work off one stack entry with a fresh resource
try {
long parsingStartTime = 0, parsingEndTime = 0, indexingStartTime = 0, indexingEndTime;
long stackStartTime = 0, stackEndTime = 0, parsingStartTime = 0, parsingEndTime = 0, indexingStartTime = 0, indexingEndTime;
// we must distinguish the following cases: resource-load was initiated by
// 1) global crawling: the index is extern, not here (not possible here)
@ -999,6 +999,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (loadDate == null) loadDate = new Date();
// put anchors on crawl stack
stackStartTime = System.currentTimeMillis();
if (((processCase == 4) || (processCase == 5)) &&
((entry.profile() == null) || (entry.depth() < entry.profile().generalDepth()))) {
Map hl = document.getHyperlinks();
@ -1020,8 +1021,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
log.logInfo("CRAWL: ADDED " + c + " LINKS FROM " + entry.normalizedURLString() +
", " + (hl.size() - c) + " LINKS DOUBLE" +
", NEW CRAWL STACK SIZE IS " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE));
}
stackEndTime = System.currentTimeMillis();
// create index
String descr = document.getMainLongTitle();
@ -1085,6 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"\n\tDescription: " + descr + "\n\t" +
"MimeType: " + document.getMimeType() + " | " +
"Size: " + document.text.length + " bytes | " +
"StackingTime: " + (stackEndTime-stackStartTime) + " ms | " +
"ParsingTime: " + (parsingEndTime-parsingStartTime) + " ms | " +
"IndexingTime: " + (indexingEndTime-indexingStartTime) + " ms");

Loading…
Cancel
Save