*) Minor changes

- more debugging output: the storage time for each indexed document is now logged
   - reduced memory usage in plasmaParserDocument.java and plasmaWordIndexEntryContainer.java (minor savings)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@798 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 3c1d968d29
commit 9b7f37fc37

@ -81,8 +81,8 @@ public class plasmaParserDocument {
this.sections = (sections==null)?new String[0]:sections;
this.abstrct = (abstrct==null)?"":abstrct;
this.text = (text==null)?new byte[0]:text;
this.anchors = (anchors==null)?new HashMap():anchors;
this.images = (images==null)?new HashMap():images;
this.anchors = (anchors==null)?new HashMap(0):anchors;
this.images = (images==null)?new HashMap(0):images;
this.hyperlinks = null;
this.medialinks = null;
this.emaillinks = null;

@ -118,6 +118,8 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import de.anomic.data.messageBoard;
import de.anomic.data.robotsParser;
import de.anomic.data.wikiBoard;
@ -470,7 +472,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
private static String ppRamString(int bytes) {
private static String ppRamString(long bytes) {
if (bytes < 1024) return bytes + " KByte";
bytes = bytes / 1024;
if (bytes < 1024) return bytes + " MByte";
@ -942,7 +944,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private void processResourceStack(plasmaSwitchboardQueue.Entry entry) {
// work off one stack entry with a fresh resource
try {
long stackStartTime = 0, stackEndTime = 0, parsingStartTime = 0, parsingEndTime = 0, indexingStartTime = 0, indexingEndTime;
long stackStartTime = 0, stackEndTime = 0,
parsingStartTime = 0, parsingEndTime = 0,
indexingStartTime = 0, indexingEndTime = 0,
storageStartTime = 0, storageEndTime = 0;
// we must distinguish the following cases: resource-load was initiated by
// 1) global crawling: the index is extern, not here (not possible here)
@ -1085,18 +1090,25 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
(entry.profile().localIndexing())) {
// remove stopwords
log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL " + entry.url());
indexingEndTime = System.currentTimeMillis();
// do indexing
//log.logDebug("Create Index for '" + entry.normalizedURLString() + "'");
storageStartTime = System.currentTimeMillis();
int words = searchManager.addPageIndex(entry.url(), urlHash, loadDate, condenser, plasmaWordIndexEntry.language(entry.url()), plasmaWordIndexEntry.docType(document.getMimeType()));
indexingEndTime = System.currentTimeMillis();
log.logInfo("*Indexed " + words + " words in URL " + entry.url() +
"\n\tDescription: " + descr + "\n\t" +
"MimeType: " + document.getMimeType() + " | " +
"Size: " + document.text.length + " bytes | " +
"StackingTime: " + (stackEndTime-stackStartTime) + " ms | " +
"ParsingTime: " + (parsingEndTime-parsingStartTime) + " ms | " +
"IndexingTime: " + (indexingEndTime-indexingStartTime) + " ms");
storageEndTime = System.currentTimeMillis();
if (log.isLoggable(Level.INFO)) {
log.logInfo("*Indexed " + words + " words in URL " + entry.url() +
"\n\tDescription: " + descr +
"\n\tMimeType: " + document.getMimeType() + " | " +
"Size: " + document.text.length + " bytes | " +
"Anchors: " + ((document.anchors==null)?0:document.anchors.size()) +
"\n\tStackingTime: " + (stackEndTime-stackStartTime) + " ms | " +
"ParsingTime: " + (parsingEndTime-parsingStartTime) + " ms | " +
"IndexingTime: " + (indexingEndTime-indexingStartTime) + " ms | " +
"StorageTime: " + (storageEndTime-storageStartTime) + " ms");
}
// if this was performed for a remote crawl request, notify requester
if ((processCase == 6) && (initiator != null)) {

@ -57,16 +57,20 @@ import java.util.Iterator;
import de.anomic.server.serverCodings;
public class plasmaWordIndexEntryContainer implements Comparable {
public final class plasmaWordIndexEntryContainer implements Comparable {
private String wordHash;
private HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private final String wordHash;
private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private long updateTime;
public plasmaWordIndexEntryContainer(String wordHash) {
this(wordHash,16);
}
public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
this.wordHash = wordHash;
this.updateTime = 0;
container = new HashMap(); // a urlhash/plasmaWordIndexEntry - relation
container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
}
public int size() {
@ -123,7 +127,7 @@ public class plasmaWordIndexEntryContainer implements Comparable {
}
public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
c.add(entry);
c.updateTime = creationTime;
return c;

Loading…
Cancel
Save