- fix the number of words reported in the indexing log message

- also add the meta:refresh target to the crawler stack
pull/1/head
Michael Peter Christen 12 years ago
parent c25d7bcb80
commit 6905182d41

@ -864,6 +864,12 @@ dc_rights
final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>(); final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
for (final Document d: documents) { for (final Document d: documents) {
result.putAll(d.getHyperlinks()); result.putAll(d.getHyperlinks());
final Object parser = d.getParserObject();
if (parser instanceof ContentScraper) {
final ContentScraper html = (ContentScraper) parser;
String refresh = html.getRefreshPath();
if (refresh != null && refresh.length() > 0)try {result.put(new MultiProtocolURI(refresh), "refresh");} catch (MalformedURLException e) {}
}
} }
return result; return result;
} }

@ -445,7 +445,7 @@ public class Segment {
final long indexingEndTime = System.currentTimeMillis(); final long indexingEndTime = System.currentTimeMillis();
if (this.log.isInfo()) { if (this.log.isInfo()) {
this.log.logInfo("*Indexed " + wordCount + " words in URL " + url + this.log.logInfo("*Indexed " + condenser.words().size() + " words in URL " + url +
" [" + id + "]" + " [" + id + "]" +
"\n\tDescription: " + dc_title + "\n\tDescription: " + dc_title +
"\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " + "\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " +

Loading…
Cancel
Save