Merge branch 'master' of git://gitorious.org/~reger/yacy/bbyacy-rc1

pull/1/head
Michael Peter Christen 13 years ago
commit b991685782

@ -504,7 +504,7 @@ public class SettingsAck_p {
}
// everything is ok
prop.put("info_crawler.clientTimeout",(crawlerTimeout==0) ? "0" :PeerActions.formatInterval(crawlerTimeout));
prop.put("info_crawler.clientTimeout",(crawlerTimeout==0) ? "0" :Formatter.number(crawlerTimeout/1000.0,false)+" sec");
prop.put("info_crawler.http.maxFileSize",(maxHttpSize==-1)? "-1":Formatter.bytesToString(maxHttpSize));
prop.put("info_crawler.ftp.maxFileSize", (maxFtpSize==-1) ? "-1":Formatter.bytesToString(maxFtpSize));
prop.put("info_crawler.smb.maxFileSize", (maxSmbSize==-1) ? "-1":Formatter.bytesToString(maxSmbSize));

@ -79,7 +79,7 @@ public class Document {
private List<String> titles; // the document titles, taken from title and/or h1 tag; shall appear as headline of search result
private final StringBuilder creator; // author or copyright
private final String publisher; // publisher
private final List<String> sections; // if present: more titles/headlines appearing in the document
private List<String> sections; // if present: more titles/headlines appearing in the document
private final StringBuilder description; // an abstract, if present: short content description
private Object text; // the clear text, all that is visible
private final Map<MultiProtocolURI, Properties> anchors; // all links embedded as clickeable entities (anchor tags)
@ -631,7 +631,17 @@ dc_rights
public void addSubDocuments(final Document[] docs) throws IOException {
for (final Document doc: docs) {
this.sections.addAll(Arrays.asList(doc.getSectionTitles()));
// check the class, as addAll might not be supported if sections was initialized via Arrays.asList (fixed-size list)
if (this.sections.getClass() == java.util.LinkedList.class) {
this.sections.addAll(doc.sections);
} else {
/* sections might have been initialized via Arrays.asList, which returns a fixed-size list whose
addAll throws UnsupportedOperationException, so a new list must be assigned */
LinkedList<String> tmplist = new LinkedList();
tmplist.addAll(this.sections);
tmplist.addAll(doc.sections);
this.sections = tmplist;
}
this.titles.addAll(doc.titles());
this.keywords.addAll(doc.getKeywords());

@ -37,24 +37,31 @@ public class AugmentParser extends AbstractParser implements Parser {
@Override
public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Failure, InterruptedException {
Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source);
try {
source.reset();
} catch (IOException e) {
Log.logException(e);
}
Document alreadyParsedDocument = htmlDocs[0];
Document superDoc = analyze(alreadyParsedDocument, url, mimeType, charset);
Document augmentDoc = parseAndAugment(url, mimeType, charset);
Document[] retDocs = new Document[htmlDocs.length + 2];
for (int i = 0; i < htmlDocs.length; i++) {
retDocs[i] = htmlDocs[i];
}
retDocs[retDocs.length - 1] = augmentDoc;
retDocs[retDocs.length - 2] = superDoc;
return retDocs;
Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source);
try {
source.reset();
} catch (IOException e) {
Log.logException(e);
}
Document alreadyParsedDocument = htmlDocs[0];
Document superDoc = analyze(alreadyParsedDocument, url, mimeType, charset);
Document augmentDoc = parseAndAugment(url, mimeType, charset);
Document[] retDocs = new Document[htmlDocs.length + 1];
for (int i = 1; i < htmlDocs.length; i++) {
retDocs[i - 1] = htmlDocs[i];
}
retDocs[retDocs.length - 1] = augmentDoc;
retDocs[retDocs.length - 2] = superDoc;
try { // merge additional result docs into the parse main document
alreadyParsedDocument.addSubDocuments(retDocs);
} catch (IOException ex) {
Log.logException(ex);
}
Document[] finalretDocs = new Document[1]; // return the merged document
finalretDocs[0] = alreadyParsedDocument;
return finalretDocs;
}
private static Document analyze (Document alreadyParsedDocument, DigestURI url,

Loading…
Cancel
Save