diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index 436c91f87..d1b86cb9c 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -166,8 +166,8 @@ public class Bookmarks { document = switchboard.snippetCache.retrieveDocument(comp.url(), true, 5000, true); prop.put("mode_edit", 0); // create mode prop.put("mode_url", comp.url().toNormalform()); - prop.put("mode_title", comp.descr()); - prop.put("mode_description", (document == null) ? comp.descr(): document.getMainLongTitle()); + prop.put("mode_title", comp.title()); + prop.put("mode_description", (document == null) ? comp.title(): document.getTitle()); prop.put("mode_author", comp.author()); prop.put("mode_tags", (document == null) ? comp.tags() : document.getKeywords(',')); prop.put("mode_public", 0); diff --git a/htroot/DetailedSearch.java b/htroot/DetailedSearch.java index bb9ae7d5f..29c7b83bb 100644 --- a/htroot/DetailedSearch.java +++ b/htroot/DetailedSearch.java @@ -269,7 +269,7 @@ public class DetailedSearch { prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&deleteref=" + result.getUrlhash() + "&urlmaskfilter=.*"); prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&recommendref=" + result.getUrlhash() + "&urlmaskfilter=.*"); prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash()); - prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr()); + prop.put("type_results_" + i + "_description", result.getUrlentry().comp().title()); prop.put("type_results_" + i + "_url", result.getUrl()); prop.put("type_results_" + i + "_urlhash", result.getUrlhash()); prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash())); diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index c047c5418..5f74a4e2e 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -507,7 +507,7 @@ public class IndexControl_p { prop.put("genUrlProfile", 2); prop.put("genUrlProfile_urlNormalform", comp.url().toNormalform()); prop.put("genUrlProfile_urlhash", urlhash); - prop.put("genUrlProfile_urlDescr", comp.descr()); + prop.put("genUrlProfile_urlDescr", comp.title()); prop.put("genUrlProfile_moddate", entry.moddate()); prop.put("genUrlProfile_loaddate", entry.loaddate()); prop.put("genUrlProfile_referrer", referrer); diff --git a/htroot/IndexMonitor.java b/htroot/IndexMonitor.java index 02d6a0f8d..e35313c1a 100644 --- a/htroot/IndexMonitor.java +++ b/htroot/IndexMonitor.java @@ -181,7 +181,7 @@ public class IndexMonitor { prop.put("table_indexed_" + cnt + "_showExec_executorSeed", (executorSeed == null) ? "unknown" : executorSeed.getName()); prop.put("table_indexed_" + cnt + "_moddate", daydate(urle.moddate())); prop.put("table_indexed_" + cnt + "_wordcount", urle.wordCount()); - prop.put("table_indexed_" + cnt + "_urldescr", comp.descr()); + prop.put("table_indexed_" + cnt + "_urldescr", comp.title()); if (cachepath == null) { prop.put("table_indexed_" + cnt + "_available", 0); } else { diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 5bb7f955a..39da43955 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -124,7 +124,7 @@ public class ViewFile { return prop; } url = comp.url(); - descr = comp.descr(); + descr = comp.title(); urlEntry.wordCount(); size = urlEntry.size(); pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 10de09dfd..3ad0b180b 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -236,8 +236,8 @@ public class yacysearch { // create a news message HashMap map = new HashMap(); map.put("url", comp.url().toNormalform().replace(',', '|')); - map.put("title", comp.descr().replace(',', ' ')); - map.put("description", ((document == null) ? comp.descr() : document.getMainLongTitle()).replace(',', ' ')); + map.put("title", comp.title().replace(',', ' ')); + map.put("description", ((document == null) ? comp.title() : document.getTitle()).replace(',', ' ')); map.put("author", ((document == null) ? "" : document.getAuthor())); map.put("tags", ((document == null) ? "" : document.getKeywords(' '))); yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map)); @@ -294,7 +294,7 @@ public class yacysearch { prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&deleteref=" + result.getUrlhash() + "&urlmaskfilter=.*"); prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&recommendref=" + result.getUrlhash() + "&urlmaskfilter=.*"); prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash()); - prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr()); + prop.put("type_results_" + i + "_description", result.getUrlentry().comp().title()); prop.put("type_results_" + i + "_url", result.getUrl()); prop.put("type_results_" + i + "_urlhash", result.getUrlhash()); prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash())); diff --git a/source/de/anomic/index/indexURLEntry.java b/source/de/anomic/index/indexURLEntry.java index 341e0feeb..2192e24e8 100644 --- a/source/de/anomic/index/indexURLEntry.java +++ b/source/de/anomic/index/indexURLEntry.java @@ -66,28 +66,28 @@ public interface indexURLEntry { public class Components { private URL url; - private String descr, author, tags, ETag; + private String title, author, tags, ETag; - public Components(String url, String descr, String author, String tags, String ETag) { + public Components(String url, String title, String author, String tags, String ETag) { try { this.url = new URL(url); } catch (MalformedURLException e) { this.url = null; } - this.descr = descr; + this.title = title; this.author = author; this.tags = tags; this.ETag = ETag; } public Components(URL url, String descr, String author, String tags, String ETag) { this.url = url; - this.descr = descr; + this.title = descr; this.author = author; this.tags = tags; this.ETag = ETag; } public URL url() { return this.url; } - public String descr() { return this.descr; } + public String title() { return this.title; } public String author() { return this.author; } public String tags() { return this.tags; } public String ETag() { return this.ETag; } diff --git a/source/de/anomic/index/indexURLEntryNew.java b/source/de/anomic/index/indexURLEntryNew.java index 0c41c93e5..cb3db10bf 100644 --- a/source/de/anomic/index/indexURLEntryNew.java +++ b/source/de/anomic/index/indexURLEntryNew.java @@ -205,7 +205,7 @@ public class indexURLEntryNew implements indexURLEntry { try { s.append("hash=").append(hash()); s.append(",url=").append(crypt.simpleEncode(comp.url().toNormalform())); - s.append(",descr=").append(crypt.simpleEncode(comp.descr())); + s.append(",descr=").append(crypt.simpleEncode(comp.title())); s.append(",author=").append(crypt.simpleEncode(comp.author())); s.append(",tags=").append(crypt.simpleEncode(comp.tags())); s.append(",ETag=").append(crypt.simpleEncode(comp.ETag())); @@ -373,7 +373,7 @@ public class indexURLEntryNew implements indexURLEntry { null, comp().url(), referrerHash(), - comp().descr(), + comp().title(), loaddate(), null, 0, diff --git a/source/de/anomic/index/indexURLEntryOld.java b/source/de/anomic/index/indexURLEntryOld.java index 1558ebb1f..3497977c1 100644 --- a/source/de/anomic/index/indexURLEntryOld.java +++ b/source/de/anomic/index/indexURLEntryOld.java @@ -341,7 +341,7 @@ public class indexURLEntryOld implements indexURLEntry { null, comp().url(), referrerHash(), - comp().descr(), + comp().title(), loaddate(), null, 0, diff --git a/source/de/anomic/plasma/parser/doc/docParser.java b/source/de/anomic/plasma/parser/doc/docParser.java index 1ccc83991..10dfdaa01 100644 --- a/source/de/anomic/plasma/parser/doc/docParser.java +++ b/source/de/anomic/plasma/parser/doc/docParser.java @@ -96,7 +96,6 @@ implements Parser { replaceAll("\n"," "). replaceAll("\r"," "). replaceAll("\t"," "), - null, "", // TODO: AUTHOR null, null, diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java index 7851f038c..a1a4f9966 100644 --- a/source/de/anomic/plasma/parser/odt/odtParser.java +++ b/source/de/anomic/plasma/parser/odt/odtParser.java @@ -101,9 +101,10 @@ public class odtParser extends AbstractParser implements Parser { File writerFile = null; try { String docDescription = null; - String docKeywordStr = null; + String docKeywordStr = null; String docShortTitle = null; String docLongTitle = null; + String docAuthor = null; // opening the file as zip file ZipFile zipFile= new ZipFile(dest); @@ -145,9 +146,10 @@ public class odtParser extends AbstractParser implements Parser { ODFMetaFileAnalyzer metaAnalyzer = new ODFMetaFileAnalyzer(); OpenDocumentMetadata metaData = metaAnalyzer.analyzeMetaData(zipFileEntryStream); docDescription = metaData.getDescription(); - docKeywordStr = metaData.getKeyword(); + docKeywordStr = metaData.getKeyword(); docShortTitle = metaData.getTitle(); docLongTitle = metaData.getSubject(); + docAuthor = metaData.getCreator(); } } @@ -171,9 +173,8 @@ public class odtParser extends AbstractParser implements Parser { mimeType, "UTF-8", docKeywords, - docShortTitle, docLongTitle, - "", // TODO: AUTHOR + docAuthor, null, docDescription, contentBytes, @@ -185,9 +186,8 @@ public class odtParser extends AbstractParser implements Parser { mimeType, "UTF-8", docKeywords, - docShortTitle, docLongTitle, - "", // TODO: AUTHOR + docAuthor, null, docDescription, writerFile, diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java index 6cc8bf644..0e491ef35 100644 --- a/source/de/anomic/plasma/parser/pdf/pdfParser.java +++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java @@ -102,7 +102,7 @@ public class pdfParser extends AbstractParser implements Parser { // Logger theLogger = Logger.getLogger("org.pdfbox"); // theLogger.setLevel(Level.INFO); - String docTitle = null, docSubject = null, /*docAuthor = null,*/ docKeywordStr = null; + String docTitle = null, docSubject = null, docAuthor = null, docKeywordStr = null; // check for interruption checkInterruption(); @@ -127,7 +127,7 @@ public class pdfParser extends AbstractParser implements Parser { if (theDocInfo != null) { docTitle = theDocInfo.getTitle(); docSubject = theDocInfo.getSubject(); - //docAuthor = theDocInfo.getAuthor(); + docAuthor = theDocInfo.getAuthor(); docKeywordStr = theDocInfo.getKeywords(); } @@ -156,9 +156,8 @@ public class pdfParser extends AbstractParser implements Parser { mimeType, "UTF-8", docKeywords, - docSubject, - docTitle, - "", // TODO: AUTHOR + (docTitle == null) ? docSubject : docTitle, + docAuthor, null, null, contentBytes, @@ -170,9 +169,8 @@ public class pdfParser extends AbstractParser implements Parser { mimeType, "UTF-8", docKeywords, - docSubject, - docTitle, - "", // TODO: AUTHOR + (docTitle == null) ? docSubject : docTitle, + docAuthor, null, null, writerFile, diff --git a/source/de/anomic/plasma/parser/ppt/pptParser.java b/source/de/anomic/plasma/parser/ppt/pptParser.java index 223bc2ef5..756580884 100644 --- a/source/de/anomic/plasma/parser/ppt/pptParser.java +++ b/source/de/anomic/plasma/parser/ppt/pptParser.java @@ -113,7 +113,6 @@ public class pptParser extends AbstractParser implements Parser { replaceAll("\n"," "). replaceAll("\r"," "). replaceAll("\t"," "), - null, "", // TODO: AUTHOR null, null, diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index f7615e268..6d84aebf3 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -144,13 +144,13 @@ public class rpmParser extends AbstractParser implements Parser { // closing the rpm file rpmFile.close(); rpmFile = null; + if (summary == null) summary = name; plasmaParserDocument theDoc = new plasmaParserDocument( location, mimeType, "UTF-8", null, - name, summary, "", // TODO: AUTHOR null, diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index b5d4ce614..f0eb4c52a 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -188,7 +188,6 @@ public class rssParser extends AbstractParser implements Parser { mimeType, "UTF-8", null, - null, feedTitle, "", // TODO: AUTHOR (String[]) feedSections.toArray(new String[feedSections.size()]), diff --git a/source/de/anomic/plasma/parser/rtf/rtfParser.java b/source/de/anomic/plasma/parser/rtf/rtfParser.java index 4e6f047b6..c3ef24b32 100644 --- a/source/de/anomic/plasma/parser/rtf/rtfParser.java +++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java @@ -101,7 +101,6 @@ implements Parser { replaceAll("\n"," "). replaceAll("\r"," "). replaceAll("\t"," "), - null, "", // TODO: AUTHOR null, null, diff --git a/source/de/anomic/plasma/parser/swf/swfParser.java b/source/de/anomic/plasma/parser/swf/swfParser.java index d225ca40c..e5e92e4ce 100644 --- a/source/de/anomic/plasma/parser/swf/swfParser.java +++ b/source/de/anomic/plasma/parser/swf/swfParser.java @@ -98,7 +98,6 @@ public class swfParser extends AbstractParser implements Parser { String url = null; String urlnr = null; String linebreak = System.getProperty("line.separator"); - String longTitle = null; String[] sections = null; String abstrct = null; //TreeSet images = null; @@ -133,8 +132,7 @@ public class swfParser extends AbstractParser implements Parser { replaceAll("\r\n"," "). replaceAll("\n"," "). replaceAll("\r"," "). - replaceAll("\t"," "), //short title - longTitle, // a long document title + replaceAll("\t"," "), // title "", // TODO: AUTHOR sections, // an array of section headlines abstrct, // an abstract diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java index 5c1604e41..1bd6ce277 100644 --- a/source/de/anomic/plasma/parser/tar/tarParser.java +++ b/source/de/anomic/plasma/parser/tar/tarParser.java @@ -126,7 +126,6 @@ public class tarParser extends AbstractParser implements Parser { // TODO: what about bzip .... StringBuffer docKeywords = new StringBuffer(); - StringBuffer docShortTitle = new StringBuffer(); StringBuffer docLongTitle = new StringBuffer(); LinkedList docSections = new LinkedList(); StringBuffer docAbstrct = new StringBuffer(); @@ -180,15 +179,12 @@ public class tarParser extends AbstractParser implements Parser { docKeywords.append(subDoc.getKeywords(',')); if (docLongTitle.length() > 0) docLongTitle.append("\n"); - docLongTitle.append(subDoc.getMainLongTitle()); - - if (docShortTitle.length() > 0) docShortTitle.append("\n"); - docShortTitle.append(subDoc.getMainShortTitle()); + docLongTitle.append(subDoc.getTitle()); docSections.addAll(Arrays.asList(subDoc.getSectionTitles())); if (docAbstrct.length() > 0) docAbstrct.append("\n"); - docAbstrct.append(subDoc.getAbstract()); + docAbstrct.append(subDoc.getAbstract()); if (subDoc.getTextLength() > 0) { if (docTextLength > 0) docText.write('\n'); @@ -211,7 +207,6 @@ public class tarParser extends AbstractParser implements Parser { mimeType, null, docKeywords.toString().split(" |,"), - docShortTitle.toString(), docLongTitle.toString(), "", // TODO: AUTHOR (String[])docSections.toArray(new String[docSections.size()]), @@ -225,7 +220,6 @@ public class tarParser extends AbstractParser implements Parser { mimeType, null, docKeywords.toString().split(" |,"), - docShortTitle.toString(), docLongTitle.toString(), "", // TODO: AUTHOR (String[])docSections.toArray(new String[docSections.size()]), diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java index defbdc43c..5419b2635 100644 --- a/source/de/anomic/plasma/parser/vcf/vcfParser.java +++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java @@ -245,7 +245,6 @@ public class vcfParser extends AbstractParser implements Parser { mimeType, // the documents mime type null, null, // a list of extracted keywords - null, // a short document title parsedTitle.toString(), // a long document title "", // TODO: AUTHOR sections, // an array of section headlines diff --git a/source/de/anomic/plasma/parser/xls/xlsParser.java b/source/de/anomic/plasma/parser/xls/xlsParser.java index 1eeca049b..ea5de3899 100644 --- a/source/de/anomic/plasma/parser/xls/xlsParser.java +++ b/source/de/anomic/plasma/parser/xls/xlsParser.java @@ -141,7 +141,6 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener { replaceAll("\n"," "). replaceAll("\r"," "). replaceAll("\t"," "), - null, "", // TODO: AUTHOR null, null, diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java index 55e56a54a..2c59df142 100644 --- a/source/de/anomic/plasma/parser/zip/zipParser.java +++ b/source/de/anomic/plasma/parser/zip/zipParser.java @@ -110,7 +110,6 @@ public class zipParser extends AbstractParser implements Parser { } StringBuffer docKeywords = new StringBuffer(); - StringBuffer docShortTitle = new StringBuffer(); StringBuffer docLongTitle = new StringBuffer(); LinkedList docSections = new LinkedList(); StringBuffer docAbstrct = new StringBuffer(); @@ -163,15 +162,12 @@ public class zipParser extends AbstractParser implements Parser { docKeywords.append(subDoc.getKeywords(',')); if (docLongTitle.length() > 0) docLongTitle.append("\n"); - docLongTitle.append(subDoc.getMainLongTitle()); - - if (docShortTitle.length() > 0) docShortTitle.append("\n"); - docShortTitle.append(subDoc.getMainShortTitle()); + docLongTitle.append(subDoc.getTitle()); docSections.addAll(Arrays.asList(subDoc.getSectionTitles())); if (docAbstrct.length() > 0) docAbstrct.append("\n"); - docAbstrct.append(subDoc.getAbstract()); + docAbstrct.append(subDoc.getAbstract()); if (subDoc.getTextLength() > 0) { if (docTextLength > 0) docText.write('\n'); @@ -195,7 +191,6 @@ public class zipParser extends AbstractParser implements Parser { mimeType, null, docKeywords.toString().split(" |,"), - docShortTitle.toString(), docLongTitle.toString(), "", // TODO: AUTHOR (String[])docSections.toArray(new String[docSections.size()]), @@ -209,7 +204,6 @@ public class zipParser extends AbstractParser implements Parser { mimeType, null, docKeywords.toString().split(" |,"), - docShortTitle.toString(), docLongTitle.toString(), "", // TODO: AUTHOR (String[])docSections.toArray(new String[docSections.size()]), diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java index 9c5e37535..2b3efbcc6 100644 --- a/source/de/anomic/plasma/plasmaCondenser.java +++ b/source/de/anomic/plasma/plasmaCondenser.java @@ -141,8 +141,8 @@ public final class plasmaCondenser { // the phrase counter: // phrase 0 are words taken from the URL - // phrase 1 is the MainLongTitle - // phrase 2 is the MainShortTitle + // phrase 1 is the MainTitle + // phrase 2 is // phrase 3 is the Document Abstract // phrase 4 is the Document Author // phrase 5 are the tags specified in document @@ -151,8 +151,8 @@ public final class plasmaCondenser { // phrase 99 is taken from the media Link url and anchor description // phrase 100 and above are lines from the text - insertTextToWords(document.getMainLongTitle(), 1, indexRWIEntryNew.flag_app_descr, wflags); - insertTextToWords(document.getMainShortTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags); + insertTextToWords(document.getTitle(), 1, indexRWIEntryNew.flag_app_descr, wflags); + //insertTextToWords(document.getTitle(), 2, indexRWIEntryNew.flag_app_descr, wflags); insertTextToWords(document.getAbstract(), 3, indexRWIEntryNew.flag_app_descr, wflags); insertTextToWords(document.getAuthor(), 4, indexRWIEntryNew.flag_app_descr, wflags); // missing: tags! diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 70a9fcfd6..3cf319fed 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -759,7 +759,6 @@ public final class plasmaParser { charSet, scraper.getKeywords(), scraper.getTitle(), - scraper.getTitle(), scraper.getAuthor(), sections, scraper.getDescription(), @@ -960,7 +959,7 @@ public final class plasmaParser { // printing out all parsed sentences if (document != null) { System.out.print("Document titel: "); - System.out.println(document.getMainLongTitle()); + System.out.println(document.getTitle()); // found text final Iterator sentences = document.getSentences(false); diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java index 727c828ab..66e05f18a 100644 --- a/source/de/anomic/plasma/plasmaParserDocument.java +++ b/source/de/anomic/plasma/plasmaParserDocument.java @@ -64,8 +64,7 @@ public class plasmaParserDocument { private String mimeType; // mimeType as taken from http header private String charset; // the charset of the document private String[] keywords; // most resources provide a keyword field - private String shortTitle; // a shortTitle mostly appears in the window header (border) - private String longTitle; // the real title of the document, commonly h1-tags + private String title; // a document title, taken from title or h1 tag; shall appear as headline of search result private String author; // author or copyright private String[] sections; // if present: more titles/headlines appearing in the document private String abstrct; // an abstract, if present: short content description @@ -81,15 +80,14 @@ public class plasmaParserDocument { private InputStream textStream; public plasmaParserDocument(URL location, String mimeType, String charset, - String[] keywords, String shortTitle, String longTitle, String author, + String[] keywords, String title, String author, String[] sections, String abstrct, byte[] text, Map anchors, TreeSet images) { this.location = location; this.mimeType = (mimeType==null)?"application/octet-stream":mimeType; this.charset = charset; this.keywords = (keywords==null) ? new String[0] : keywords; - this.shortTitle = (shortTitle==null)?"":shortTitle; - this.longTitle = (longTitle==null)?"":longTitle; + this.title = (title==null)?"":title; this.author = (author==null)?"":author; this.sections = (sections==null)?new String[0]:sections; this.abstrct = (abstrct==null)?"":abstrct; @@ -105,15 +103,14 @@ public class plasmaParserDocument { } public plasmaParserDocument(URL location, String mimeType, String charset, - String[] keywords, String shortTitle, String longTitle, String author, + String[] keywords, String title, String author, String[] sections, String abstrct, File text, Map anchors, TreeSet images) { this.location = location; this.mimeType = (mimeType==null)?"application/octet-stream":mimeType; this.charset = charset; this.keywords = (keywords==null) ? new String[0] : keywords; - this.shortTitle = (shortTitle==null)?"":shortTitle; - this.longTitle = (longTitle==null)?"":longTitle; + this.title = (title==null)?"":title; this.author = (author==null)?"":author; this.sections = (sections==null)?new String[0]:sections; this.abstrct = (abstrct==null)?"":abstrct; @@ -144,20 +141,16 @@ public class plasmaParserDocument { return this.charset; } - public String getMainShortTitle() { - if (shortTitle != null) return shortTitle; else return longTitle; - } - - public String getMainLongTitle() { - if (longTitle != null) return longTitle; else return shortTitle; + public String getTitle() { + return title; } public String[] getSectionTitles() { - if (sections != null) return sections; else return new String[]{getMainLongTitle()}; + if (sections != null) return sections; else return new String[]{getTitle()}; } public String getAbstract() { - if (abstrct != null) return abstrct; else return getMainLongTitle(); + if (abstrct != null) return abstrct; else return getTitle(); } public String getAuthor() { diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 88d6c10d8..21ffdf069 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -411,7 +411,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { if (page != null) { if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) && (query.constraint.get(plasmaCondenser.flag_cat_indexof)) && - (!(page.comp().descr().startsWith("Index of")))) { + (!(page.comp().title().startsWith("Index of")))) { log.logFine("filtered out " + page.comp().url().toString()); // filter out bad results Iterator wi = query.queryHashes.iterator(); diff --git a/source/de/anomic/plasma/plasmaSearchPostOrder.java b/source/de/anomic/plasma/plasmaSearchPostOrder.java index f2d80bb5d..bbc475684 100644 --- a/source/de/anomic/plasma/plasmaSearchPostOrder.java +++ b/source/de/anomic/plasma/plasmaSearchPostOrder.java @@ -111,9 +111,9 @@ public final class plasmaSearchPostOrder { // take out relevant information for reference computation indexURLEntry.Components comp = page.comp(); - if ((comp.url() == null) || (comp.descr() == null)) return; + if ((comp.url() == null) || (comp.title() == null)) return; String[] urlcomps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()); // word components of the url - String[] descrcomps = comp.descr().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description + String[] descrcomps = comp.title().toLowerCase().split(htmlFilterContentScraper.splitrex); // words in the description // store everything results.add(new Object[] {page, urlcomps, descrcomps, preranking}); diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java index d5699ac49..084897d74 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java @@ -309,7 +309,7 @@ public class plasmaSearchRankingProfile { // prefer hit with 'prefer' pattern indexURLEntry.Components comp = page.comp(); if (comp.url().toNormalform().matches(query.prefer)) ranking += 256 << coeff_prefer; - if (comp.descr().matches(query.prefer)) ranking += 256 << coeff_prefer; + if (comp.title().matches(query.prefer)) ranking += 256 << coeff_prefer; // apply 'common-sense' heuristic using references for (int j = 0; j < urlcomps.length; j++) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index f916ed0f7..204ca62b3 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2195,7 +2195,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser /* ========================================================================= * CREATE INDEX * ========================================================================= */ - String docDescription = document.getMainLongTitle(); + String docDescription = document.getTitle(); URL referrerURL = entry.referrerURL(); String referrerUrlHash = plasmaURL.urlHash(referrerURL); if (referrerUrlHash == null) referrerUrlHash = plasmaURL.dummyHash; @@ -2334,7 +2334,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser urlHash, urlLength, urlComps, wordStat.count, - document.getMainLongTitle().length(), + document.getTitle().length(), condenser.words().size(), condenser.sentences().size(), wordStat.posInText, @@ -2763,7 +2763,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser filename = comp.url().getFile(); if ((seed == null) || ((address = seed.getAddress()) == null)) { // seed is not known from here - wordIndex.removeWordReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8").keySet(), urlentry.hash()); + wordIndex.removeWordReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.title()).getBytes(), "UTF-8").keySet(), urlentry.hash()); wordIndex.loadedURL.remove(urlentry.hash()); // clean up continue; // next result } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 4d929ab68..f2322228e 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -296,7 +296,7 @@ public final class plasmaWordIndex implements indexRI { wprop = (plasmaCondenser.wordStatProp) wentry.getValue(); assert (wprop.flags != null); ientry = new indexRWIEntryNew(urlHash, - urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(), + urlLength, urlComps, (document == null) ? urlLength : document.getTitle().length(), wprop.count, condenser.words().size(), condenser.sentences().size(), diff --git a/source/yacy.java b/source/yacy.java index 60039f149..30e12da80 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -1110,7 +1110,7 @@ public final class yacy { indexURLEntry.Components comp = entry.comp(); if ((entry != null) && (comp.url() != null)) { if (html) { - bos.write(("" + comp.descr() + "
").getBytes("UTF-8")); + bos.write(("" + comp.title() + "
").getBytes("UTF-8")); bos.write(serverCore.crlf); } else { bos.write(comp.url().toNormalform().getBytes()); @@ -1189,7 +1189,7 @@ public final class yacy { comp = oldentry.comp(); newentry = sb.wordIndex.loadedURL.newEntry( comp.url(), - comp.descr(), + comp.title(), "", "", "",