*) tried to get rid of warnings when compiling parsers (http://forum.yacy-websuche.de/viewtopic.php?t=660)

lots of warnings are gone, new one in htmlFilterContentScraper


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4293 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 17 years ago
parent 4dc438f7e7
commit b08f877e97

@ -70,8 +70,8 @@ import de.anomic.yacy.yacyURL;
public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
// statics: for initialisation of the HTMLFilterAbstractScraper
private static TreeSet linkTags0;
private static TreeSet linkTags1;
private static TreeSet<String> linkTags0;
private static TreeSet<String> linkTags1;
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static {
@ -80,7 +80,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
static {
linkTags0 = new TreeSet(insensitiveCollator);
linkTags0 = new TreeSet<String>(insensitiveCollator);
linkTags0.add("img");
linkTags0.add("base");
linkTags0.add("frame");
@ -90,7 +90,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
linkTags0.add("embed"); //added by [MN]
linkTags0.add("param"); //added by [MN]
linkTags1 = new TreeSet(insensitiveCollator);
linkTags1 = new TreeSet<String>(insensitiveCollator);
linkTags1.add("a");
linkTags1.add("h1");
linkTags1.add("h2");
@ -100,9 +100,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
// class variables: collectors for links
private HashMap anchors;
private TreeSet images; // String(absolute url)/ImageEntry relation
private HashMap metas;
private HashMap<String, String> anchors;
private TreeSet<htmlFilterImageEntry> images; // String(absolute url)/ImageEntry relation
private HashMap<String, String> metas;
private String title;
//private String headline;
private List[] headlines;
@ -124,12 +124,12 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// it is only the reference for relative links
super(linkTags0, linkTags1);
this.root = root;
this.anchors = new HashMap();
this.images = new TreeSet();
this.metas = new HashMap();
this.anchors = new HashMap<String, String>();
this.images = new TreeSet<htmlFilterImageEntry>();
this.metas = new HashMap<String, String>();
this.title = "";
this.headlines = new ArrayList[4];
for (int i = 0; i < 4; i++) headlines[i] = new ArrayList();
for (int i = 0; i < 4; i++) headlines[i] = new ArrayList<String>();
this.content = new serverCharBuffer(1024);
}
@ -311,7 +311,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
}
// othervise take any headline
// otherwise take any headline
for (int i = 0; i < 4; i++) {
if (headlines[i].size() > 0) return (String) headlines[i].get(0);
}
@ -346,17 +346,17 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
}
public Map getAnchors() {
public Map<String, String> getAnchors() {
// returns a url (String) / name (String) relation
return anchors;
}
public TreeSet getImages() {
public TreeSet<htmlFilterImageEntry> getImages() {
// this returns a String(absolute url)/htmlFilterImageEntry relation
return images;
}
public Map getMetas() {
public Map<String, String> getMetas() {
return metas;
}

@ -63,7 +63,7 @@ public class bzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static String fileExtensions = "bz2,tbz,tbz2";
static {
SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);

@ -60,7 +60,7 @@ public class docParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { SUPPORTED_MIME_TYPES.put("application/msword","doc"); }
/**

@ -62,7 +62,7 @@ public class gzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/x-gzip","gz,tgz");
SUPPORTED_MIME_TYPES.put("application/gzip","gz,tgz");

@ -70,7 +70,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("text/xml","xml");
SUPPORTED_MIME_TYPES.put("application/xml","xml");
@ -96,7 +96,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* Helping structure used to detect loops in the mimeType detection
* process
*/
private static Hashtable threadLoopDetection = new Hashtable();
private static Hashtable<Thread, Integer> threadLoopDetection = new Hashtable<Thread, Integer>();
public mimeTypeParser() {
super(LIBX_DEPENDENCIES);

@ -74,7 +74,7 @@ public class odtParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt");
SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");

@ -70,7 +70,7 @@ public class pdfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { SUPPORTED_MIME_TYPES.put("application/pdf","pdf"); }
/**

@ -62,7 +62,7 @@ public class pptParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/mspowerpoint","ppt,pps");
SUPPORTED_MIME_TYPES.put("application/powerpoint","ppt,pps");

@ -65,7 +65,7 @@ public class psParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/postscript","ps");
SUPPORTED_MIME_TYPES.put("text/postscript","ps");

@ -70,7 +70,7 @@ public class rpmParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");
@ -110,7 +110,7 @@ public class rpmParser extends AbstractParser implements Parser {
RPMFile rpmFile = null;
try {
String summary = null, description = null, packager = null, name = sourceFile.getName();
HashMap anchors = new HashMap();
HashMap<String, String> anchors = new HashMap<String, String>();
StringBuffer content = new StringBuffer();
// opening the rpm file

@ -73,7 +73,7 @@ public class rssParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("text/rss","xml,rss,rdf");
SUPPORTED_MIME_TYPES.put("application/rdf+xml","xml,rss,rdf");
@ -95,9 +95,9 @@ public class rssParser extends AbstractParser implements Parser {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {
LinkedList feedSections = new LinkedList();
HashMap anchors = new HashMap();
TreeSet images = new TreeSet();
LinkedList<String> feedSections = new LinkedList<String>();
HashMap<String, String> anchors = new HashMap<String, String>();
TreeSet<htmlFilterImageEntry> images = new TreeSet<htmlFilterImageEntry>();
serverByteBuffer text = new serverByteBuffer();
serverCharBuffer authors = new serverCharBuffer();
@ -149,12 +149,12 @@ public class rssParser extends AbstractParser implements Parser {
feedSections.add(itemHeadline);
}
Map itemLinks = scraper.getAnchors();
Map<String, String> itemLinks = scraper.getAnchors();
if ((itemLinks != null) && (itemLinks.size() > 0)) {
anchors.putAll(itemLinks);
}
TreeSet itemImages = scraper.getImages();
TreeSet<htmlFilterImageEntry> itemImages = scraper.getImages();
if ((itemImages != null) && (itemImages.size() > 0)) {
images.addAll(itemImages);
}

@ -61,7 +61,7 @@ public class rtfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/rtf","rtf");
SUPPORTED_MIME_TYPES.put("text/rtf","rtf");

@ -93,7 +93,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
case IInArchive.NExtract_NAskMode_kSkip:
this.log.logFine("Skipping " + this.filePath);
break;
};
}
}
public void SetOperationResult(int arg0) throws IOException {
@ -126,7 +126,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
}
// revert the above workaround
Map nanchors = new HashMap(theDoc.getAnchors().size(), 1f);
Map<String, String> nanchors = new HashMap<String, String>(theDoc.getAnchors().size(), 1f);
Iterator it = theDoc.getAnchors().entrySet().iterator();
Map.Entry entry;
String base = doc.getLocation().toNormalform(false, true);
@ -135,9 +135,9 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
if (((String)entry.getKey()).startsWith(base + "/")) {
String ref = "#" + ((String)entry.getKey()).substring(base.length() + 1);
this.log.logFinest("changing " + entry.getKey() + " to use reference " + ref);
nanchors.put(base + ref, entry.getValue());
nanchors.put(base + ref, (String)entry.getValue());
} else {
nanchors.put(entry.getKey(), entry.getValue());
nanchors.put((String)entry.getKey(), (String)entry.getValue());
}
}
theDoc.getAnchors().clear();

@ -65,7 +65,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z");
}

@ -61,7 +61,7 @@ public class swfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf");
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf");
@ -101,7 +101,7 @@ public class swfParser extends AbstractParser implements Parser {
String[] sections = null;
String abstrct = null;
//TreeSet images = null;
HashMap anchors = new HashMap();
HashMap<String, String> anchors = new HashMap<String, String>();
int urls = 0;
int urlStart = -1;
int urlEnd = 0;

@ -59,6 +59,7 @@ import java.util.zip.GZIPInputStream;
import com.ice.tar.TarEntry;
import com.ice.tar.TarInputStream;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
@ -74,7 +75,7 @@ public class tarParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
SUPPORTED_MIME_TYPES.put("application/tar","tar");
@ -127,11 +128,11 @@ public class tarParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer();
LinkedList docSections = new LinkedList();
LinkedList<String> docSections = new LinkedList<String>();
StringBuffer docAbstrct = new StringBuffer();
Map docAnchors = new HashMap();
TreeSet docImages = new TreeSet();
Map<String, String> docAnchors = new HashMap<String, String>();
TreeSet<htmlFilterImageEntry> docImages = new TreeSet<htmlFilterImageEntry>();
// looping through the contained files
TarEntry entry;

@ -76,7 +76,7 @@ public class vcfParser extends AbstractParser implements Parser {
*
* TODO: support of x-mozilla-cpt and x-mozilla-html tags
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
@ -102,9 +102,9 @@ public class vcfParser extends AbstractParser implements Parser {
try {
StringBuffer parsedTitle = new StringBuffer();
StringBuffer parsedDataText = new StringBuffer();
HashMap parsedData = new HashMap();
HashMap anchors = new HashMap();
LinkedList parsedNames = new LinkedList();
HashMap<String, String> parsedData = new HashMap<String, String>();
HashMap<String, String> anchors = new HashMap<String, String>();
LinkedList<String> parsedNames = new LinkedList<String>();
boolean useLastLine = false;
int lineNr = 0;

@ -74,7 +74,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/msexcel","xls");
SUPPORTED_MIME_TYPES.put("application/excel","xls");

@ -43,6 +43,7 @@
package de.anomic.plasma.parser.zip;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
@ -72,7 +73,7 @@ public class zipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static {
SUPPORTED_MIME_TYPES.put("application/zip","zip");
SUPPORTED_MIME_TYPES.put("application/x-zip","zip");
@ -91,7 +92,7 @@ public class zipParser extends AbstractParser implements Parser {
this.parserName = "Compressed Archive File Parser";
}
public Hashtable getSupportedMimeTypes() {
public Hashtable<String, String> getSupportedMimeTypes() {
return SUPPORTED_MIME_TYPES;
}
@ -111,10 +112,10 @@ public class zipParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer();
LinkedList docSections = new LinkedList();
LinkedList<String> docSections = new LinkedList<String>();
StringBuffer docAbstrct = new StringBuffer();
Map docAnchors = new HashMap();
TreeSet docImages = new TreeSet();
Map<String, String> docAnchors = new HashMap<String, String>();
TreeSet<htmlFilterImageEntry> docImages = new TreeSet<htmlFilterImageEntry>();
// creating a new parser class to parse the unzipped content
plasmaParser theParser = new plasmaParser();

@ -66,7 +66,7 @@ public final class plasmaParser {
public static final String PARSER_MODE_URLREDIRECTOR = "URLREDIRECTOR";
public static final String PARSER_MODE_ICAP = "ICAP";
public static final String PARSER_MODE_IMAGE = "IMAGE";
public static final HashSet PARSER_MODE = new HashSet(Arrays.asList(new String[]{
public static final HashSet<String> PARSER_MODE = new HashSet<String>(Arrays.asList(new String[]{
PARSER_MODE_PROXY,
PARSER_MODE_CRAWLER,
PARSER_MODE_ICAP,
@ -74,7 +74,7 @@ public final class plasmaParser {
PARSER_MODE_IMAGE
}));
private static final HashMap parserConfigList = new HashMap();
private static final HashMap<String, plasmaParserConfig> parserConfigList = new HashMap<String, plasmaParserConfig>();
/**
* A list containing all installed parsers and the mimeType that they support
@ -85,8 +85,8 @@ public final class plasmaParser {
/**
* A list of file extensions and mime types that are supported by the html-parser
*/
public static final HashSet supportedHTMLFileExt = new HashSet();
public static final HashSet supportedHTMLMimeTypes = new HashSet();
public static final HashSet<String> supportedHTMLFileExt = new HashSet<String>();
public static final HashSet<String> supportedHTMLMimeTypes = new HashSet<String>();
private static final Properties mimeTypeLookupByFileExt = new Properties();
static {
@ -104,15 +104,15 @@ public final class plasmaParser {
/**
* A list of media extensions that should <b>not</b> be handled by the plasmaParser
*/
private static final HashSet mediaExtSet = new HashSet();
private static final HashSet<String> mediaExtSet = new HashSet<String>();
/**
* A list of image, audio, video and application extensions
*/
private static final HashSet imageExtSet = new HashSet();
private static final HashSet audioExtSet = new HashSet();
private static final HashSet videoExtSet = new HashSet();
private static final HashSet appsExtSet = new HashSet();
private static final HashSet<String> imageExtSet = new HashSet<String>();
private static final HashSet<String> audioExtSet = new HashSet<String>();
private static final HashSet<String> videoExtSet = new HashSet<String>();
private static final HashSet<String> appsExtSet = new HashSet<String>();
/**
* This {@link FilenameFilter} is used to find all classes based on their filenames
@ -181,7 +181,7 @@ public final class plasmaParser {
* yacy html parser
*/
public static void initHTMLParsableMimeTypes(String htmlParsableMimeTypes) {
LinkedList mimeTypes = new LinkedList();
LinkedList<String> mimeTypes = new LinkedList<String>();
if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
return;
}
@ -195,8 +195,8 @@ public final class plasmaParser {
}
}
public static List extString2extList(String extString) {
LinkedList extensions = new LinkedList();
public static List<String> extString2extList(String extString) {
LinkedList<String> extensions = new LinkedList<String>();
if ((extString == null) || (extString.length() == 0)) {
return extensions;
} else {
@ -206,35 +206,35 @@ public final class plasmaParser {
return extensions;
}
public static void initMediaExt(List mediaExtList) {
public static void initMediaExt(List<String> mediaExtList) {
synchronized (mediaExtSet) {
mediaExtSet.clear();
mediaExtSet.addAll(mediaExtList);
}
}
public static void initImageExt(List imageExtList) {
public static void initImageExt(List<String> imageExtList) {
synchronized (imageExtSet) {
imageExtSet.clear();
imageExtSet.addAll(imageExtList);
}
}
public static void initAudioExt(List audioExtList) {
public static void initAudioExt(List<String> audioExtList) {
synchronized (audioExtSet) {
audioExtSet.clear();
audioExtSet.addAll(audioExtList);
}
}
public static void initVideoExt(List videoExtList) {
public static void initVideoExt(List<String> videoExtList) {
synchronized (videoExtSet) {
videoExtSet.clear();
videoExtSet.addAll(videoExtList);
}
}
public static void initAppsExt(List appsExtList) {
public static void initAppsExt(List<String> appsExtList) {
synchronized (appsExtSet) {
appsExtSet.clear();
appsExtSet.addAll(appsExtList);
@ -247,7 +247,7 @@ public final class plasmaParser {
}
}
public static void initSupportedHTMLFileExt(List supportedRealtimeFileExtList) {
public static void initSupportedHTMLFileExt(List<String> supportedRealtimeFileExtList) {
synchronized (supportedHTMLFileExt) {
supportedHTMLFileExt.clear();
supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
@ -750,10 +750,10 @@ public final class plasmaParser {
}
static Map allReflinks(Set links) {
static Map<String, String> allReflinks(Set links) {
// links is either a Set of Strings (with urls) or htmlFilterImageEntries
// we find all links that are part of a reference inside a url
HashMap v = new HashMap();
HashMap<String, String> v = new HashMap<String, String>();
Iterator i = links.iterator();
Object o;
String url;
@ -784,9 +784,9 @@ public final class plasmaParser {
return v;
}
static Map allSubpaths(Set links) {
static Map<String, String> allSubpaths(Set links) {
// links is either a Set of Strings (urls) or a Set of htmlFilterImageEntries
HashMap v = new HashMap();
HashMap<String, String> v = new HashMap<String, String>();
Iterator i = links.iterator();
Object o;
String url;

@ -68,21 +68,21 @@ import de.anomic.plasma.parser.Parser;
public class plasmaParserDocument {
private yacyURL location; // the source url
private String mimeType; // mimeType as taken from http header
private String charset; // the charset of the document
private List keywords; // most resources provide a keyword field
private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result
private StringBuffer author; // author or copyright
private List sections; // if present: more titles/headlines appearing in the document
private StringBuffer abstrct; // an abstract, if present: short content description
private Object text; // the clear text, all that is visible
private Map anchors; // all links embedded as clickeable entities (anchor tags)
private TreeSet images; // all visible pictures in document
private String mimeType; // mimeType as taken from http header
private String charset; // the charset of the document
private List<String> keywords; // most resources provide a keyword field
private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result
private StringBuffer author; // author or copyright
private List<String> sections; // if present: more titles/headlines appearing in the document
private StringBuffer abstrct; // an abstract, if present: short content description
private Object text; // the clear text, all that is visible
private Map<String, String> anchors; // all links embedded as clickeable entities (anchor tags)
private TreeSet<htmlFilterImageEntry> images; // all visible pictures in document
// the anchors and images - Maps are URL-to-EntityDescription mappings.
// The EntityDescription appear either as visible text in anchors or as alternative
// text in image tags.
private Map hyperlinks, audiolinks, videolinks, applinks;
private Map emaillinks;
private Map<String, String> hyperlinks, audiolinks, videolinks, applinks;
private Map<String, String> emaillinks;
private yacyURL favicon;
private boolean resorted;
private InputStream textStream;
@ -90,17 +90,17 @@ public class plasmaParserDocument {
protected plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
Object text, Map anchors, TreeSet images) {
Object text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this.location = location;
this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
this.charset = charset;
this.keywords = (keywords == null) ? new LinkedList() : Arrays.asList(keywords);
this.keywords = (keywords == null) ? new LinkedList<String>() : Arrays.asList(keywords);
this.title = (title == null) ? new StringBuffer() : new StringBuffer(title);
this.author = (author == null) ? new StringBuffer() : new StringBuffer(author);
this.sections = (sections == null) ? new LinkedList() : Arrays.asList(sections);
this.sections = (sections == null) ? new LinkedList<String>() : Arrays.asList(sections);
this.abstrct = (abstrct == null) ? new StringBuffer() : new StringBuffer(abstrct);
this.anchors = (anchors == null) ? new HashMap(0) : anchors;
this.images = (images == null) ? new TreeSet() : images;
this.anchors = (anchors == null) ? new HashMap<String, String>(0) : anchors;
this.images = (images == null) ? new TreeSet<htmlFilterImageEntry>() : images;
this.hyperlinks = null;
this.audiolinks = null;
this.videolinks = null;
@ -125,21 +125,21 @@ public class plasmaParserDocument {
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
byte[] text, Map anchors, TreeSet images) {
byte[] text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
File text, Map anchors, TreeSet images) {
File text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
serverCachedFileOutputStream text, Map anchors, TreeSet images) {
serverCachedFileOutputStream text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
@ -238,7 +238,7 @@ public class plasmaParserDocument {
public String getKeywords(char separator) {
// sort out doubles and empty words
TreeSet hs = new TreeSet();
TreeSet<String> hs = new TreeSet<String>();
String s;
for (int i = 0; i < this.keywords.size(); i++) {
if (this.keywords.get(i) == null) continue;
@ -253,11 +253,11 @@ public class plasmaParserDocument {
return sb.substring(0, sb.length() - 1);
}
public List getKeywords() {
public List<String> getKeywords() {
return this.keywords;
}
public Map getAnchors() {
public Map<String, String> getAnchors() {
// returns all links embedded as anchors (clickable entities)
// this is a url(String)/text(String) map
return anchors;
@ -266,35 +266,35 @@ public class plasmaParserDocument {
// the next three methods provide a calculated view on the getAnchors/getImages:
public Map getHyperlinks() {
public Map<String, String> getHyperlinks() {
// this is a subset of the getAnchor-set: only links to other hyperrefs
if (!resorted) resortLinks();
return hyperlinks;
}
public Map getAudiolinks() {
public Map<String, String> getAudiolinks() {
if (!resorted) resortLinks();
return this.audiolinks;
}
public Map getVideolinks() {
public Map<String, String> getVideolinks() {
if (!resorted) resortLinks();
return this.videolinks;
}
public TreeSet getImages() {
public TreeSet<htmlFilterImageEntry> getImages() {
// returns all links embedded as pictures (visible in document)
// this returns a htmlFilterImageEntry collection
if (!resorted) resortLinks();
return images;
}
public Map getApplinks() {
public Map<String, String> getApplinks() {
if (!resorted) resortLinks();
return this.applinks;
}
public Map getEmaillinks() {
public Map<String, String> getEmaillinks() {
// this is part of the getAnchor-set: only links to email addresses
if (!resorted) resortLinks();
return emaillinks;
@ -309,18 +309,18 @@ public class plasmaParserDocument {
int extpos, qpos;
String ext = null;
i = anchors.entrySet().iterator();
hyperlinks = new HashMap();
videolinks = new HashMap();
audiolinks = new HashMap();
applinks = new HashMap();
emaillinks = new HashMap();
TreeSet collectedImages = new TreeSet(); // this is a set that is collected now and joined later to the imagelinks
hyperlinks = new HashMap<String, String>();
videolinks = new HashMap<String, String>();
audiolinks = new HashMap<String, String>();
applinks = new HashMap<String, String>();
emaillinks = new HashMap<String, String>();
TreeSet<htmlFilterImageEntry> collectedImages = new TreeSet<htmlFilterImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
u = (String) entry.getKey();
if ((u != null) && (u.startsWith("mailto:"))) {
emaillinks.put(u.substring(7), entry.getValue());
emaillinks.put(u.substring(7), (String)entry.getValue());
} else {
extpos = u.lastIndexOf(".");
if (extpos > 0) {
@ -337,11 +337,11 @@ public class plasmaParserDocument {
if (plasmaParser.imageExtContains(ext)) {
collectedImages.add(new htmlFilterImageEntry(url, (String) entry.getValue(), -1, -1));
}
else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, entry.getValue());
else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, entry.getValue());
else if (plasmaParser.appsExtContains(ext)) applinks.put(u, entry.getValue());
else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, (String)entry.getValue());
else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, (String)entry.getValue());
else if (plasmaParser.appsExtContains(ext)) applinks.put(u, (String)entry.getValue());
} else {
hyperlinks.put(u, entry.getValue());
hyperlinks.put(u, (String)entry.getValue());
}
} catch (MalformedURLException e1) {
}
@ -356,11 +356,11 @@ public class plasmaParserDocument {
iEntry = (htmlFilterImageEntry) i.next();
if (!images.contains(iEntry)) images.add(iEntry);
}
// expand the hyperlinks:
// we add artificial hyperlinks to the hyperlink set
// that can be calculated from given hyperlinks and imagelinks
hyperlinks.putAll(plasmaParser.allReflinks(hyperlinks.keySet()));
hyperlinks.putAll(plasmaParser.allReflinks(images));
hyperlinks.putAll(plasmaParser.allReflinks(audiolinks.keySet()));
hyperlinks.putAll(plasmaParser.allReflinks(videolinks.keySet()));

Loading…
Cancel
Save