*) Tried to get rid of the compiler warnings in the parsers, mainly by adding generic type parameters to raw collection types (see http://forum.yacy-websuche.de/viewtopic.php?t=660)

Lots of warnings are gone, but a new one now shows up in htmlFilterContentScraper.


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4293 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 17 years ago
parent 4dc438f7e7
commit b08f877e97

@ -70,8 +70,8 @@ import de.anomic.yacy.yacyURL;
public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper { public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
// statics: for initialisation of the HTMLFilterAbstractScraper // statics: for initialisation of the HTMLFilterAbstractScraper
private static TreeSet linkTags0; private static TreeSet<String> linkTags0;
private static TreeSet linkTags1; private static TreeSet<String> linkTags1;
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static { static {
@ -80,7 +80,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} }
static { static {
linkTags0 = new TreeSet(insensitiveCollator); linkTags0 = new TreeSet<String>(insensitiveCollator);
linkTags0.add("img"); linkTags0.add("img");
linkTags0.add("base"); linkTags0.add("base");
linkTags0.add("frame"); linkTags0.add("frame");
@ -90,7 +90,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
linkTags0.add("embed"); //added by [MN] linkTags0.add("embed"); //added by [MN]
linkTags0.add("param"); //added by [MN] linkTags0.add("param"); //added by [MN]
linkTags1 = new TreeSet(insensitiveCollator); linkTags1 = new TreeSet<String>(insensitiveCollator);
linkTags1.add("a"); linkTags1.add("a");
linkTags1.add("h1"); linkTags1.add("h1");
linkTags1.add("h2"); linkTags1.add("h2");
@ -100,9 +100,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} }
// class variables: collectors for links // class variables: collectors for links
private HashMap anchors; private HashMap<String, String> anchors;
private TreeSet images; // String(absolute url)/ImageEntry relation private TreeSet<htmlFilterImageEntry> images; // String(absolute url)/ImageEntry relation
private HashMap metas; private HashMap<String, String> metas;
private String title; private String title;
//private String headline; //private String headline;
private List[] headlines; private List[] headlines;
@ -124,12 +124,12 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// it is only the reference for relative links // it is only the reference for relative links
super(linkTags0, linkTags1); super(linkTags0, linkTags1);
this.root = root; this.root = root;
this.anchors = new HashMap(); this.anchors = new HashMap<String, String>();
this.images = new TreeSet(); this.images = new TreeSet<htmlFilterImageEntry>();
this.metas = new HashMap(); this.metas = new HashMap<String, String>();
this.title = ""; this.title = "";
this.headlines = new ArrayList[4]; this.headlines = new ArrayList[4];
for (int i = 0; i < 4; i++) headlines[i] = new ArrayList(); for (int i = 0; i < 4; i++) headlines[i] = new ArrayList<String>();
this.content = new serverCharBuffer(1024); this.content = new serverCharBuffer(1024);
} }
@ -311,7 +311,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} }
} }
// othervise take any headline // otherwise take any headline
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (headlines[i].size() > 0) return (String) headlines[i].get(0); if (headlines[i].size() > 0) return (String) headlines[i].get(0);
} }
@ -346,17 +346,17 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} }
} }
public Map getAnchors() { public Map<String, String> getAnchors() {
// returns a url (String) / name (String) relation // returns a url (String) / name (String) relation
return anchors; return anchors;
} }
public TreeSet getImages() { public TreeSet<htmlFilterImageEntry> getImages() {
// this resturns a String(absolute url)/htmlFilterImageEntry - relation // this resturns a String(absolute url)/htmlFilterImageEntry - relation
return images; return images;
} }
public Map getMetas() { public Map<String, String> getMetas() {
return metas; return metas;
} }

@ -63,7 +63,7 @@ public class bzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static String fileExtensions = "bz2,tbz,tbz2"; static String fileExtensions = "bz2,tbz,tbz2";
static { static {
SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions); SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);

@ -60,7 +60,7 @@ public class docParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { SUPPORTED_MIME_TYPES.put("application/msword","doc"); } static { SUPPORTED_MIME_TYPES.put("application/msword","doc"); }
/** /**

@ -62,7 +62,7 @@ public class gzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/x-gzip","gz,tgz"); SUPPORTED_MIME_TYPES.put("application/x-gzip","gz,tgz");
SUPPORTED_MIME_TYPES.put("application/gzip","gz,tgz"); SUPPORTED_MIME_TYPES.put("application/gzip","gz,tgz");

@ -70,7 +70,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("text/xml","xml"); SUPPORTED_MIME_TYPES.put("text/xml","xml");
SUPPORTED_MIME_TYPES.put("application/xml","xml"); SUPPORTED_MIME_TYPES.put("application/xml","xml");
@ -96,7 +96,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* Helping structure used to detect loops in the mimeType detection * Helping structure used to detect loops in the mimeType detection
* process * process
*/ */
private static Hashtable threadLoopDetection = new Hashtable(); private static Hashtable<Thread, Integer> threadLoopDetection = new Hashtable<Thread, Integer>();
public mimeTypeParser() { public mimeTypeParser() {
super(LIBX_DEPENDENCIES); super(LIBX_DEPENDENCIES);

@ -74,7 +74,7 @@ public class odtParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt"); SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt");
SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt"); SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");

@ -70,7 +70,7 @@ public class pdfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { SUPPORTED_MIME_TYPES.put("application/pdf","pdf"); } static { SUPPORTED_MIME_TYPES.put("application/pdf","pdf"); }
/** /**

@ -62,7 +62,7 @@ public class pptParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/mspowerpoint","ppt,pps"); SUPPORTED_MIME_TYPES.put("application/mspowerpoint","ppt,pps");
SUPPORTED_MIME_TYPES.put("application/powerpoint","ppt,pps"); SUPPORTED_MIME_TYPES.put("application/powerpoint","ppt,pps");

@ -65,7 +65,7 @@ public class psParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/postscript","ps"); SUPPORTED_MIME_TYPES.put("application/postscript","ps");
SUPPORTED_MIME_TYPES.put("text/postscript","ps"); SUPPORTED_MIME_TYPES.put("text/postscript","ps");

@ -70,7 +70,7 @@ public class rpmParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm"); SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm"); SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");
@ -110,7 +110,7 @@ public class rpmParser extends AbstractParser implements Parser {
RPMFile rpmFile = null; RPMFile rpmFile = null;
try { try {
String summary = null, description = null, packager = null, name = sourceFile.getName(); String summary = null, description = null, packager = null, name = sourceFile.getName();
HashMap anchors = new HashMap(); HashMap<String, String> anchors = new HashMap<String, String>();
StringBuffer content = new StringBuffer(); StringBuffer content = new StringBuffer();
// opening the rpm file // opening the rpm file

@ -73,7 +73,7 @@ public class rssParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("text/rss","xml,rss,rdf"); SUPPORTED_MIME_TYPES.put("text/rss","xml,rss,rdf");
SUPPORTED_MIME_TYPES.put("application/rdf+xml","xml,rss,rdf"); SUPPORTED_MIME_TYPES.put("application/rdf+xml","xml,rss,rdf");
@ -95,9 +95,9 @@ public class rssParser extends AbstractParser implements Parser {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try { try {
LinkedList feedSections = new LinkedList(); LinkedList<String> feedSections = new LinkedList<String>();
HashMap anchors = new HashMap(); HashMap<String, String> anchors = new HashMap<String, String>();
TreeSet images = new TreeSet(); TreeSet<htmlFilterImageEntry> images = new TreeSet<htmlFilterImageEntry>();
serverByteBuffer text = new serverByteBuffer(); serverByteBuffer text = new serverByteBuffer();
serverCharBuffer authors = new serverCharBuffer(); serverCharBuffer authors = new serverCharBuffer();
@ -149,12 +149,12 @@ public class rssParser extends AbstractParser implements Parser {
feedSections.add(itemHeadline); feedSections.add(itemHeadline);
} }
Map itemLinks = scraper.getAnchors(); Map<String, String> itemLinks = scraper.getAnchors();
if ((itemLinks != null) && (itemLinks.size() > 0)) { if ((itemLinks != null) && (itemLinks.size() > 0)) {
anchors.putAll(itemLinks); anchors.putAll(itemLinks);
} }
TreeSet itemImages = scraper.getImages(); TreeSet<htmlFilterImageEntry> itemImages = scraper.getImages();
if ((itemImages != null) && (itemImages.size() > 0)) { if ((itemImages != null) && (itemImages.size() > 0)) {
images.addAll(itemImages); images.addAll(itemImages);
} }

@ -61,7 +61,7 @@ public class rtfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/rtf","rtf"); SUPPORTED_MIME_TYPES.put("application/rtf","rtf");
SUPPORTED_MIME_TYPES.put("text/rtf","rtf"); SUPPORTED_MIME_TYPES.put("text/rtf","rtf");

@ -93,7 +93,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
case IInArchive.NExtract_NAskMode_kSkip: case IInArchive.NExtract_NAskMode_kSkip:
this.log.logFine("Skipping " + this.filePath); this.log.logFine("Skipping " + this.filePath);
break; break;
}; }
} }
public void SetOperationResult(int arg0) throws IOException { public void SetOperationResult(int arg0) throws IOException {
@ -126,7 +126,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
} }
// revert the above workaround // revert the above workaround
Map nanchors = new HashMap(theDoc.getAnchors().size(), 1f); Map<String, String> nanchors = new HashMap<String, String>(theDoc.getAnchors().size(), 1f);
Iterator it = theDoc.getAnchors().entrySet().iterator(); Iterator it = theDoc.getAnchors().entrySet().iterator();
Map.Entry entry; Map.Entry entry;
String base = doc.getLocation().toNormalform(false, true); String base = doc.getLocation().toNormalform(false, true);
@ -135,9 +135,9 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
if (((String)entry.getKey()).startsWith(base + "/")) { if (((String)entry.getKey()).startsWith(base + "/")) {
String ref = "#" + ((String)entry.getKey()).substring(base.length() + 1); String ref = "#" + ((String)entry.getKey()).substring(base.length() + 1);
this.log.logFinest("changing " + entry.getKey() + " to use reference " + ref); this.log.logFinest("changing " + entry.getKey() + " to use reference " + ref);
nanchors.put(base + ref, entry.getValue()); nanchors.put(base + ref, (String)entry.getValue());
} else { } else {
nanchors.put(entry.getKey(), entry.getValue()); nanchors.put((String)entry.getKey(), (String)entry.getValue());
} }
} }
theDoc.getAnchors().clear(); theDoc.getAnchors().clear();

@ -65,7 +65,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z"); SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z");
} }

@ -61,7 +61,7 @@ public class swfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf"); SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf");
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf"); SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf");
@ -101,7 +101,7 @@ public class swfParser extends AbstractParser implements Parser {
String[] sections = null; String[] sections = null;
String abstrct = null; String abstrct = null;
//TreeSet images = null; //TreeSet images = null;
HashMap anchors = new HashMap(); HashMap<String, String> anchors = new HashMap<String, String>();
int urls = 0; int urls = 0;
int urlStart = -1; int urlStart = -1;
int urlEnd = 0; int urlEnd = 0;

@ -59,6 +59,7 @@ import java.util.zip.GZIPInputStream;
import com.ice.tar.TarEntry; import com.ice.tar.TarEntry;
import com.ice.tar.TarInputStream; import com.ice.tar.TarInputStream;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.AbstractParser;
@ -74,7 +75,7 @@ public class tarParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/x-tar","tar"); SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
SUPPORTED_MIME_TYPES.put("application/tar","tar"); SUPPORTED_MIME_TYPES.put("application/tar","tar");
@ -127,11 +128,11 @@ public class tarParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer(); StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer(); StringBuffer docLongTitle = new StringBuffer();
LinkedList docSections = new LinkedList(); LinkedList<String> docSections = new LinkedList<String>();
StringBuffer docAbstrct = new StringBuffer(); StringBuffer docAbstrct = new StringBuffer();
Map docAnchors = new HashMap(); Map<String, String> docAnchors = new HashMap<String, String>();
TreeSet docImages = new TreeSet(); TreeSet<htmlFilterImageEntry> docImages = new TreeSet<htmlFilterImageEntry>();
// looping through the contained files // looping through the contained files
TarEntry entry; TarEntry entry;

@ -76,7 +76,7 @@ public class vcfParser extends AbstractParser implements Parser {
* *
* TODO: support of x-mozilla-cpt and x-mozilla-html tags * TODO: support of x-mozilla-cpt and x-mozilla-html tags
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf"); SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
SUPPORTED_MIME_TYPES.put("application/vcard","vcf"); SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
@ -102,9 +102,9 @@ public class vcfParser extends AbstractParser implements Parser {
try { try {
StringBuffer parsedTitle = new StringBuffer(); StringBuffer parsedTitle = new StringBuffer();
StringBuffer parsedDataText = new StringBuffer(); StringBuffer parsedDataText = new StringBuffer();
HashMap parsedData = new HashMap(); HashMap<String, String> parsedData = new HashMap<String, String>();
HashMap anchors = new HashMap(); HashMap<String, String> anchors = new HashMap<String, String>();
LinkedList parsedNames = new LinkedList(); LinkedList<String> parsedNames = new LinkedList<String>();
boolean useLastLine = false; boolean useLastLine = false;
int lineNr = 0; int lineNr = 0;

@ -74,7 +74,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/msexcel","xls"); SUPPORTED_MIME_TYPES.put("application/msexcel","xls");
SUPPORTED_MIME_TYPES.put("application/excel","xls"); SUPPORTED_MIME_TYPES.put("application/excel","xls");

@ -43,6 +43,7 @@
package de.anomic.plasma.parser.zip; package de.anomic.plasma.parser.zip;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
@ -72,7 +73,7 @@ public class zipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class * a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes() * @see #getSupportedMimeTypes()
*/ */
public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable(); public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
static { static {
SUPPORTED_MIME_TYPES.put("application/zip","zip"); SUPPORTED_MIME_TYPES.put("application/zip","zip");
SUPPORTED_MIME_TYPES.put("application/x-zip","zip"); SUPPORTED_MIME_TYPES.put("application/x-zip","zip");
@ -91,7 +92,7 @@ public class zipParser extends AbstractParser implements Parser {
this.parserName = "Compressed Archive File Parser"; this.parserName = "Compressed Archive File Parser";
} }
public Hashtable getSupportedMimeTypes() { public Hashtable<String, String> getSupportedMimeTypes() {
return SUPPORTED_MIME_TYPES; return SUPPORTED_MIME_TYPES;
} }
@ -111,10 +112,10 @@ public class zipParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer(); StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer(); StringBuffer docLongTitle = new StringBuffer();
LinkedList docSections = new LinkedList(); LinkedList<String> docSections = new LinkedList<String>();
StringBuffer docAbstrct = new StringBuffer(); StringBuffer docAbstrct = new StringBuffer();
Map docAnchors = new HashMap(); Map<String, String> docAnchors = new HashMap<String, String>();
TreeSet docImages = new TreeSet(); TreeSet<htmlFilterImageEntry> docImages = new TreeSet<htmlFilterImageEntry>();
// creating a new parser class to parse the unzipped content // creating a new parser class to parse the unzipped content
plasmaParser theParser = new plasmaParser(); plasmaParser theParser = new plasmaParser();

@ -66,7 +66,7 @@ public final class plasmaParser {
public static final String PARSER_MODE_URLREDIRECTOR = "URLREDIRECTOR"; public static final String PARSER_MODE_URLREDIRECTOR = "URLREDIRECTOR";
public static final String PARSER_MODE_ICAP = "ICAP"; public static final String PARSER_MODE_ICAP = "ICAP";
public static final String PARSER_MODE_IMAGE = "IMAGE"; public static final String PARSER_MODE_IMAGE = "IMAGE";
public static final HashSet PARSER_MODE = new HashSet(Arrays.asList(new String[]{ public static final HashSet<String> PARSER_MODE = new HashSet<String>(Arrays.asList(new String[]{
PARSER_MODE_PROXY, PARSER_MODE_PROXY,
PARSER_MODE_CRAWLER, PARSER_MODE_CRAWLER,
PARSER_MODE_ICAP, PARSER_MODE_ICAP,
@ -74,7 +74,7 @@ public final class plasmaParser {
PARSER_MODE_IMAGE PARSER_MODE_IMAGE
})); }));
private static final HashMap parserConfigList = new HashMap(); private static final HashMap<String, plasmaParserConfig> parserConfigList = new HashMap<String, plasmaParserConfig>();
/** /**
* A list containing all installed parsers and the mimeType that they support * A list containing all installed parsers and the mimeType that they support
@ -85,8 +85,8 @@ public final class plasmaParser {
/** /**
* A list of file extensions and mime types that are supported by the html-parser * A list of file extensions and mime types that are supported by the html-parser
*/ */
public static final HashSet supportedHTMLFileExt = new HashSet(); public static final HashSet<String> supportedHTMLFileExt = new HashSet<String>();
public static final HashSet supportedHTMLMimeTypes = new HashSet(); public static final HashSet<String> supportedHTMLMimeTypes = new HashSet<String>();
private static final Properties mimeTypeLookupByFileExt = new Properties(); private static final Properties mimeTypeLookupByFileExt = new Properties();
static { static {
@ -104,15 +104,15 @@ public final class plasmaParser {
/** /**
* A list of media extensions that should <b>not</b> be handled by the plasmaParser * A list of media extensions that should <b>not</b> be handled by the plasmaParser
*/ */
private static final HashSet mediaExtSet = new HashSet(); private static final HashSet<String> mediaExtSet = new HashSet<String>();
/** /**
* A list of image, audio, video and application extensions * A list of image, audio, video and application extensions
*/ */
private static final HashSet imageExtSet = new HashSet(); private static final HashSet<String> imageExtSet = new HashSet<String>();
private static final HashSet audioExtSet = new HashSet(); private static final HashSet<String> audioExtSet = new HashSet<String>();
private static final HashSet videoExtSet = new HashSet(); private static final HashSet<String> videoExtSet = new HashSet<String>();
private static final HashSet appsExtSet = new HashSet(); private static final HashSet<String> appsExtSet = new HashSet<String>();
/** /**
* This {@link FilenameFilter} is used to find all classes based on there filenames * This {@link FilenameFilter} is used to find all classes based on there filenames
@ -181,7 +181,7 @@ public final class plasmaParser {
* yacy html parser * yacy html parser
*/ */
public static void initHTMLParsableMimeTypes(String htmlParsableMimeTypes) { public static void initHTMLParsableMimeTypes(String htmlParsableMimeTypes) {
LinkedList mimeTypes = new LinkedList(); LinkedList<String> mimeTypes = new LinkedList<String>();
if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) { if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
return; return;
} }
@ -195,8 +195,8 @@ public final class plasmaParser {
} }
} }
public static List extString2extList(String extString) { public static List<String> extString2extList(String extString) {
LinkedList extensions = new LinkedList(); LinkedList<String> extensions = new LinkedList<String>();
if ((extString == null) || (extString.length() == 0)) { if ((extString == null) || (extString.length() == 0)) {
return extensions; return extensions;
} else { } else {
@ -206,35 +206,35 @@ public final class plasmaParser {
return extensions; return extensions;
} }
public static void initMediaExt(List mediaExtList) { public static void initMediaExt(List<String> mediaExtList) {
synchronized (mediaExtSet) { synchronized (mediaExtSet) {
mediaExtSet.clear(); mediaExtSet.clear();
mediaExtSet.addAll(mediaExtList); mediaExtSet.addAll(mediaExtList);
} }
} }
public static void initImageExt(List imageExtList) { public static void initImageExt(List<String> imageExtList) {
synchronized (imageExtSet) { synchronized (imageExtSet) {
imageExtSet.clear(); imageExtSet.clear();
imageExtSet.addAll(imageExtList); imageExtSet.addAll(imageExtList);
} }
} }
public static void initAudioExt(List audioExtList) { public static void initAudioExt(List<String> audioExtList) {
synchronized (audioExtSet) { synchronized (audioExtSet) {
audioExtSet.clear(); audioExtSet.clear();
audioExtSet.addAll(audioExtList); audioExtSet.addAll(audioExtList);
} }
} }
public static void initVideoExt(List videoExtList) { public static void initVideoExt(List<String> videoExtList) {
synchronized (videoExtSet) { synchronized (videoExtSet) {
videoExtSet.clear(); videoExtSet.clear();
videoExtSet.addAll(videoExtList); videoExtSet.addAll(videoExtList);
} }
} }
public static void initAppsExt(List appsExtList) { public static void initAppsExt(List<String> appsExtList) {
synchronized (appsExtSet) { synchronized (appsExtSet) {
appsExtSet.clear(); appsExtSet.clear();
appsExtSet.addAll(appsExtList); appsExtSet.addAll(appsExtList);
@ -247,7 +247,7 @@ public final class plasmaParser {
} }
} }
public static void initSupportedHTMLFileExt(List supportedRealtimeFileExtList) { public static void initSupportedHTMLFileExt(List<String> supportedRealtimeFileExtList) {
synchronized (supportedHTMLFileExt) { synchronized (supportedHTMLFileExt) {
supportedHTMLFileExt.clear(); supportedHTMLFileExt.clear();
supportedHTMLFileExt.addAll(supportedRealtimeFileExtList); supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
@ -750,10 +750,10 @@ public final class plasmaParser {
} }
static Map allReflinks(Set links) { static Map<String, String> allReflinks(Set links) {
// links is either a Set of Strings (with urls) or htmlFilterImageEntries // links is either a Set of Strings (with urls) or htmlFilterImageEntries
// we find all links that are part of a reference inside a url // we find all links that are part of a reference inside a url
HashMap v = new HashMap(); HashMap<String, String> v = new HashMap<String, String>();
Iterator i = links.iterator(); Iterator i = links.iterator();
Object o; Object o;
String url; String url;
@ -784,9 +784,9 @@ public final class plasmaParser {
return v; return v;
} }
static Map allSubpaths(Set links) { static Map<String, String> allSubpaths(Set links) {
// links is either a Set of Strings (urls) or a Set of htmlFilterImageEntries // links is either a Set of Strings (urls) or a Set of htmlFilterImageEntries
HashMap v = new HashMap(); HashMap<String, String> v = new HashMap<String, String>();
Iterator i = links.iterator(); Iterator i = links.iterator();
Object o; Object o;
String url; String url;

@ -68,21 +68,21 @@ import de.anomic.plasma.parser.Parser;
public class plasmaParserDocument { public class plasmaParserDocument {
private yacyURL location; // the source url private yacyURL location; // the source url
private String mimeType; // mimeType as taken from http header private String mimeType; // mimeType as taken from http header
private String charset; // the charset of the document private String charset; // the charset of the document
private List keywords; // most resources provide a keyword field private List<String> keywords; // most resources provide a keyword field
private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result
private StringBuffer author; // author or copyright private StringBuffer author; // author or copyright
private List sections; // if present: more titles/headlines appearing in the document private List<String> sections; // if present: more titles/headlines appearing in the document
private StringBuffer abstrct; // an abstract, if present: short content description private StringBuffer abstrct; // an abstract, if present: short content description
private Object text; // the clear text, all that is visible private Object text; // the clear text, all that is visible
private Map anchors; // all links embedded as clickeable entities (anchor tags) private Map<String, String> anchors; // all links embedded as clickeable entities (anchor tags)
private TreeSet images; // all visible pictures in document private TreeSet<htmlFilterImageEntry> images; // all visible pictures in document
// the anchors and images - Maps are URL-to-EntityDescription mappings. // the anchors and images - Maps are URL-to-EntityDescription mappings.
// The EntityDescription appear either as visible text in anchors or as alternative // The EntityDescription appear either as visible text in anchors or as alternative
// text in image tags. // text in image tags.
private Map hyperlinks, audiolinks, videolinks, applinks; private Map<String, String> hyperlinks, audiolinks, videolinks, applinks;
private Map emaillinks; private Map<String, String> emaillinks;
private yacyURL favicon; private yacyURL favicon;
private boolean resorted; private boolean resorted;
private InputStream textStream; private InputStream textStream;
@ -90,17 +90,17 @@ public class plasmaParserDocument {
protected plasmaParserDocument(yacyURL location, String mimeType, String charset, protected plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author, String[] keywords, String title, String author,
String[] sections, String abstrct, String[] sections, String abstrct,
Object text, Map anchors, TreeSet images) { Object text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this.location = location; this.location = location;
this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType; this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
this.charset = charset; this.charset = charset;
this.keywords = (keywords == null) ? new LinkedList() : Arrays.asList(keywords); this.keywords = (keywords == null) ? new LinkedList<String>() : Arrays.asList(keywords);
this.title = (title == null) ? new StringBuffer() : new StringBuffer(title); this.title = (title == null) ? new StringBuffer() : new StringBuffer(title);
this.author = (author == null) ? new StringBuffer() : new StringBuffer(author); this.author = (author == null) ? new StringBuffer() : new StringBuffer(author);
this.sections = (sections == null) ? new LinkedList() : Arrays.asList(sections); this.sections = (sections == null) ? new LinkedList<String>() : Arrays.asList(sections);
this.abstrct = (abstrct == null) ? new StringBuffer() : new StringBuffer(abstrct); this.abstrct = (abstrct == null) ? new StringBuffer() : new StringBuffer(abstrct);
this.anchors = (anchors == null) ? new HashMap(0) : anchors; this.anchors = (anchors == null) ? new HashMap<String, String>(0) : anchors;
this.images = (images == null) ? new TreeSet() : images; this.images = (images == null) ? new TreeSet<htmlFilterImageEntry>() : images;
this.hyperlinks = null; this.hyperlinks = null;
this.audiolinks = null; this.audiolinks = null;
this.videolinks = null; this.videolinks = null;
@ -125,21 +125,21 @@ public class plasmaParserDocument {
public plasmaParserDocument(yacyURL location, String mimeType, String charset, public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author, String[] keywords, String title, String author,
String[] sections, String abstrct, String[] sections, String abstrct,
byte[] text, Map anchors, TreeSet images) { byte[] text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
} }
public plasmaParserDocument(yacyURL location, String mimeType, String charset, public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author, String[] keywords, String title, String author,
String[] sections, String abstrct, String[] sections, String abstrct,
File text, Map anchors, TreeSet images) { File text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
} }
public plasmaParserDocument(yacyURL location, String mimeType, String charset, public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author, String[] keywords, String title, String author,
String[] sections, String abstrct, String[] sections, String abstrct,
serverCachedFileOutputStream text, Map anchors, TreeSet images) { serverCachedFileOutputStream text, Map<String, String> anchors, TreeSet<htmlFilterImageEntry> images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images); this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
} }
@ -238,7 +238,7 @@ public class plasmaParserDocument {
public String getKeywords(char separator) { public String getKeywords(char separator) {
// sort out doubles and empty words // sort out doubles and empty words
TreeSet hs = new TreeSet(); TreeSet<String> hs = new TreeSet<String>();
String s; String s;
for (int i = 0; i < this.keywords.size(); i++) { for (int i = 0; i < this.keywords.size(); i++) {
if (this.keywords.get(i) == null) continue; if (this.keywords.get(i) == null) continue;
@ -253,11 +253,11 @@ public class plasmaParserDocument {
return sb.substring(0, sb.length() - 1); return sb.substring(0, sb.length() - 1);
} }
public List getKeywords() { public List<String> getKeywords() {
return this.keywords; return this.keywords;
} }
public Map getAnchors() { public Map<String, String> getAnchors() {
// returns all links embedded as anchors (clickeable entities) // returns all links embedded as anchors (clickeable entities)
// this is a url(String)/text(String) map // this is a url(String)/text(String) map
return anchors; return anchors;
@ -266,35 +266,35 @@ public class plasmaParserDocument {
// the next three methods provide a calculated view on the getAnchors/getImages: // the next three methods provide a calculated view on the getAnchors/getImages:
public Map getHyperlinks() { public Map<String, String> getHyperlinks() {
// this is a subset of the getAnchor-set: only links to other hyperrefs // this is a subset of the getAnchor-set: only links to other hyperrefs
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return hyperlinks; return hyperlinks;
} }
public Map getAudiolinks() { public Map<String, String> getAudiolinks() {
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return this.audiolinks; return this.audiolinks;
} }
public Map getVideolinks() { public Map<String, String> getVideolinks() {
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return this.videolinks; return this.videolinks;
} }
public TreeSet getImages() { public TreeSet<htmlFilterImageEntry> getImages() {
// returns all links enbedded as pictures (visible in document) // returns all links enbedded as pictures (visible in document)
// this resturns a htmlFilterImageEntry collection // this resturns a htmlFilterImageEntry collection
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return images; return images;
} }
public Map getApplinks() { public Map<String, String> getApplinks() {
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return this.applinks; return this.applinks;
} }
public Map getEmaillinks() { public Map<String, String> getEmaillinks() {
// this is part of the getAnchor-set: only links to email addresses // this is part of the getAnchor-set: only links to email addresses
if (!resorted) resortLinks(); if (!resorted) resortLinks();
return emaillinks; return emaillinks;
@ -309,18 +309,18 @@ public class plasmaParserDocument {
int extpos, qpos; int extpos, qpos;
String ext = null; String ext = null;
i = anchors.entrySet().iterator(); i = anchors.entrySet().iterator();
hyperlinks = new HashMap(); hyperlinks = new HashMap<String, String>();
videolinks = new HashMap(); videolinks = new HashMap<String, String>();
audiolinks = new HashMap(); audiolinks = new HashMap<String, String>();
applinks = new HashMap(); applinks = new HashMap<String, String>();
emaillinks = new HashMap(); emaillinks = new HashMap<String, String>();
TreeSet collectedImages = new TreeSet(); // this is a set that is collected now and joined later to the imagelinks TreeSet<htmlFilterImageEntry> collectedImages = new TreeSet<htmlFilterImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
Map.Entry entry; Map.Entry entry;
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = (Map.Entry) i.next();
u = (String) entry.getKey(); u = (String) entry.getKey();
if ((u != null) && (u.startsWith("mailto:"))) { if ((u != null) && (u.startsWith("mailto:"))) {
emaillinks.put(u.substring(7), entry.getValue()); emaillinks.put(u.substring(7), (String)entry.getValue());
} else { } else {
extpos = u.lastIndexOf("."); extpos = u.lastIndexOf(".");
if (extpos > 0) { if (extpos > 0) {
@ -337,11 +337,11 @@ public class plasmaParserDocument {
if (plasmaParser.imageExtContains(ext)) { if (plasmaParser.imageExtContains(ext)) {
collectedImages.add(new htmlFilterImageEntry(url, (String) entry.getValue(), -1, -1)); collectedImages.add(new htmlFilterImageEntry(url, (String) entry.getValue(), -1, -1));
} }
else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, entry.getValue()); else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, (String)entry.getValue());
else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, entry.getValue()); else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, (String)entry.getValue());
else if (plasmaParser.appsExtContains(ext)) applinks.put(u, entry.getValue()); else if (plasmaParser.appsExtContains(ext)) applinks.put(u, (String)entry.getValue());
} else { } else {
hyperlinks.put(u, entry.getValue()); hyperlinks.put(u, (String)entry.getValue());
} }
} catch (MalformedURLException e1) { } catch (MalformedURLException e1) {
} }
@ -356,11 +356,11 @@ public class plasmaParserDocument {
iEntry = (htmlFilterImageEntry) i.next(); iEntry = (htmlFilterImageEntry) i.next();
if (!images.contains(iEntry)) images.add(iEntry); if (!images.contains(iEntry)) images.add(iEntry);
} }
// expand the hyperlinks: // expand the hyperlinks:
// we add artificial hyperlinks to the hyperlink set // we add artificial hyperlinks to the hyperlink set
// that can be calculated from given hyperlinks and imagelinks // that can be calculated from given hyperlinks and imagelinks
hyperlinks.putAll(plasmaParser.allReflinks(hyperlinks.keySet()));
hyperlinks.putAll(plasmaParser.allReflinks(images)); hyperlinks.putAll(plasmaParser.allReflinks(images));
hyperlinks.putAll(plasmaParser.allReflinks(audiolinks.keySet())); hyperlinks.putAll(plasmaParser.allReflinks(audiolinks.keySet()));
hyperlinks.putAll(plasmaParser.allReflinks(videolinks.keySet())); hyperlinks.putAll(plasmaParser.allReflinks(videolinks.keySet()));

Loading…
Cancel
Save