diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 59814fda3..6c702f241 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -70,8 +70,8 @@ import de.anomic.yacy.yacyURL;
public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
// statics: for initialisation of the HTMLFilterAbstractScraper
- private static TreeSet linkTags0;
- private static TreeSet linkTags1;
+ private static TreeSet linkTags0;
+ private static TreeSet linkTags1;
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static {
@@ -80,7 +80,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
static {
- linkTags0 = new TreeSet(insensitiveCollator);
+ linkTags0 = new TreeSet(insensitiveCollator);
linkTags0.add("img");
linkTags0.add("base");
linkTags0.add("frame");
@@ -90,7 +90,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
linkTags0.add("embed"); //added by [MN]
linkTags0.add("param"); //added by [MN]
- linkTags1 = new TreeSet(insensitiveCollator);
+ linkTags1 = new TreeSet(insensitiveCollator);
linkTags1.add("a");
linkTags1.add("h1");
linkTags1.add("h2");
@@ -100,9 +100,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
// class variables: collectors for links
- private HashMap anchors;
- private TreeSet images; // String(absolute url)/ImageEntry relation
- private HashMap metas;
+ private HashMap anchors;
+ private TreeSet images; // String(absolute url)/ImageEntry relation
+ private HashMap metas;
private String title;
//private String headline;
private List[] headlines;
@@ -124,12 +124,12 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// it is only the reference for relative links
super(linkTags0, linkTags1);
this.root = root;
- this.anchors = new HashMap();
- this.images = new TreeSet();
- this.metas = new HashMap();
+ this.anchors = new HashMap();
+ this.images = new TreeSet();
+ this.metas = new HashMap();
this.title = "";
this.headlines = new ArrayList[4];
- for (int i = 0; i < 4; i++) headlines[i] = new ArrayList();
+ for (int i = 0; i < 4; i++) headlines[i] = new ArrayList();
this.content = new serverCharBuffer(1024);
}
@@ -311,7 +311,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
}
- // othervise take any headline
+ // otherwise take any headline
for (int i = 0; i < 4; i++) {
if (headlines[i].size() > 0) return (String) headlines[i].get(0);
}
@@ -346,17 +346,17 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
}
- public Map getAnchors() {
+ public Map getAnchors() {
// returns a url (String) / name (String) relation
return anchors;
}
- public TreeSet getImages() {
+ public TreeSet getImages() {
// this resturns a String(absolute url)/htmlFilterImageEntry - relation
return images;
}
- public Map getMetas() {
+ public Map getMetas() {
return metas;
}
diff --git a/source/de/anomic/plasma/parser/bzip/bzipParser.java b/source/de/anomic/plasma/parser/bzip/bzipParser.java
index 77bed5a05..747d2ad64 100644
--- a/source/de/anomic/plasma/parser/bzip/bzipParser.java
+++ b/source/de/anomic/plasma/parser/bzip/bzipParser.java
@@ -63,7 +63,7 @@ public class bzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static String fileExtensions = "bz2,tbz,tbz2";
static {
SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);
diff --git a/source/de/anomic/plasma/parser/doc/docParser.java b/source/de/anomic/plasma/parser/doc/docParser.java
index d25e4abcb..a3ef93d30 100644
--- a/source/de/anomic/plasma/parser/doc/docParser.java
+++ b/source/de/anomic/plasma/parser/doc/docParser.java
@@ -60,7 +60,7 @@ public class docParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static { SUPPORTED_MIME_TYPES.put("application/msword","doc"); }
/**
diff --git a/source/de/anomic/plasma/parser/gzip/gzipParser.java b/source/de/anomic/plasma/parser/gzip/gzipParser.java
index c057377f8..085e95598 100644
--- a/source/de/anomic/plasma/parser/gzip/gzipParser.java
+++ b/source/de/anomic/plasma/parser/gzip/gzipParser.java
@@ -62,7 +62,7 @@ public class gzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/x-gzip","gz,tgz");
SUPPORTED_MIME_TYPES.put("application/gzip","gz,tgz");
diff --git a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java
index 02fda681b..4b4417f24 100644
--- a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java
+++ b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java
@@ -70,7 +70,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("text/xml","xml");
SUPPORTED_MIME_TYPES.put("application/xml","xml");
@@ -96,7 +96,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
* Helping structure used to detect loops in the mimeType detection
* process
*/
- private static Hashtable threadLoopDetection = new Hashtable();
+ private static Hashtable threadLoopDetection = new Hashtable();
public mimeTypeParser() {
super(LIBX_DEPENDENCIES);
diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java
index 262e29760..ad8606df6 100644
--- a/source/de/anomic/plasma/parser/odt/odtParser.java
+++ b/source/de/anomic/plasma/parser/odt/odtParser.java
@@ -74,7 +74,7 @@ public class odtParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt");
SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");
diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java
index 3609f98c0..98c212e14 100644
--- a/source/de/anomic/plasma/parser/pdf/pdfParser.java
+++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java
@@ -70,7 +70,7 @@ public class pdfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static { SUPPORTED_MIME_TYPES.put("application/pdf","pdf"); }
/**
diff --git a/source/de/anomic/plasma/parser/ppt/pptParser.java b/source/de/anomic/plasma/parser/ppt/pptParser.java
index bd276028f..1df0fd873 100644
--- a/source/de/anomic/plasma/parser/ppt/pptParser.java
+++ b/source/de/anomic/plasma/parser/ppt/pptParser.java
@@ -62,7 +62,7 @@ public class pptParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/mspowerpoint","ppt,pps");
SUPPORTED_MIME_TYPES.put("application/powerpoint","ppt,pps");
diff --git a/source/de/anomic/plasma/parser/ps/psParser.java b/source/de/anomic/plasma/parser/ps/psParser.java
index 3d7900647..0ecbf3ce4 100644
--- a/source/de/anomic/plasma/parser/ps/psParser.java
+++ b/source/de/anomic/plasma/parser/ps/psParser.java
@@ -65,7 +65,7 @@ public class psParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/postscript","ps");
SUPPORTED_MIME_TYPES.put("text/postscript","ps");
diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java
index 1296eb689..b33f65012 100644
--- a/source/de/anomic/plasma/parser/rpm/rpmParser.java
+++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java
@@ -70,7 +70,7 @@ public class rpmParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");
@@ -110,7 +110,7 @@ public class rpmParser extends AbstractParser implements Parser {
RPMFile rpmFile = null;
try {
String summary = null, description = null, packager = null, name = sourceFile.getName();
- HashMap anchors = new HashMap();
+ HashMap anchors = new HashMap();
StringBuffer content = new StringBuffer();
// opening the rpm file
diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java
index 4ee94ea4f..c8595b3ca 100644
--- a/source/de/anomic/plasma/parser/rss/rssParser.java
+++ b/source/de/anomic/plasma/parser/rss/rssParser.java
@@ -73,7 +73,7 @@ public class rssParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("text/rss","xml,rss,rdf");
SUPPORTED_MIME_TYPES.put("application/rdf+xml","xml,rss,rdf");
@@ -95,9 +95,9 @@ public class rssParser extends AbstractParser implements Parser {
public plasmaParserDocument parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
try {
- LinkedList feedSections = new LinkedList();
- HashMap anchors = new HashMap();
- TreeSet images = new TreeSet();
+ LinkedList feedSections = new LinkedList();
+ HashMap anchors = new HashMap();
+ TreeSet images = new TreeSet();
serverByteBuffer text = new serverByteBuffer();
serverCharBuffer authors = new serverCharBuffer();
@@ -149,12 +149,12 @@ public class rssParser extends AbstractParser implements Parser {
feedSections.add(itemHeadline);
}
- Map itemLinks = scraper.getAnchors();
+ Map itemLinks = scraper.getAnchors();
if ((itemLinks != null) && (itemLinks.size() > 0)) {
anchors.putAll(itemLinks);
}
- TreeSet itemImages = scraper.getImages();
+ TreeSet itemImages = scraper.getImages();
if ((itemImages != null) && (itemImages.size() > 0)) {
images.addAll(itemImages);
}
diff --git a/source/de/anomic/plasma/parser/rtf/rtfParser.java b/source/de/anomic/plasma/parser/rtf/rtfParser.java
index 133bbe20b..ed8a1128f 100644
--- a/source/de/anomic/plasma/parser/rtf/rtfParser.java
+++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java
@@ -61,7 +61,7 @@ public class rtfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/rtf","rtf");
SUPPORTED_MIME_TYPES.put("text/rtf","rtf");
diff --git a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java
index cea2a6066..2a254a7fe 100644
--- a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java
+++ b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java
@@ -93,7 +93,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
case IInArchive.NExtract_NAskMode_kSkip:
this.log.logFine("Skipping " + this.filePath);
break;
- };
+ }
}
public void SetOperationResult(int arg0) throws IOException {
@@ -126,7 +126,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
}
// revert the above workaround
- Map nanchors = new HashMap(theDoc.getAnchors().size(), 1f);
+ Map nanchors = new HashMap(theDoc.getAnchors().size(), 1f);
Iterator it = theDoc.getAnchors().entrySet().iterator();
Map.Entry entry;
String base = doc.getLocation().toNormalform(false, true);
@@ -135,9 +135,9 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
if (((String)entry.getKey()).startsWith(base + "/")) {
String ref = "#" + ((String)entry.getKey()).substring(base.length() + 1);
this.log.logFinest("changing " + entry.getKey() + " to use reference " + ref);
- nanchors.put(base + ref, entry.getValue());
+ nanchors.put(base + ref, (String)entry.getValue());
} else {
- nanchors.put(entry.getKey(), entry.getValue());
+ nanchors.put((String)entry.getKey(), (String)entry.getValue());
}
}
theDoc.getAnchors().clear();
diff --git a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java
index 0c15fa30f..69d60e6da 100644
--- a/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java
+++ b/source/de/anomic/plasma/parser/sevenzip/sevenzipParser.java
@@ -65,7 +65,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z");
}
diff --git a/source/de/anomic/plasma/parser/swf/swfParser.java b/source/de/anomic/plasma/parser/swf/swfParser.java
index 9e47ff7db..32645f946 100644
--- a/source/de/anomic/plasma/parser/swf/swfParser.java
+++ b/source/de/anomic/plasma/parser/swf/swfParser.java
@@ -61,7 +61,7 @@ public class swfParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf");
SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf");
@@ -101,7 +101,7 @@ public class swfParser extends AbstractParser implements Parser {
String[] sections = null;
String abstrct = null;
//TreeSet images = null;
- HashMap anchors = new HashMap();
+ HashMap anchors = new HashMap();
int urls = 0;
int urlStart = -1;
int urlEnd = 0;
diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java
index 40cdc345c..08df662a6 100644
--- a/source/de/anomic/plasma/parser/tar/tarParser.java
+++ b/source/de/anomic/plasma/parser/tar/tarParser.java
@@ -59,6 +59,7 @@ import java.util.zip.GZIPInputStream;
import com.ice.tar.TarEntry;
import com.ice.tar.TarInputStream;
+import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
@@ -74,7 +75,7 @@ public class tarParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
SUPPORTED_MIME_TYPES.put("application/tar","tar");
@@ -127,11 +128,11 @@ public class tarParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer();
- LinkedList docSections = new LinkedList();
+ LinkedList docSections = new LinkedList();
StringBuffer docAbstrct = new StringBuffer();
- Map docAnchors = new HashMap();
- TreeSet docImages = new TreeSet();
+ Map docAnchors = new HashMap();
+ TreeSet docImages = new TreeSet();
// looping through the contained files
TarEntry entry;
diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java
index 53f8ba869..e14004c52 100644
--- a/source/de/anomic/plasma/parser/vcf/vcfParser.java
+++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java
@@ -76,7 +76,7 @@ public class vcfParser extends AbstractParser implements Parser {
*
* TODO: support of x-mozilla-cpt and x-mozilla-html tags
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
@@ -102,9 +102,9 @@ public class vcfParser extends AbstractParser implements Parser {
try {
StringBuffer parsedTitle = new StringBuffer();
StringBuffer parsedDataText = new StringBuffer();
- HashMap parsedData = new HashMap();
- HashMap anchors = new HashMap();
- LinkedList parsedNames = new LinkedList();
+ HashMap parsedData = new HashMap();
+ HashMap anchors = new HashMap();
+ LinkedList parsedNames = new LinkedList();
boolean useLastLine = false;
int lineNr = 0;
diff --git a/source/de/anomic/plasma/parser/xls/xlsParser.java b/source/de/anomic/plasma/parser/xls/xlsParser.java
index 17780429f..010b1d2bd 100644
--- a/source/de/anomic/plasma/parser/xls/xlsParser.java
+++ b/source/de/anomic/plasma/parser/xls/xlsParser.java
@@ -74,7 +74,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/msexcel","xls");
SUPPORTED_MIME_TYPES.put("application/excel","xls");
diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java
index 8cbf1678f..d3fcfb798 100644
--- a/source/de/anomic/plasma/parser/zip/zipParser.java
+++ b/source/de/anomic/plasma/parser/zip/zipParser.java
@@ -43,6 +43,7 @@
package de.anomic.plasma.parser.zip;
+import de.anomic.htmlFilter.htmlFilterImageEntry;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
@@ -72,7 +73,7 @@ public class zipParser extends AbstractParser implements Parser {
* a list of mime types that are supported by this parser class
* @see #getSupportedMimeTypes()
*/
- public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
+ public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();
static {
SUPPORTED_MIME_TYPES.put("application/zip","zip");
SUPPORTED_MIME_TYPES.put("application/x-zip","zip");
@@ -91,7 +92,7 @@ public class zipParser extends AbstractParser implements Parser {
this.parserName = "Compressed Archive File Parser";
}
- public Hashtable getSupportedMimeTypes() {
+ public Hashtable getSupportedMimeTypes() {
return SUPPORTED_MIME_TYPES;
}
@@ -111,10 +112,10 @@ public class zipParser extends AbstractParser implements Parser {
StringBuffer docKeywords = new StringBuffer();
StringBuffer docLongTitle = new StringBuffer();
- LinkedList docSections = new LinkedList();
+ LinkedList docSections = new LinkedList();
StringBuffer docAbstrct = new StringBuffer();
- Map docAnchors = new HashMap();
- TreeSet docImages = new TreeSet();
+ Map docAnchors = new HashMap();
+ TreeSet docImages = new TreeSet();
// creating a new parser class to parse the unzipped content
plasmaParser theParser = new plasmaParser();
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index 8aa48d51d..05e58d872 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -66,7 +66,7 @@ public final class plasmaParser {
public static final String PARSER_MODE_URLREDIRECTOR = "URLREDIRECTOR";
public static final String PARSER_MODE_ICAP = "ICAP";
public static final String PARSER_MODE_IMAGE = "IMAGE";
- public static final HashSet PARSER_MODE = new HashSet(Arrays.asList(new String[]{
+ public static final HashSet PARSER_MODE = new HashSet(Arrays.asList(new String[]{
PARSER_MODE_PROXY,
PARSER_MODE_CRAWLER,
PARSER_MODE_ICAP,
@@ -74,7 +74,7 @@ public final class plasmaParser {
PARSER_MODE_IMAGE
}));
- private static final HashMap parserConfigList = new HashMap();
+ private static final HashMap parserConfigList = new HashMap();
/**
* A list containing all installed parsers and the mimeType that they support
@@ -85,8 +85,8 @@ public final class plasmaParser {
/**
* A list of file extensions and mime types that are supported by the html-parser
*/
- public static final HashSet supportedHTMLFileExt = new HashSet();
- public static final HashSet supportedHTMLMimeTypes = new HashSet();
+ public static final HashSet supportedHTMLFileExt = new HashSet();
+ public static final HashSet supportedHTMLMimeTypes = new HashSet();
private static final Properties mimeTypeLookupByFileExt = new Properties();
static {
@@ -104,15 +104,15 @@ public final class plasmaParser {
/**
* A list of media extensions that should not be handled by the plasmaParser
*/
- private static final HashSet mediaExtSet = new HashSet();
+ private static final HashSet mediaExtSet = new HashSet();
/**
* A list of image, audio, video and application extensions
*/
- private static final HashSet imageExtSet = new HashSet();
- private static final HashSet audioExtSet = new HashSet();
- private static final HashSet videoExtSet = new HashSet();
- private static final HashSet appsExtSet = new HashSet();
+ private static final HashSet imageExtSet = new HashSet();
+ private static final HashSet audioExtSet = new HashSet();
+ private static final HashSet videoExtSet = new HashSet();
+ private static final HashSet appsExtSet = new HashSet();
/**
* This {@link FilenameFilter} is used to find all classes based on there filenames
@@ -181,7 +181,7 @@ public final class plasmaParser {
* yacy html parser
*/
public static void initHTMLParsableMimeTypes(String htmlParsableMimeTypes) {
- LinkedList mimeTypes = new LinkedList();
+ LinkedList mimeTypes = new LinkedList();
if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
return;
}
@@ -195,8 +195,8 @@ public final class plasmaParser {
}
}
- public static List extString2extList(String extString) {
- LinkedList extensions = new LinkedList();
+ public static List extString2extList(String extString) {
+ LinkedList extensions = new LinkedList();
if ((extString == null) || (extString.length() == 0)) {
return extensions;
} else {
@@ -206,35 +206,35 @@ public final class plasmaParser {
return extensions;
}
- public static void initMediaExt(List mediaExtList) {
+ public static void initMediaExt(List mediaExtList) {
synchronized (mediaExtSet) {
mediaExtSet.clear();
mediaExtSet.addAll(mediaExtList);
}
}
- public static void initImageExt(List imageExtList) {
+ public static void initImageExt(List imageExtList) {
synchronized (imageExtSet) {
imageExtSet.clear();
imageExtSet.addAll(imageExtList);
}
}
- public static void initAudioExt(List audioExtList) {
+ public static void initAudioExt(List audioExtList) {
synchronized (audioExtSet) {
audioExtSet.clear();
audioExtSet.addAll(audioExtList);
}
}
- public static void initVideoExt(List videoExtList) {
+ public static void initVideoExt(List videoExtList) {
synchronized (videoExtSet) {
videoExtSet.clear();
videoExtSet.addAll(videoExtList);
}
}
- public static void initAppsExt(List appsExtList) {
+ public static void initAppsExt(List appsExtList) {
synchronized (appsExtSet) {
appsExtSet.clear();
appsExtSet.addAll(appsExtList);
@@ -247,7 +247,7 @@ public final class plasmaParser {
}
}
- public static void initSupportedHTMLFileExt(List supportedRealtimeFileExtList) {
+ public static void initSupportedHTMLFileExt(List supportedRealtimeFileExtList) {
synchronized (supportedHTMLFileExt) {
supportedHTMLFileExt.clear();
supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
@@ -750,10 +750,10 @@ public final class plasmaParser {
}
- static Map allReflinks(Set links) {
+ static Map allReflinks(Set links) {
// links is either a Set of Strings (with urls) or htmlFilterImageEntries
// we find all links that are part of a reference inside a url
- HashMap v = new HashMap();
+ HashMap v = new HashMap();
Iterator i = links.iterator();
Object o;
String url;
@@ -784,9 +784,9 @@ public final class plasmaParser {
return v;
}
- static Map allSubpaths(Set links) {
+ static Map allSubpaths(Set links) {
// links is either a Set of Strings (urls) or a Set of htmlFilterImageEntries
- HashMap v = new HashMap();
+ HashMap v = new HashMap();
Iterator i = links.iterator();
Object o;
String url;
diff --git a/source/de/anomic/plasma/plasmaParserDocument.java b/source/de/anomic/plasma/plasmaParserDocument.java
index e709cb768..ca1360410 100644
--- a/source/de/anomic/plasma/plasmaParserDocument.java
+++ b/source/de/anomic/plasma/plasmaParserDocument.java
@@ -68,21 +68,21 @@ import de.anomic.plasma.parser.Parser;
public class plasmaParserDocument {
private yacyURL location; // the source url
- private String mimeType; // mimeType as taken from http header
- private String charset; // the charset of the document
- private List keywords; // most resources provide a keyword field
- private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result
- private StringBuffer author; // author or copyright
- private List sections; // if present: more titles/headlines appearing in the document
- private StringBuffer abstrct; // an abstract, if present: short content description
- private Object text; // the clear text, all that is visible
- private Map anchors; // all links embedded as clickeable entities (anchor tags)
- private TreeSet images; // all visible pictures in document
+ private String mimeType; // mimeType as taken from http header
+ private String charset; // the charset of the document
+ private List keywords; // most resources provide a keyword field
+ private StringBuffer title; // a document title, taken from title or h1 tag; shall appear as headline of search result
+ private StringBuffer author; // author or copyright
+ private List sections; // if present: more titles/headlines appearing in the document
+ private StringBuffer abstrct; // an abstract, if present: short content description
+ private Object text; // the clear text, all that is visible
+ private Map anchors; // all links embedded as clickeable entities (anchor tags)
+ private TreeSet images; // all visible pictures in document
// the anchors and images - Maps are URL-to-EntityDescription mappings.
// The EntityDescription appear either as visible text in anchors or as alternative
// text in image tags.
- private Map hyperlinks, audiolinks, videolinks, applinks;
- private Map emaillinks;
+ private Map hyperlinks, audiolinks, videolinks, applinks;
+ private Map emaillinks;
private yacyURL favicon;
private boolean resorted;
private InputStream textStream;
@@ -90,17 +90,17 @@ public class plasmaParserDocument {
protected plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
- Object text, Map anchors, TreeSet images) {
+ Object text, Map anchors, TreeSet images) {
this.location = location;
this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
this.charset = charset;
- this.keywords = (keywords == null) ? new LinkedList() : Arrays.asList(keywords);
+ this.keywords = (keywords == null) ? new LinkedList() : Arrays.asList(keywords);
this.title = (title == null) ? new StringBuffer() : new StringBuffer(title);
this.author = (author == null) ? new StringBuffer() : new StringBuffer(author);
- this.sections = (sections == null) ? new LinkedList() : Arrays.asList(sections);
+ this.sections = (sections == null) ? new LinkedList() : Arrays.asList(sections);
this.abstrct = (abstrct == null) ? new StringBuffer() : new StringBuffer(abstrct);
- this.anchors = (anchors == null) ? new HashMap(0) : anchors;
- this.images = (images == null) ? new TreeSet() : images;
+ this.anchors = (anchors == null) ? new HashMap(0) : anchors;
+ this.images = (images == null) ? new TreeSet() : images;
this.hyperlinks = null;
this.audiolinks = null;
this.videolinks = null;
@@ -125,21 +125,21 @@ public class plasmaParserDocument {
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
- byte[] text, Map anchors, TreeSet images) {
+ byte[] text, Map anchors, TreeSet images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
- File text, Map anchors, TreeSet images) {
+ File text, Map anchors, TreeSet images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
public plasmaParserDocument(yacyURL location, String mimeType, String charset,
String[] keywords, String title, String author,
String[] sections, String abstrct,
- serverCachedFileOutputStream text, Map anchors, TreeSet images) {
+ serverCachedFileOutputStream text, Map anchors, TreeSet images) {
this(location, mimeType, charset, keywords, title, author, sections, abstrct, (Object)text, anchors, images);
}
@@ -238,7 +238,7 @@ public class plasmaParserDocument {
public String getKeywords(char separator) {
// sort out doubles and empty words
- TreeSet hs = new TreeSet();
+ TreeSet hs = new TreeSet();
String s;
for (int i = 0; i < this.keywords.size(); i++) {
if (this.keywords.get(i) == null) continue;
@@ -253,11 +253,11 @@ public class plasmaParserDocument {
return sb.substring(0, sb.length() - 1);
}
- public List getKeywords() {
+ public List getKeywords() {
return this.keywords;
}
- public Map getAnchors() {
+ public Map getAnchors() {
// returns all links embedded as anchors (clickeable entities)
// this is a url(String)/text(String) map
return anchors;
@@ -266,35 +266,35 @@ public class plasmaParserDocument {
// the next three methods provide a calculated view on the getAnchors/getImages:
- public Map getHyperlinks() {
+ public Map getHyperlinks() {
// this is a subset of the getAnchor-set: only links to other hyperrefs
if (!resorted) resortLinks();
return hyperlinks;
}
- public Map getAudiolinks() {
+ public Map getAudiolinks() {
if (!resorted) resortLinks();
return this.audiolinks;
}
- public Map getVideolinks() {
+ public Map getVideolinks() {
if (!resorted) resortLinks();
return this.videolinks;
}
- public TreeSet getImages() {
+ public TreeSet getImages() {
// returns all links enbedded as pictures (visible in document)
// this resturns a htmlFilterImageEntry collection
if (!resorted) resortLinks();
return images;
}
- public Map getApplinks() {
+ public Map getApplinks() {
if (!resorted) resortLinks();
return this.applinks;
}
- public Map getEmaillinks() {
+ public Map getEmaillinks() {
// this is part of the getAnchor-set: only links to email addresses
if (!resorted) resortLinks();
return emaillinks;
@@ -309,18 +309,18 @@ public class plasmaParserDocument {
int extpos, qpos;
String ext = null;
i = anchors.entrySet().iterator();
- hyperlinks = new HashMap();
- videolinks = new HashMap();
- audiolinks = new HashMap();
- applinks = new HashMap();
- emaillinks = new HashMap();
- TreeSet collectedImages = new TreeSet(); // this is a set that is collected now and joined later to the imagelinks
+ hyperlinks = new HashMap();
+ videolinks = new HashMap();
+ audiolinks = new HashMap();
+ applinks = new HashMap();
+ emaillinks = new HashMap();
+ TreeSet collectedImages = new TreeSet(); // this is a set that is collected now and joined later to the imagelinks
Map.Entry entry;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
u = (String) entry.getKey();
if ((u != null) && (u.startsWith("mailto:"))) {
- emaillinks.put(u.substring(7), entry.getValue());
+ emaillinks.put(u.substring(7), (String)entry.getValue());
} else {
extpos = u.lastIndexOf(".");
if (extpos > 0) {
@@ -337,11 +337,11 @@ public class plasmaParserDocument {
if (plasmaParser.imageExtContains(ext)) {
collectedImages.add(new htmlFilterImageEntry(url, (String) entry.getValue(), -1, -1));
}
- else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, entry.getValue());
- else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, entry.getValue());
- else if (plasmaParser.appsExtContains(ext)) applinks.put(u, entry.getValue());
+ else if (plasmaParser.audioExtContains(ext)) audiolinks.put(u, (String)entry.getValue());
+ else if (plasmaParser.videoExtContains(ext)) videolinks.put(u, (String)entry.getValue());
+ else if (plasmaParser.appsExtContains(ext)) applinks.put(u, (String)entry.getValue());
} else {
- hyperlinks.put(u, entry.getValue());
+ hyperlinks.put(u, (String)entry.getValue());
}
} catch (MalformedURLException e1) {
}
@@ -356,11 +356,11 @@ public class plasmaParserDocument {
iEntry = (htmlFilterImageEntry) i.next();
if (!images.contains(iEntry)) images.add(iEntry);
}
-
+
// expand the hyperlinks:
// we add artificial hyperlinks to the hyperlink set
// that can be calculated from given hyperlinks and imagelinks
- hyperlinks.putAll(plasmaParser.allReflinks(hyperlinks.keySet()));
+
hyperlinks.putAll(plasmaParser.allReflinks(images));
hyperlinks.putAll(plasmaParser.allReflinks(audiolinks.keySet()));
hyperlinks.putAll(plasmaParser.allReflinks(videolinks.keySet()));