Merge branch 'master' of https://github.com/yacy/yacy_search_server

9 years ago · ff963cbe23
parent f5746b5490 c9937973e3
commit ff963cbe23
5 changed files with 41 additions and 40 deletions
--- a/htroot/Table_API_p.html
+++ b/htroot/Table_API_p.html
@@ -85,7 +85,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
          <td valign="top" align="left"><input type="checkbox" name="item_#[count]#" value="mark_#[pk]#" /></td>
          <td valign="top">#[type]#
          #(isCrawlerStart)#::<br/><br/>
-          <a href="#[url]#" title="clone"><img src="env/grafics/doc.gif"><img src="env/grafics/right.gif"><img src="env/grafics/doc.gif"></a>::<br/><br/>
+          <a href="#[url]#" title="clone" target="_parent"><img src="env/grafics/doc.gif"><img src="env/grafics/right.gif"><img src="env/grafics/doc.gif"></a>::<br/><br/>
         
         <script>
           var f = document.createElement("form");
@@ -93,7 +93,9 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
           f.setAttribute("enctype", "multipart/form-data");
           f.setAttribute("accept-charset", "UTF-8");
           f.setAttribute("action", "#[servlet]#");
+           f.setAttribute("target", "_parent");
           f.setAttribute("id", "#[pk]#");
+           f.setAttribute("name", "#[pk]#");
           #{attr}#
           var e = document.createElement("input");
           e.setAttribute("type", "hidden");
--- a/source/net/yacy/cora/document/id/MultiProtocolURL.java
+++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java
@@ -945,6 +945,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
        return this.searchpart;
    }

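+    /**
+     * Returns the search part parameters as a key=value map.
+     * Keys and values are kept in the internal url-encoded format.
+     * For unescaped (clear text) values use getAttributes() instead.
+     * @see #getAttributes()
+     *
+     * @return map of parameter name to value, or null if there is no search part
+     */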
    public Map<String, String> getSearchpartMap() {
        if (this.searchpart == null) return null;
        this.searchpart = this.searchpart.replaceAll("&amp;", "&");
@@ -1027,6 +1035,10 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU

    /**
     * Evaluates url search part and returns attribute '=' value pairs
+     * the returned values are in clear text (without urlencoding).
+     * 
+     * To get the parameter map as (url-encoded key and values)
+     * @see #getSearchpartMap()
     *
     * @return map key=attribute name, value=string after '='
     */
@@ -1037,9 +1049,9 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
        for (final String element : questp) {
            int p = element.indexOf('=');
            if (p != -1) {
-                map.put(element.substring(0, p), element.substring(p + 1));
+                map.put(unescape(element.substring(0, p)), unescape(element.substring(p + 1)));
            } else {
-                if (!element.isEmpty()) map.put(element, "");
+                if (!element.isEmpty()) map.put(unescape(element), "");
            }
        }
        return map;
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -57,7 +57,6 @@ import net.yacy.document.SentenceReader;
 import net.yacy.document.VocabularyScraper;
 import net.yacy.document.parser.htmlParser;
 import net.yacy.document.parser.html.Evaluation.Element;
-import net.yacy.document.parser.images.genericImageParser;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.util.FileUtils;
 import net.yacy.kelondro.util.ISO639;
@@ -552,23 +551,16 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            href = CharacterCoding.html2unicode(href);
            AnchorURL url;
            if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
-                final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
-                if (genericImageParser.SUPPORTED_EXTENSIONS.contains(ext)) {
-                    // special handling of such urls: put them to the image urls
-                    final ImageEntry ie = new ImageEntry(url, recursiveParse(url, tag.content.getChars()), -1, -1, -1);
-                    this.images.add(ie);
-                } else {
-                    if (followDenied()) {
-                        String rel = tag.opts.getProperty("rel", EMPTY_STRING);
-                        if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow"; 
-                        tag.opts.put("rel", rel);
-                    }
-                    tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html in tag text like  "<a ...> <span>test</span> </a>"
-                    tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
-                    url.setAll(tag.opts);
-                    recursiveParse(url, tag.content.getChars());
-                    this.anchors.add(url);
+                if (followDenied()) {
+                    String rel = tag.opts.getProperty("rel", EMPTY_STRING);
+                    if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow"; 
+                    tag.opts.put("rel", rel);
                }
+                tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html in tag text like  "<a ...> <span>test</span> </a>"
+                tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
+                url.setAll(tag.opts);
+                recursiveParse(url, tag.content.getChars());
+                this.anchors.add(url);
            }
            this.evaluationScores.match(Element.apath, href);
        }
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@@ -291,8 +291,7 @@ public final class TransformerWriter extends Writer {
        }
        if (this.transformer != null && this.transformer.isTag0(tagname)) {
            // this single tag is collected at once here
-            char[] b = new char[0];
-            b = this.transformer.transformTag0(tag, quotechar);
+            char[] b = this.transformer.transformTag0(tag, quotechar);
            return b;
        } else if ((this.scraper != null && this.scraper.isTag1(tagname)) ||
                   (this.transformer != null && this.transformer.isTag1(tagname))) {
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -65,30 +65,25 @@ import com.drew.metadata.Metadata;
 import com.drew.metadata.Tag;
 import com.drew.metadata.exif.GpsDirectory;

+/**
+ * Parser for images: bmp, jpeg and all formats supported by the Java Image I/O API.
+ * By default Java ImageIO supports bmp, gif, jpg, jpeg, png, wbmp (tif if jai-imageio is in classpath/registered).
+ * http://download.java.net/media/jai-imageio/javadoc/1.1/overview-summary.html
+ */
 public class genericImageParser extends AbstractParser implements Parser {

-    /**
-     * a list of mime types that are supported by this parser class
-     * @see #getSupportedMimeTypes()
-     */
-    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
-    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
-    static {
+    public genericImageParser() {
+        super("Generic Image Parser");
+
        SUPPORTED_EXTENSIONS.add("bmp");
-        // by default java ImageIO supports bmp, gif, jpg, jpeg, png, wbmp (tif if jai-imageio is in classpath/registered)
-        // http://download.java.net/media/jai-imageio/javadoc/1.1/overview-summary.html
        SUPPORTED_EXTENSIONS.add("jpe"); // not listed in ImageIO extension but sometimes used for jpeg
        SUPPORTED_EXTENSIONS.addAll(Arrays.asList(ImageIO.getReaderFileSuffixes()));

        SUPPORTED_MIME_TYPES.add("image/bmp");
-        SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently        
+        SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
        SUPPORTED_MIME_TYPES.addAll(Arrays.asList(ImageIO.getReaderMIMETypes()));
    }

-    public genericImageParser() {
-        super("Generic Image Parser");
-    }
-
    @Override
    public Document[] parse(
            final AnchorURL location,
@@ -130,7 +125,8 @@ public class genericImageParser extends AbstractParser implements Parser {
            try {
                b = FileUtils.read(source);
                // check jpeg file signature (magic number FF D8 FF)
-                if ((b[0] != (byte) 0xFF) // cast to signed byte (-1)
+                if (b.length < 3
+                        || (b[0] != (byte) 0xFF) // cast to signed byte (-1)
                        || (b[1] != (byte) 0xD8) //cast to signed byte (-40)
                        || (b[2] != (byte) 0xFF)) {
                    throw new Parser.Failure("File has no jpeg signature", location);
@@ -232,7 +228,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        return SUPPORTED_EXTENSIONS;
    }

-    public static ImageInfo parseJavaImage(
+    private ImageInfo parseJavaImage(
                            final AnchorURL location,
                            final InputStream sourceStream) throws Parser.Failure {
        BufferedImage image = null;
@@ -247,7 +243,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        return parseJavaImage(location, image);
    }

-    public static ImageInfo parseJavaImage(
+    private ImageInfo parseJavaImage(
                            final AnchorURL location,
                            final BufferedImage image) {
        final ImageInfo ii = new ImageInfo(location);
@@ -284,7 +280,7 @@ public class genericImageParser extends AbstractParser implements Parser {
        return ii;
    }

-    public static class ImageInfo {
+    private class ImageInfo {
        public AnchorURL location;
        public BufferedImage image;
        public StringBuilder info;