diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 5dcc13e1f..b4c532b0e 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -769,18 +769,18 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final String content = tag.opts.getProperty("content", EMPTY_STRING);
String name = tag.opts.getProperty("name", EMPTY_STRING);
if (name.length() > 0) {
- this.metas.put(name.toLowerCase(), CharacterCoding.html2unicode(content));
+ this.metas.put(name.toLowerCase(), content);
if (name.toLowerCase().equals("generator")) {
this.evaluationScores.match(Element.metagenerator, content);
}
}
name = tag.opts.getProperty("http-equiv", EMPTY_STRING);
if (name.length() > 0) {
- this.metas.put(name.toLowerCase(), CharacterCoding.html2unicode(content));
+ this.metas.put(name.toLowerCase(), content);
}
name = tag.opts.getProperty("property", EMPTY_STRING);
if (name.length() > 0) {
- this.metas.put(name.toLowerCase(), CharacterCoding.html2unicode(content));
+ this.metas.put(name.toLowerCase(), content);
}
} else if (tag.name.equalsIgnoreCase("area")) {
final String areatitle = cleanLine(tag.opts.getProperty("title", EMPTY_STRING));
@@ -904,7 +904,6 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// System.out.println("ScrapeTag1: tag.tagname=" + tag.tagname + ", opts=" + tag.opts.toString() + ", text=" + UTF8.String(text));
if (tag.name.equalsIgnoreCase("a") && tag.content.length() < 2048) {
String href = tag.opts.getProperty("href", EMPTY_STRING);
- href = CharacterCoding.html2unicode(href);
AnchorURL url;
if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
if (followDenied()) {
diff --git a/source/net/yacy/kelondro/io/CharBuffer.java b/source/net/yacy/kelondro/io/CharBuffer.java
index 35330d333..ad22d9300 100644
--- a/source/net/yacy/kelondro/io/CharBuffer.java
+++ b/source/net/yacy/kelondro/io/CharBuffer.java
@@ -32,6 +32,7 @@ import java.io.Writer;
import java.util.Properties;
import net.yacy.cora.document.encoding.UTF8;
+import net.yacy.document.parser.html.CharacterCoding;
public final class CharBuffer extends Writer {
@@ -444,6 +445,7 @@ public final class CharBuffer extends Writer {
while ((pos < this.length) && (this.buffer[pos] <= 32)) pos++;
// doublequotes are obligatory. However, we want to be fuzzy if they
// are ommittet
+ String value = null;
if (pos >= this.length) {
// error case: input ended too early
break;
@@ -453,7 +455,7 @@ public final class CharBuffer extends Writer {
start = pos;
while ((pos < this.length) && (this.buffer[pos] != doublequote)) pos++;
if (pos >= this.length) break; // this is the case if we found no parent doublequote
- p.setProperty(key, new String(this.buffer, start, pos - start).trim());
+ value = new String(this.buffer, start, pos - start).trim();
pos++;
} else if (this.buffer[pos] == singlequote) {
// search next singlequote
@@ -461,14 +463,15 @@ public final class CharBuffer extends Writer {
start = pos;
while ((pos < this.length) && (this.buffer[pos] != singlequote)) pos++;
if (pos >= this.length) break; // this is the case if we found no parent singlequote
- p.setProperty(key, new String(this.buffer, start, pos - start).trim());
+ value = new String(this.buffer, start, pos - start).trim();
pos++;
} else {
// search next whitespace
start = pos;
while ((pos < this.length) && (this.buffer[pos] > 32)) pos++;
- p.setProperty(key, new String(this.buffer, start, pos - start).trim());
+ value = new String(this.buffer, start, pos - start).trim();
}
+ p.setProperty(key, CharacterCoding.html2unicode(value));
// pos should point now to a whitespace: eat up spaces
while ((pos < this.length) && (this.buffer[pos] <= 32)) pos++;
// go on with next loop