Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 13 years ago
commit 8c544edee4

@ -96,14 +96,14 @@ public class Vocabulary_p {
URIMetadataRow m = segment.urlMetadata().load(u.hash()); URIMetadataRow m = segment.urlMetadata().load(u.hash());
if (m != null) t = m.dc_creator(); if (m != null) t = m.dc_creator();
} }
t = t.replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll("  ", " ").trim(); t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll("  ", " ").trim();
if (t.length() == 0) continue; if (t.length() == 0) continue;
if (discoverFromTitleSplitted) { if (discoverFromTitleSplitted) {
String[] ts = t.split(" "); String[] ts = t.split(" ");
for (String s: ts) { for (String s: ts) {
if (s.length() == 0) continue; if (s.length() == 0) continue;
if (s.endsWith(".jpg") || s.endsWith(".gif")) continue; if (s.endsWith(".jpg") || s.endsWith(".gif")) continue;
table.put(s, new Tagging.SOTuple("", u0)); table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
} }
} else if (discoverFromAuthor) { } else if (discoverFromAuthor) {
String[] ts = t.split(";"); // author names are often separated by ';' String[] ts = t.split(";"); // author names are often separated by ';'
@ -111,10 +111,10 @@ public class Vocabulary_p {
if (s.length() == 0) continue; if (s.length() == 0) continue;
int p = s.indexOf(','); // check if there is a reversed method to mention the name int p = s.indexOf(','); // check if there is a reversed method to mention the name
if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim(); if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim();
table.put(s, new Tagging.SOTuple("", u0)); table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
} }
} else { } else {
table.put(t, new Tagging.SOTuple("", u0)); table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0));
} }
} }
} }

@ -163,7 +163,7 @@ public class Tagging {
for (Map.Entry<String, SOTuple> e: table.entrySet()) { for (Map.Entry<String, SOTuple> e: table.entrySet()) {
String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV();
String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); String o = e.getValue() == null ? "" : e.getValue().getObjectlink();
w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue().getSynonymsCSV()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n");
} }
w.close(); w.close();
init(); init();

Loading…
Cancel
Save