From 1d4e206b2bc34a70db8ea3460abc5907766f9e5c Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 18 Jun 2012 18:10:40 +0200 Subject: [PATCH] bugfix in vocabulary generation --- htroot/Vocabulary_p.java | 8 ++++---- source/net/yacy/cora/lod/vocabulary/Tagging.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 6021e9c7c..68064fb1b 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -96,14 +96,14 @@ public class Vocabulary_p { URIMetadataRow m = segment.urlMetadata().load(u.hash()); if (m != null) t = m.dc_creator(); } - t = t.replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim(); + t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim(); if (t.length() == 0) continue; if (discoverFromTitleSplitted) { String[] ts = t.split(" "); for (String s: ts) { if (s.length() == 0) continue; if (s.endsWith(".jpg") || s.endsWith(".gif")) continue; - table.put(s, new Tagging.SOTuple("", u0)); + table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0)); } } else if (discoverFromAuthor) { String[] ts = t.split(";"); // author names are often separated by ';' @@ -111,10 +111,10 @@ public class Vocabulary_p { if (s.length() == 0) continue; int p = s.indexOf(','); // check if there is a reversed method to mention the name if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim(); - table.put(s, new Tagging.SOTuple("", u0)); + table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0)); } } else { - table.put(t, new Tagging.SOTuple("", u0)); + table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0)); } } } diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 3239deb48..6a931eb88 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -163,7 +163,7 @@ public class Tagging { for (Map.Entry e: table.entrySet()) { String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); - w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); + w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue().getSynonymsCSV()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); } w.close(); init();