diff --git a/htroot/Vocabulary_p.html b/htroot/Vocabulary_p.html
index 30226a0ca..91c272d28 100644
--- a/htroot/Vocabulary_p.html
+++ b/htroot/Vocabulary_p.html
@@ -109,7 +109,13 @@ To see a list of all APIs, please visit the
Objectspace
-
Discover Terms from
object link file name object page title object page title (splitted) object page author
+
Discover Terms:
+
+ no auto-discovery (empty vocabulary)
+ from file name
+ from page title
+ from page title (splitted)
+ from page author
diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java
index 2f6ceaa56..a3604a177 100644
--- a/htroot/Vocabulary_p.java
+++ b/htroot/Vocabulary_p.java
@@ -64,55 +64,58 @@ public class Vocabulary_p {
if (discoveruri == null) discoverobjectspace = "";
Map table = new TreeMap();
File propFile = LibraryProvider.autotagging.getVocabularyFile(discovername);
+ boolean discoverNot = post.get("discovermethod", "").equals("none");
boolean discoverFromPath = post.get("discovermethod", "").equals("path");
boolean discoverFromTitle = post.get("discovermethod", "").equals("title");
boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted");
boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
Segment segment = sb.index;
- Iterator ui = segment.urlSelector(discoveruri, 600000L, 100000);
String t;
- while (ui.hasNext()) {
- DigestURI u = ui.next();
- String u0 = u.toNormalform(true);
- t = "";
- if (discoverFromPath) {
- int exp = u0.lastIndexOf('.');
- if (exp < 0) continue;
- int slp = u0.lastIndexOf('/', exp);
- if (slp < 0) continue;
- t = u0.substring(slp, exp);
- int p;
- while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1);
- while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1);
- }
- if (discoverFromTitle || discoverFromTitleSplitted) {
- URIMetadataNode m = segment.fulltext().getMetadata(u.hash());
- if (m != null) t = m.dc_title();
- if (t.endsWith(".jpg") || t.endsWith(".gif")) continue;
- }
- if (discoverFromAuthor) {
- URIMetadataNode m = segment.fulltext().getMetadata(u.hash());
- if (m != null) t = m.dc_creator();
- }
- t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim();
- if (t.isEmpty()) continue;
- if (discoverFromTitleSplitted) {
- String[] ts = t.split(" ");
- for (String s: ts) {
- if (s.isEmpty()) continue;
- if (s.endsWith(".jpg") || s.endsWith(".gif")) continue;
- table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
+ if (!discoverNot) {
+ Iterator ui = segment.urlSelector(discoveruri, 600000L, 100000);
+ while (ui.hasNext()) {
+ DigestURI u = ui.next();
+ String u0 = u.toNormalform(true);
+ t = "";
+ if (discoverFromPath) {
+ int exp = u0.lastIndexOf('.');
+ if (exp < 0) continue;
+ int slp = u0.lastIndexOf('/', exp);
+ if (slp < 0) continue;
+ t = u0.substring(slp, exp);
+ int p;
+ while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1);
+ while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1);
+ }
+ if (discoverFromTitle || discoverFromTitleSplitted) {
+ URIMetadataNode m = segment.fulltext().getMetadata(u.hash());
+ if (m != null) t = m.dc_title();
+ if (t.endsWith(".jpg") || t.endsWith(".gif")) continue;
+ }
+ if (discoverFromAuthor) {
+ URIMetadataNode m = segment.fulltext().getMetadata(u.hash());
+ if (m != null) t = m.dc_creator();
}
- } else if (discoverFromAuthor) {
- String[] ts = t.split(";"); // author names are often separated by ';'
- for (String s: ts) {
- if (s.isEmpty()) continue;
- int p = s.indexOf(','); // check if there is a reversed method to mention the name
- if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim();
- table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
+ t = t.replaceAll("_", " ").replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim();
+ if (t.isEmpty()) continue;
+ if (discoverFromTitleSplitted) {
+ String[] ts = t.split(" ");
+ for (String s: ts) {
+ if (s.isEmpty()) continue;
+ if (s.endsWith(".jpg") || s.endsWith(".gif")) continue;
+ table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
+ }
+ } else if (discoverFromAuthor) {
+ String[] ts = t.split(";"); // author names are often separated by ';'
+ for (String s: ts) {
+ if (s.isEmpty()) continue;
+ int p = s.indexOf(','); // check if there is a reversed method to mention the name
+ if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim();
+ table.put(s, new Tagging.SOTuple(Tagging.normalizeTerm(s), u0));
+ }
+ } else {
+ table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0));
}
- } else {
- table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0));
}
}
Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table);