Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 12 years ago
commit 7dfcc92b71

@ -68,22 +68,22 @@ public class Vocabulary_p {
boolean discoverFromTitle = post.get("discovermethod", "").equals("title"); boolean discoverFromTitle = post.get("discovermethod", "").equals("title");
boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted"); boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted");
boolean discoverFromAuthor = post.get("discovermethod", "").equals("author"); boolean discoverFromAuthor = post.get("discovermethod", "").equals("author");
if (discoveruri != null) {
Segment segment = sb.index; Segment segment = sb.index;
Iterator<DigestURI> ui = segment.urlSelector(discoveruri); Iterator<DigestURI> ui = segment.urlSelector(discoveruri, 600000L, 100000);
String t; String t;
while (ui.hasNext()) { while (ui.hasNext()) {
DigestURI u = ui.next(); DigestURI u = ui.next();
String u0 = u.toNormalform(true); String u0 = u.toNormalform(true);
t = ""; t = "";
if (discoverFromPath) { if (discoverFromPath) {
t = u0.substring(discoverobjectspace.length()); int exp = u0.lastIndexOf('.');
if (t.indexOf('/') >= 0) continue; if (exp < 0) continue;
int p = t.indexOf('.'); int slp = u0.lastIndexOf('/', exp);
if (p >= 0) t = t.substring(0, p); if (slp < 0) continue;
t = u0.substring(slp, exp);
int p;
while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1); while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1);
while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1); while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1);
if (p >= 0) t = t.substring(p + 1);
} }
if (discoverFromTitle || discoverFromTitleSplitted) { if (discoverFromTitle || discoverFromTitleSplitted) {
URIMetadataNode m = segment.fulltext().getMetadata(u.hash()); URIMetadataNode m = segment.fulltext().getMetadata(u.hash());
@ -115,7 +115,6 @@ public class Vocabulary_p {
table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0)); table.put(t, new Tagging.SOTuple(Tagging.normalizeTerm(t), u0));
} }
} }
}
Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table); Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table);
LibraryProvider.autotagging.addVocabulary(newvoc); LibraryProvider.autotagging.addVocabulary(newvoc);
vocabularyName = discovername; vocabularyName = discovername;

@ -28,6 +28,7 @@ package net.yacy.search.index;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
@ -218,28 +219,41 @@ public class Segment {
* @param stub * @param stub
* @return an iterator for all matching urls * @return an iterator for all matching urls
*/ */
public Iterator<DigestURI> urlSelector(MultiProtocolURI stub) { public Iterator<DigestURI> urlSelector(final MultiProtocolURI stub, final long maxtime, final int maxcount) {
final BlockingQueue<SolrDocument> docQueue;
final String urlstub;
if (stub == null) {
docQueue = this.fulltext.getSolr().concurrentQuery("*:*", 0, Integer.MAX_VALUE, maxtime, maxcount, YaCySchema.id.getSolrFieldName(), YaCySchema.sku.getSolrFieldName());
urlstub = null;
} else {
final String host = stub.getHost(); final String host = stub.getHost();
String hh = DigestURI.hosthash(host); String hh = DigestURI.hosthash(host);
final BlockingQueue<String> hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000); docQueue = this.fulltext.getSolr().concurrentQuery(YaCySchema.host_id_s + ":\"" + hh + "\"", 0, Integer.MAX_VALUE, maxtime, maxcount, YaCySchema.id.getSolrFieldName(), YaCySchema.sku.getSolrFieldName());
urlstub = stub.toNormalform(true);
final String urlstub = stub.toNormalform(true); }
// now filter the stub from the iterated urls // now filter the stub from the iterated urls
return new LookAheadIterator<DigestURI>() { return new LookAheadIterator<DigestURI>() {
@Override @Override
protected DigestURI next0() { protected DigestURI next0() {
while (true) { while (true) {
String id; SolrDocument doc;
try { try {
id = hostQueue.take(); doc = docQueue.take();
} catch (InterruptedException e) { } catch (InterruptedException e) {
Log.logException(e); Log.logException(e);
return null; return null;
} }
if (id == null || id == AbstractSolrConnector.POISON_ID) return null; if (doc == null || doc == AbstractSolrConnector.POISON_DOCUMENT) return null;
DigestURI u = Segment.this.fulltext.getURL(ASCII.getBytes(id)); String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
if (u.toNormalform(true).startsWith(urlstub)) return u; String id = (String) doc.getFieldValue(YaCySchema.id.getSolrFieldName());
DigestURI url;
try {
url = new DigestURI(u, ASCII.getBytes(id));
} catch (MalformedURLException e) {
continue;
}
if (urlstub == null || u.startsWith(urlstub)) return url;
} }
} }
}; };

Loading…
Cancel
Save