skip loading document on crawl start for YMark bookmarks

by adding a createBookmark method overload that takes the already loaded document as a parameter.
pull/38/head
reger 9 years ago
parent 50f64ddc3b
commit 6d54eb3d36

@ -398,6 +398,52 @@ public class YMarkTables {
this.worktables.bookmarks.addBookmark(bmk_user, bmk_entry, true, true);
}
/**
 * Creates a YMark bookmark from an already loaded document.
 * <p>
 * This is identical to {@link #createBookmark(net.yacy.repository.LoaderDispatcher, net.yacy.cora.document.id.DigestURL, net.yacy.cora.protocol.ClientIdentification.Agent, java.lang.String, boolean, java.lang.String, java.lang.String) }
 * except that no loader is called: the caller supplies the document.
 * <p>
 * The bookmark URL is taken from {@code document.dc_identifier()}. Title, description and the
 * "public" flag (set to {@code "false"}) are only written when the user's bookmark table does
 * not yet contain an entry for that URL. Folders and tags are always written.
 * If {@code document} is {@code null} the call does nothing.
 *
 * @param document the loaded document to create the YMark bookmark for; may be {@code null}
 * @param bmk_user the user whose bookmark table receives the entry
 * @param autotag if {@code true}, tags produced by {@code YMarkAutoTagger.autoTag} are added
 * @param tagsString additional tags, appended after the tag separator when not empty;
 *        must not be {@code null} because {@code isEmpty()} is called on it
 * @param foldersString folders for the bookmark; when it is empty after cleaning, the default
 *        folder value is used
 * @throws IOException propagated from the called metadata/table helpers
 * @throws net.yacy.document.Parser.Failure propagated from the called metadata/table helpers
 */
public void createBookmark(final Document document, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure {
    // nothing to bookmark without a document
    if (document == null) {
        return;
    }

    final YMarkEntry entry = new YMarkEntry(false);
    final EnumMap<YMarkMetadata.METADATA, String> docMetadata = new YMarkMetadata(document).loadMetadata();
    final String url = document.dc_identifier();
    entry.put(YMarkEntry.BOOKMARK.URL.key(), url);

    // descriptive fields are only filled in for a URL the user has not bookmarked yet
    final boolean alreadyBookmarked = this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(url));
    if (!alreadyBookmarked) {
        entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");
        entry.put(YMarkEntry.BOOKMARK.TITLE.key(), docMetadata.get(YMarkMetadata.METADATA.TITLE));
        entry.put(YMarkEntry.BOOKMARK.DESC.key(), docMetadata.get(YMarkMetadata.METADATA.DESCRIPTION));
    }

    // folders: cleaned caller value, or the default when nothing is left after cleaning
    final String cleanedFolders = YMarkUtil.cleanFoldersString(foldersString);
    if (!cleanedFolders.isEmpty()) {
        entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), cleanedFolders);
    } else {
        entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkEntry.BOOKMARK.FOLDERS.deflt());
    }

    // tags: optional auto-generated tags first, then the caller-supplied ones
    final StringBuilder tagBuffer = new StringBuilder();
    if (autotag) {
        tagBuffer.append(YMarkAutoTagger.autoTag(document, 3, this.worktables.bookmarks.getTags(bmk_user)));
    }
    if (!tagsString.isEmpty()) {
        tagBuffer.append(YMarkUtil.TAGS_SEPARATOR).append(tagsString);
    }
    entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagBuffer.toString()));

    this.worktables.bookmarks.addBookmark(bmk_user, entry, true, true);
}
public boolean hasBookmark(final String bmk_user, final String urlhash) {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
try {

@ -3228,7 +3228,6 @@ public final class Switchboard extends serverSwitch {
if (reasonString != null) return reasonString;
// create a bookmark from crawl start url
//final Set<String> tags=ListManager.string2set(BookmarkHelper.cleanTagsString(post.get("bookmarkFolder","/crawlStart")));
final Set<String> tags=ListManager.string2set(BookmarkHelper.cleanTagsString("/crawlStart"));
tags.add("crawlStart");
final Set<String> keywords = scraper.dc_subject();
@ -3238,8 +3237,10 @@ public final class Switchboard extends serverSwitch {
if (kk.length() > 0) tags.add(kk);
}
}
String tagStr = tags.toString();
if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2);
// TODO: what to do with the result ?
//String tagStr = tags.toString();
//if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2);
// we will create always a bookmark to use this to track crawled hosts
final BookmarksDB.Bookmark bookmark = this.bookmarksDB.createorgetBookmark(url.toNormalform(true), "admin");
@ -3254,7 +3255,7 @@ public final class Switchboard extends serverSwitch {
// do the same for ymarks
// TODO: could a non admin user add crawls?
try {
this.tables.bookmarks.createBookmark(this.loader, url, profile.getAgent(), YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start");
this.tables.bookmarks.createBookmark(scraper, YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start");
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final Failure e) {

Loading…
Cancel
Save