- included YMarks in addition to old bookmarks in yacysearchitem.html; don't be confused by the old bookmark dialog: the YMark has already been added silently before that dialog opens.

- reworked bookmark creation on crawlstart
- many smaller adjustments to ymarks


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8072 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 13 years ago
parent 05f34a3fa7
commit 564374d1fe

@ -27,7 +27,6 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.Date;
@ -46,7 +45,6 @@ import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.NewsPool;
@ -62,7 +60,7 @@ import de.anomic.data.BookmarkHelper;
import de.anomic.data.BookmarksDB;
import de.anomic.data.ListManager;
import de.anomic.data.WorkTables;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -376,31 +374,21 @@ public class Crawler_p {
String tagStr = tags.toString();
if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2);
// we will create always a bookmark to use this to track crawled hosts
final YMarkEntry bmk = new YMarkEntry();
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url.toNormalform(true, false));
bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), title);
bmk.put(YMarkEntry.BOOKMARK.DESC.key(), description);
bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");
bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), tagStr);
bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), "/crawlStart");
try {
sb.tables.bookmarks.addBookmark("admin", bmk, false, false);
} catch (final IOException e) {
Log.logException(e);
} catch (final RowSpaceExceededException e) {
}
// we will create always a bookmark to use this to track crawled hosts
final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(crawlingStart, "admin");
if (bookmark != null) {
bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_TITLE, title);
bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description);
bookmark.setOwner("admin");
bookmark.setPublic(false);
bookmark.setTags(tags, true);
sb.bookmarksDB.saveBookmark(bookmark);
}
// do the same for ymarks
// TODO: could a non admin user add crawls?
sb.tables.bookmarks.createBookmark(sb.loader, url, YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start");
// liftoff!
prop.put("info", "8");//start msg
prop.putHTML("info_crawlingURL", post.get("crawlingURL"));

@ -0,0 +1,14 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Import Bookmarks</title>
#(redirect)#::<meta HTTP-EQUIV="REFRESH" content="0; url=#[url]#" />#(/redirect)#
</head>
<body>
#(status)#
<p>Status: error</p>
::
<p>Status: ok</p>
#(/status)#
</body>
</html>

@ -1,8 +1,11 @@
import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkTables;
@ -24,35 +27,62 @@ public class add_ymark {
if(isAdmin || isAuthUser) {
final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN);
String url = post.get(YMarkEntry.BOOKMARK.URL.key(),YMarkEntry.BOOKMARK.URL.deflt());
boolean hasProtocol = false;
for (YMarkTables.PROTOCOLS p : YMarkTables.PROTOCOLS.values()) {
if(url.toLowerCase().startsWith(p.protocol())) {
hasProtocol = true;
break;
}
}
if (!hasProtocol) {
url=YMarkTables.PROTOCOLS.HTTP.protocol(url);
}
final YMarkEntry bmk = new YMarkEntry();
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), post.get(YMarkEntry.BOOKMARK.TITLE.key(),YMarkEntry.BOOKMARK.TITLE.deflt()));
bmk.put(YMarkEntry.BOOKMARK.DESC.key(), post.get(YMarkEntry.BOOKMARK.DESC.key(),YMarkEntry.BOOKMARK.DESC.deflt()));
bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), post.get(YMarkEntry.BOOKMARK.PUBLIC.key(),YMarkEntry.BOOKMARK.PUBLIC.deflt()));
bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkEntry.BOOKMARK.TAGS.deflt())));
bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkUtil.cleanFoldersString(post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.FOLDERS_UNSORTED)));
try {
sb.tables.bookmarks.addBookmark(bmk_user, bmk, false, false);
if(post.containsKey("redirect") && post.get("redirect").length() > 0) {
prop.put("redirect_url", post.get("redirect"));
prop.put("redirect", "1");
}
if(post.containsKey("urlHash")) {
final String urlHash = post.get("urlHash",YMarkUtil.EMPTY_STRING);
final DigestURI url = sb.indexSegments.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash.getBytes()).metadata().url();
final String folders = post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.FOLDERS_UNSORTED);
final String tags = post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkUtil.EMPTY_STRING);
try {
sb.tables.bookmarks.createBookmark(sb.loader, url, bmk_user, true, tags, folders);
prop.put("status", "1");
} catch (IOException e) {
Log.logException(e);
// TODO Auto-generated catch block
Log.logException(e);
} catch (Failure e) {
// TODO Auto-generated catch block
Log.logException(e);
} catch (RowSpaceExceededException e) {
}
prop.put("result", "1");
// TODO Auto-generated catch block
Log.logException(e);
}
} else if(post.containsKey(YMarkEntry.BOOKMARK.URL.key())) {
String url = post.get(YMarkEntry.BOOKMARK.URL.key(),YMarkEntry.BOOKMARK.URL.deflt());
boolean hasProtocol = false;
for (YMarkTables.PROTOCOLS p : YMarkTables.PROTOCOLS.values()) {
if(url.toLowerCase().startsWith(p.protocol())) {
hasProtocol = true;
break;
}
}
if (!hasProtocol) {
url=YMarkTables.PROTOCOLS.HTTP.protocol(url);
}
final YMarkEntry bmk = new YMarkEntry();
bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), post.get(YMarkEntry.BOOKMARK.TITLE.key(),YMarkEntry.BOOKMARK.TITLE.deflt()));
bmk.put(YMarkEntry.BOOKMARK.DESC.key(), post.get(YMarkEntry.BOOKMARK.DESC.key(),YMarkEntry.BOOKMARK.DESC.deflt()));
bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), post.get(YMarkEntry.BOOKMARK.PUBLIC.key(),YMarkEntry.BOOKMARK.PUBLIC.deflt()));
bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key(),YMarkEntry.BOOKMARK.TAGS.deflt())));
bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkUtil.cleanFoldersString(post.get(YMarkEntry.BOOKMARK.FOLDERS.key(),YMarkEntry.FOLDERS_UNSORTED)));
try {
sb.tables.bookmarks.addBookmark(bmk_user, bmk, false, false);
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
}
prop.put("status", "1");
} else {
prop.put("status", "0");
}
} else {
prop.put(YMarkTables.USER_AUTHENTICATE,YMarkTables.USER_AUTHENTICATE_MSG);
}

@ -1,2 +1,2 @@
<?xml version='1.0' encoding="UTF-8" standalone='yes'?>
<status code="#(result)#error::ok#(/result)#" />
<status code="#(status)#error::ok#(/status)#" />

@ -5,10 +5,10 @@
#(redirect)#::<meta HTTP-EQUIV="REFRESH" content="0; url=#[url]#" />#(/redirect)#
</head>
<body>
#(result)#
<p>something went wrong</p>
#(status)#
<p>Status: error</p>
::
<p>done</p>
#(/result)#
<p>Status: ok</p>
#(/status)#
</body>
</html>

@ -3,18 +3,15 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.document.content.SurrogateReader;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
@ -28,7 +25,6 @@ import de.anomic.data.ymark.YMarkAutoTagger;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkHTMLImporter;
import de.anomic.data.ymark.YMarkJSONImporter;
import de.anomic.data.ymark.YMarkMetadata;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.data.ymark.YMarkXBELImporter;
@ -90,7 +86,7 @@ public class import_ymark {
} catch (IOException e) {
//TODO: display an error message
Log.logException(e);
prop.put("result", "0");
prop.put("status", "0");
return prop;
}
t = new Thread(surrogateReader, "YMarks - Surrogate Reader");
@ -98,7 +94,7 @@ public class import_ymark {
while ((bmk = new YMarkEntry(surrogateReader.take())) != YMarkEntry.POISON) {
putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
}
prop.put("result", "1");
prop.put("status", "1");
} else {
InputStreamReader reader = null;
try {
@ -106,7 +102,7 @@ public class import_ymark {
} catch (UnsupportedEncodingException e1) {
//TODO: display an error message
Log.logException(e1);
prop.put("result", "0");
prop.put("status", "0");
return prop;
}
if(post.get("importer").equals("html") && reader != null) {
@ -116,7 +112,7 @@ public class import_ymark {
while ((bmk = htmlImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
}
prop.put("result", "1");
prop.put("status", "1");
} else if(post.get("importer").equals("xbel") && reader != null) {
final YMarkXBELImporter xbelImporter;
try {
@ -125,7 +121,7 @@ public class import_ymark {
} catch (SAXException e) {
//TODO: display an error message
Log.logException(e);
prop.put("result", "0");
prop.put("status", "0");
return prop;
}
t = new Thread(xbelImporter, "YMarks - XBEL Importer");
@ -133,7 +129,7 @@ public class import_ymark {
while ((bmk = xbelImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
}
prop.put("result", "1");
prop.put("status", "1");
} else if(post.get("importer").equals("json") && reader != null) {
YMarkJSONImporter jsonImporter;
jsonImporter = new YMarkJSONImporter(reader, queueSize, root);
@ -142,7 +138,7 @@ public class import_ymark {
while ((bmk = jsonImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
}
prop.put("result", "1");
prop.put("status", "1");
}
}
} else if(post.containsKey("importer") && post.get("importer").equals("crawls")) {
@ -154,23 +150,10 @@ public class import_ymark {
row = APIcalls.next();
if(row.get(WorkTables.TABLE_API_COL_TYPE, "").equals("crawler")) {
final String url = row.get(WorkTables.TABLE_API_COL_COMMENT, "").substring(16);
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.indexSegments);
final Document document = meta.loadDocument(sb.loader);
final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();
final YMarkEntry bmk_entry = new YMarkEntry(false);
bmk_entry.put(YMarkEntry.BOOKMARK.URL.key(), url);
if(!sb.tables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(url))) {
bmk_entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");
bmk_entry.put(YMarkEntry.BOOKMARK.TITLE.key(), metadata.get(YMarkMetadata.METADATA.TITLE));
bmk_entry.put(YMarkEntry.BOOKMARK.DESC.key(), metadata.get(YMarkMetadata.METADATA.DESCRIPTION));
}
bmk_entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), root);
if(autotag) {
bmk_entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkAutoTagger.autoTag(document, 3, sb.tables.bookmarks.getTags(bmk_user)));
}
sb.tables.bookmarks.addBookmark(bmk_user, bmk_entry, merge, true);
sb.tables.bookmarks.createBookmark(sb.loader, url, bmk_user, autotag, "crawlStart", "/Crawl Start");
}
}
}
prop.put("status", "1");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
@ -200,6 +183,7 @@ public class import_ymark {
bmk_entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkAutoTagger.autoTag(bookmark.getUrl(), sb.loader, 3, sb.tables.bookmarks.getTags(bmk_user)));
}
sb.tables.bookmarks.addBookmark(bmk_user, bmk_entry, merge, true);
prop.put("status", "1");
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();

@ -1,2 +1,2 @@
<?xml version='1.0' encoding="UTF-8" standalone='yes'?>
<result code="#(result)#something went wrong::done#(/result)#" />
<status code="#(status)#error::ok#(/status)#" />

@ -15,7 +15,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class replace_tags {
public class manage_tags {
private static Switchboard sb = null;
private static serverObjects prop = null;

@ -87,6 +87,9 @@ function bm_action(com,grid) {
success: function() {
}
});
} else if (com=='XBEL') {
window.open("/api/ymarks/get_xbel.xml","_blank");
return false;
} else {
alert("Sorry, the function you have requested is not yet available!");
return false;

@ -100,10 +100,12 @@ $(document).ready(function() {
if ($("input[name=importer]:checked").val() == 'crawls') {
$("input[name='root']").setValue("/Crawl Start");
$("input[name='bmkfile']").attr("disabled","disabled");
$("input[name='root']").attr("disabled","disabled");
} else if ($("input[name=importer]:checked").val() == 'bmks') {
$("input[name='bmkfile']").attr("disabled","disabled");
} else {
$("input[name='bmkfile']").removeAttr("disabled");
$("input[name='root']").removeAttr("disabled");
$("input[name='root']").setValue("/Imported Bookmarks");
}
});
@ -157,8 +159,8 @@ $(document).ready(function() {
};
param[param.length] = { name : 'tags', value : tags };
$.ajax({
type: "GET",
url: "/api/ymarks/replace_tags.xml",
type: "POST",
url: "/api/ymarks/manage_tags.xml",
data: param,
dataType: "xml",
cache: false,

@ -9,7 +9,7 @@
<img width="16" height="9" src="/env/grafics/heuristic_new.gif" title="heuristic:#[name]# (new link)" style="width:16px; height:9px;" alt="heuristic:#[name]# (new link)"/>
#(/heuristic)#
#(authorized)#::
<a href="/Bookmarks.html?edit=#[urlhash]#" class="bookmarklink" title="bookmark"><img width="11" height="11" src="/env/grafics/empty.gif" title="bookmark" alt="bookmark" class="recommendIcon" /></a>
<a href="/api/ymarks/add_ymark.html?urlHash=#[urlhash]#&folders=/Search+Result&redirect=/Bookmarks.html?edit=#[urlhash]#" class="bookmarklink" title="bookmark"><img width="11" height="11" src="/env/grafics/empty.gif" title="bookmark" alt="bookmark" class="recommendIcon" /></a>
#(recommend)#
<img width="11" height="11" src="/env/grafics/empty.gif" title="" alt="recommend" class="recommendIcon" />
<img width="11" height="11" src="/env/grafics/empty.gif" title="" alt="delete" class="deleteIcon" />

@ -159,6 +159,9 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
}
}
final String clean = YMarkUtil.cleanTagsString(buffer.toString());
if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
return document.getFileExtension();
}
return clean;
}
return new String();

@ -77,6 +77,12 @@ public class YMarkMetadata {
this.document = null;
this.indexSegment = indexSegment;
}
/**
 * Creates a metadata helper for a URL that is identified by its hash.
 * The URL is resolved by loading the entry for {@code urlHash} from the
 * URL metadata of the public segment; no document is attached at this point.
 *
 * @param urlHash      hash of the URL to look up
 * @param indexSegment segments container used for the lookup and stored in this instance
 */
public YMarkMetadata(final byte[] urlHash, final Segments indexSegment) {
    this.indexSegment = indexSegment;
    this.document = null;
    // NOTE(review): load(urlHash) presumably returns null for an unknown hash,
    // which would raise a NullPointerException here - confirm against callers
    this.uri = indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(urlHash).metadata().url();
}
public YMarkMetadata(final Document document) {
this.document = document;
@ -95,7 +101,7 @@ public class YMarkMetadata {
this.document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
}
return this.document;
}
}
public EnumMap<METADATA, String> getMetadata() {
final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);

@ -29,6 +29,7 @@ package de.anomic.data.ymark;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -36,9 +37,13 @@ import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.data.WorkTables;
public class YMarkTables {
@ -301,8 +306,37 @@ public class YMarkTables {
bmk.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), (new YMarkDate()).toString());
addBookmark(bmk_user, bmk, true, true);
}
/**
 * Convenience overload that accepts the bookmark URL as a string.
 * The string is turned into a {@link DigestURI} and the call is forwarded
 * to the {@code DigestURI} variant with all other arguments unchanged.
 *
 * @param loader        loader handed on to the {@code DigestURI} variant
 * @param url           URL of the bookmark in string form
 * @param bmk_user      user the bookmark belongs to
 * @param autotag       handed on unchanged
 * @param tagsString    handed on unchanged
 * @param foldersString handed on unchanged
 * @throws IOException declared by the {@code DigestURI} variant; presumably also raised for an unparsable URL - confirm
 * @throws Failure declared by the {@code DigestURI} variant
 * @throws RowSpaceExceededException declared by the {@code DigestURI} variant
 */
public void createBookmark(final LoaderDispatcher loader, final String url, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure, RowSpaceExceededException {
    final DigestURI parsedUrl = new DigestURI(url);
    createBookmark(loader, parsedUrl, bmk_user, autotag, tagsString, foldersString);
}
/**
 * Builds a bookmark entry for {@code url} and stores it for {@code bmk_user}.
 * <p>
 * The document and its metadata are loaded first. Title and description
 * (and the "not public" flag) are only written when the user's bookmark
 * table does not yet contain an entry for this URL. The folders and tags
 * are always set, and the entry is stored with both merge flags of
 * {@code addBookmark} set to {@code true}.
 *
 * @param loader        loader used to fetch the document behind {@code url}
 * @param url           URL to bookmark
 * @param bmk_user      user the bookmark belongs to
 * @param autotag       if {@code true}, tags computed by {@link YMarkAutoTagger} are added
 * @param tagsString    additional tags; appended after the automatic tags when not empty
 * @param foldersString folders of the bookmark, cleaned before they are stored
 * @throws IOException declared for loading and storing
 * @throws Failure declared for loading the document
 * @throws RowSpaceExceededException declared for storing the bookmark
 */
public void createBookmark(final LoaderDispatcher loader, final DigestURI url, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure, RowSpaceExceededException {
    final YMarkEntry entry = new YMarkEntry(false);
    final YMarkMetadata meta = new YMarkMetadata(url);
    // the document is loaded before the metadata, as in every other step the order is kept
    final Document document = meta.loadDocument(loader);
    final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();

    // the normal form is needed for the entry itself and for the lookup of its id
    final String normalizedUrl = url.toNormalform(true, false);
    entry.put(YMarkEntry.BOOKMARK.URL.key(), normalizedUrl);

    final String userTable = YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user);
    final boolean alreadyBookmarked = this.worktables.has(userTable, YMarkUtil.getBookmarkId(normalizedUrl));
    if (!alreadyBookmarked) {
        // only a new bookmark gets visibility, title and description from the loaded metadata
        entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");
        entry.put(YMarkEntry.BOOKMARK.TITLE.key(), metadata.get(YMarkMetadata.METADATA.TITLE));
        entry.put(YMarkEntry.BOOKMARK.DESC.key(), metadata.get(YMarkMetadata.METADATA.DESCRIPTION));
    }

    entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkUtil.cleanFoldersString(foldersString));

    // automatic tags first, then the separator and the given tags; the result is cleaned below
    final StringBuilder tagBuffer = new StringBuilder();
    if (autotag) {
        tagBuffer.append(YMarkAutoTagger.autoTag(document, 3, this.worktables.bookmarks.getTags(bmk_user)));
    }
    if (!tagsString.isEmpty()) {
        tagBuffer.append(YMarkUtil.TAGS_SEPARATOR).append(tagsString);
    }
    entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagBuffer.toString()));

    this.worktables.bookmarks.addBookmark(bmk_user, entry, true, true);
}
public void addBookmark(final String bmk_user, final YMarkEntry bmk, final boolean mergeTags, final boolean mergeFolders) throws IOException, RowSpaceExceededException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final String date = String.valueOf(System.currentTimeMillis());

@ -132,7 +132,11 @@ public class Document {
}
public Set<String> getContentLanguages() {
return this.languages;
return this.languages;
}
/**
 * Returns the file extension as reported by the source of this document.
 *
 * @return the value of {@code getFileExtension()} of the document source
 */
public String getFileExtension() {
    final String extension = this.source.getFileExtension();
    return extension;
}
/**

Loading…
Cancel
Save