Merge remote-tracking branch 'origin/master'

Conflicts:
	htroot/api/ymarks/import_ymark.java
	source/de/anomic/data/ymark/YMarkEntry.java
	source/de/anomic/data/ymark/YMarkTables.java
pull/1/head
Michael Peter Christen 13 years ago
commit 8c099d2106

@ -240,8 +240,8 @@ public class Table_YMark_p {
mapIterator = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, post.get("folders"));
} else if(post.containsKey("tags") && !post.get("tags").isEmpty()) {
// mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.tags.getBookmarks(bmk_user, post.get("tags")), maxcount).iterator();
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key())).split(YMarkUtil.TAGS_SEPARATOR);
mapIterator = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
final String tagsString = YMarkUtil.cleanTagsString(post.get(YMarkEntry.BOOKMARK.TAGS.key()));
mapIterator = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagsString);
} else {
mapIterator = sb.tables.orderByPK(sb.tables.iterator(table, matcher), maxcount).iterator();
}

@ -37,7 +37,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
#%env/templates/header.template%#
<div class="SubMenu">
<h3>Bookmarks</h3>
<h3>Bookmarks (user: #[user]# size: #[size]#)</h3>
<!--
<ul class="SubMenu">
#(login)#<li><a href="YMarks.html" class="MenuItemLink">Login</a></li>::#(/login)#
@ -161,7 +161,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<td>Surrogate XML</td>
</tr>
<tr>
<td><input type="radio" name="importer" value="dmoz" disabled="disabled" /></td>
<td><input type="radio" name="importer" value="dmoz" /></td>
<td>DMOZ XML</td>
</tr>
<tr>
@ -260,7 +260,8 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
</div>
<!-- Bookmarks Edit Dialog -->
<div id="ymarks_add_dialog" class="bm_dialog" title="Add & Edit Bookmark">
<div id="ymarks_add_dialog" class="bm_dialog" title="Add & Edit Bookmark">
<form id="bmaddform" method="post" accept-charset="UTF-8" action="jQuery">
<table>
<tr>
<td>
@ -275,8 +276,8 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
</select>
</td>
</tr>
</table>
<form id="bmaddform" method="post" accept-charset="UTF-8" action="jQuery"><div>
</table>
<div>
<label for="bm_url">URL:</label>
<br />
<input type="text" name="url" id="bm_url" class="bm_input" size="80" />
@ -297,7 +298,8 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<label for="bm_tags">Tags (comma separated):</label>
<br />
<input type="text" name="tags" id="bm_tags" class="bm_input" size="80" />
</div></form>
</div>
</form>
</div>
<div id="ymarks_crawlstart" class="bm_dialog" title="Crawl Start">

@ -1,27 +1,81 @@
import java.io.IOException;
import java.util.Iterator;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkRDF;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkTables.TABLES;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class YMarks {
public static serverObjects respond(final RequestHeader header, @SuppressWarnings("unused") final serverObjects post, final serverSwitch env) {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
final UserDB.Entry user = sb.userDB.getUser(header);
final boolean isAdmin = (sb.verifyAuthentication(header));
final boolean isAuthUser = user!= null && user.hasRight(UserDB.AccessRight.BOOKMARK_RIGHT);
final String path = header.get(HeaderFramework.CONNECTION_PROP_PATH);
if(path != null && path.endsWith(".rdf")) {
YMarkRDF rdf = new YMarkRDF("http://"+sb.peers.myAlternativeAddress());
if(post != null && post.containsKey(YMarkEntry.BOOKMARKS_ID)) {
final String id[] = post.get(YMarkEntry.BOOKMARKS_ID).split(":");
if(id[1].equals("b")) {
final String bmk_user = id[0];
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final byte[] urlHash = UTF8.getBytes(id[2]);
Tables.Row bmk_row;
try {
bmk_row = sb.tables.select(bmk_table, urlHash);
rdf.addBookmark(bmk_user, bmk_row);
} catch (IOException e) {
} catch (SpaceExceededException e) {
}
}
} else {
final Iterator<String> iter = sb.tables.iterator();
while(iter.hasNext()) {
final String bmk_table = iter.next();
final int i = bmk_table.indexOf(TABLES.BOOKMARKS.basename());
if(i > 0) {
final String bmk_user = bmk_table.substring(0, i);
try {
// TODO select only public bookmarks
rdf.addBookmarks(bmk_user, sb.tables.iterator(bmk_table));
} catch (IOException e) {
// TODO exception handling
}
}
}
}
prop.put("rdf", rdf.getRDF("RDF/XML-ABBREV"));
return prop;
}
if(isAdmin || isAuthUser) {
prop.put("login", 1);
final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN);
prop.putHTML("user", bmk_user.substring(0,1).toUpperCase() + bmk_user.substring(1));
int size;
try {
size = sb.tables.bookmarks.getSize(bmk_user);
} catch (IOException e) {
Log.logException(e);
size = 0;
}
prop.put("size", size);
} else {
prop.put("login", 0);
}
}
return prop;
}
}

@ -46,12 +46,8 @@ public class get_tags {
YMarkTag t;
if (post != null && post.containsKey(TAG) && !post.get(TAG).isEmpty()) {
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(TAG)).split(YMarkUtil.TAGS_SEPARATOR);
try {
tags = new TreeSet<YMarkTag>(sb.tables.bookmarks.getTags(sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray)).values());
} catch (final IOException e) {
return prop;
}
final String tagsString = YMarkUtil.cleanTagsString(post.get(TAG));
tags = new TreeSet<YMarkTag>(sb.tables.bookmarks.getTags(sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagsString)).values());
} else {
try {
tags = new TreeSet<YMarkTag>(sb.tables.bookmarks.getTags(bmk_user).values());

@ -119,43 +119,38 @@ public class get_treeview {
count++;
}
}
// loop through bookmarkList
try {
if(displayBmk && !root.isEmpty()) {
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, root);
while (bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row != null) {
final String url = UTF8.String(bmk_row.get(YMarkEntry.BOOKMARK.URL.key()));
final String title = bmk_row.get(YMarkEntry.BOOKMARK.TITLE.key(), YMarkEntry.BOOKMARK.TITLE.deflt());
if(displayBmk && !root.isEmpty()) {
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, root);
while (bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row != null) {
final String url = UTF8.String(bmk_row.get(YMarkEntry.BOOKMARK.URL.key()));
final String title = bmk_row.get(YMarkEntry.BOOKMARK.TITLE.key(), YMarkEntry.BOOKMARK.TITLE.deflt());
// TODO: get_treeview - get rid of bmtype
if (post.containsKey("bmtype")) {
if (post.get("bmtype").equals("title")) {
prop.putJSON("folders_"+count+"_foldername", title);
} else if (post.get("bmtype").equals("href")) {
prop.putJSON("folders_"+count+"_foldername", "<a href='"+url+"' target='_blank'>"+title+"</a>");
}
} else {
prop.putJSON("folders_"+count+"_foldername", url);
}
prop.put("folders_"+count+"_expanded", "false");
prop.put("folders_"+count+"_url", url);
prop.put("folders_"+count+"_type", "file");
prop.put("folders_"+count+"_hash", "b:"+new String(bmk_row.getPK()));
prop.put("folders_"+count+"_hasChildren", "true");
prop.put("folders_"+count+"_comma", ",");
count++;
}
}
}
count--;
prop.put("folders_"+count+"_comma", "");
count++;
prop.put("folders", count);
} catch (final IOException e) {
Log.logException(e);
// TODO: get_treeview - get rid of bmtype
if (post.containsKey("bmtype")) {
if (post.get("bmtype").equals("title")) {
prop.putJSON("folders_"+count+"_foldername", title);
} else if (post.get("bmtype").equals("href")) {
prop.putJSON("folders_"+count+"_foldername", "<a href='"+url+"' target='_blank'>"+title+"</a>");
}
} else {
prop.putJSON("folders_"+count+"_foldername", url);
}
prop.put("folders_"+count+"_expanded", "false");
prop.put("folders_"+count+"_url", url);
prop.put("folders_"+count+"_type", "file");
prop.put("folders_"+count+"_hash", "b:"+new String(bmk_row.getPK()));
prop.put("folders_"+count+"_hasChildren", "true");
prop.put("folders_"+count+"_comma", ",");
count++;
}
}
}
count--;
prop.put("folders_"+count+"_comma", "");
count++;
prop.put("folders", count);
} else if(displayBmk && isBookmark) {
try {
final String urlHash = post.get(ROOT).substring(2);

@ -85,12 +85,7 @@ public class get_xbel {
prop.put("xbel_"+count+"_elements", "<title>" + CharacterCoding.unicode2xml(foldername[n], true) + "</title>");
count++;
}
try {
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, folder);
} catch (final IOException e) {
// TODO: better error handling (avoid NPE)
bit = null;
}
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, folder);
Tables.Row bmk_row = null;
String urlHash;
final YMarkDate date = new YMarkDate();

@ -13,6 +13,7 @@ import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkCrawlStart;
import de.anomic.data.ymark.YMarkDate;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkRDF;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkTables.TABLES;
import de.anomic.data.ymark.YMarkUtil;
@ -31,7 +32,7 @@ public class get_ymark {
prop = new serverObjects();
int rp; // items per page
int page; // page
int page; // page
int total;
String sortorder;
String sortname;
@ -68,8 +69,8 @@ public class get_ymark {
if(!query.isEmpty()) {
if(!qtype.isEmpty()) {
if(qtype.equals("_tags")) {
final String[] tagArray = YMarkUtil.cleanTagsString(query).split(YMarkUtil.TAGS_SEPARATOR);
result = sb.tables.bookmarks.orderBookmarksBy(sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray), sortname, sortorder);
final String tags = YMarkUtil.cleanTagsString(query);
result = sb.tables.bookmarks.orderBookmarksBy(sb.tables.bookmarks.getBookmarksByTag(bmk_user, tags), sortname, sortorder);
} else if(qtype.equals("_folder")) {
result = sb.tables.bookmarks.orderBookmarksBy(sb.tables.bookmarks.getBookmarksByFolder(bmk_user, query), sortname, sortorder);
} else {
@ -89,7 +90,6 @@ public class get_ymark {
prop.put("page", page);
prop.put("total", total);
putProp(bookmarks, rp, page);
} else {
prop.put(serverObjects.ACTION_AUTHENTICATE, YMarkTables.USER_AUTHENTICATE_MSG);
}

@ -1,32 +1,35 @@
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Parser.Failure;
import net.yacy.document.content.SurrogateReader;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.workflow.InstantBusyThread;
import net.yacy.search.Switchboard;
import org.xml.sax.SAXException;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlSwitchboard;
import de.anomic.crawler.retrieval.Request;
import de.anomic.data.BookmarksDB;
import de.anomic.data.UserDB;
import de.anomic.data.WorkTables;
import de.anomic.data.ymark.MonitoredReader;
import de.anomic.data.ymark.YMarkAutoTagger;
import de.anomic.data.ymark.YMarkCrawlStart;
import de.anomic.data.ymark.YMarkDMOZImporter;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkHTMLImporter;
import de.anomic.data.ymark.YMarkJSONImporter;
@ -47,7 +50,6 @@ public class import_ymark {
final boolean isAuthUser = user!= null && user.hasRight(UserDB.AccessRight.BOOKMARK_RIGHT);
final int queueSize = 200;
Thread t;
YMarkEntry bmk;
// String root = YMarkEntry.FOLDERS_IMPORTED;
String root = "";
@ -70,8 +72,8 @@ public class import_ymark {
if(post.get("autotag").equals("empty")) {
empty = true;
}
t = new Thread(new YMarkAutoTagger(autoTaggingQueue, sb.loader, sb.tables.bookmarks, bmk_user, merge),"YMarks - autoTagger");
t.start();
YMarkAutoTagger autoTagger = new YMarkAutoTagger(autoTaggingQueue, sb.loader, sb.tables.bookmarks, bmk_user, merge);
InstantBusyThread.oneTimeJob(autoTagger, 0);
}
if(isAdmin && post.containsKey("table") && post.get("table").length() > 0) {
@ -85,7 +87,8 @@ public class import_ymark {
root = post.get("root");
}
if(post.containsKey("bmkfile") && !post.get("bmkfile").isEmpty() && post.containsKey("importer")){
stream = new ByteArrayInputStream(UTF8.getBytes(post.get("bmkfile$file")));
final byte[] bytes = UTF8.getBytes(post.get("bmkfile$file"));
stream = new ByteArrayInputStream(bytes);
if(post.get("importer").equals("surro") && stream != null) {
SurrogateReader surrogateReader;
try {
@ -96,16 +99,15 @@ public class import_ymark {
prop.put("status", "0");
return prop;
}
t = new Thread(surrogateReader, "YMarks - Surrogate Reader");
t.start();
InstantBusyThread.oneTimeJob(surrogateReader, 0);
while ((bmk = new YMarkEntry(surrogateReader.take())) != YMarkEntry.POISON) {
putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
prop.put("status", "1");
} else {
InputStreamReader reader = null;
MonitoredReader reader = null;
try {
reader = new InputStreamReader(stream,"UTF-8");
reader = new MonitoredReader(new InputStreamReader(stream,"UTF-8"), 1024*16, bytes.length);
} catch (final UnsupportedEncodingException e1) {
//TODO: display an error message
Log.logException(e1);
@ -114,11 +116,8 @@ public class import_ymark {
}
if(post.get("importer").equals("html") && reader != null) {
final YMarkHTMLImporter htmlImporter = new YMarkHTMLImporter(reader, queueSize, root);
t = new Thread(htmlImporter, "YMarks - HTML Importer");
t.start();
while ((bmk = htmlImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
InstantBusyThread.oneTimeJob(htmlImporter, 0);
InstantBusyThread.oneTimeJob(htmlImporter.getConsumer(sb, bmk_user, autoTaggingQueue, autotag, empty, indexing, medialink), 0);
prop.put("status", "1");
} else if(post.get("importer").equals("xbel") && reader != null) {
final YMarkXBELImporter xbelImporter;
@ -131,17 +130,13 @@ public class import_ymark {
prop.put("status", "0");
return prop;
}
t = new Thread(xbelImporter, "YMarks - XBEL Importer");
t.start();
while ((bmk = xbelImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
InstantBusyThread.oneTimeJob(xbelImporter, 0);
InstantBusyThread.oneTimeJob(xbelImporter.getConsumer(sb, bmk_user, autoTaggingQueue, autotag, empty, indexing, medialink), 0);
prop.put("status", "1");
} else if(post.get("importer").equals("json") && reader != null) {
YMarkJSONImporter jsonImporter;
jsonImporter = new YMarkJSONImporter(reader, queueSize, root);
t = new Thread(jsonImporter, "YMarks - JSON Importer");
t.start();
InstantBusyThread.oneTimeJob(jsonImporter, 0);
while ((bmk = jsonImporter.take()) != YMarkEntry.POISON) {
putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
@ -166,11 +161,9 @@ public class import_ymark {
}
prop.put("status", "1");
} catch (final IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
Log.logException(e);
} catch (final Failure e) {
// TODO Auto-generated catch block
e.printStackTrace();
Log.logException(e);
}
} else if(post.containsKey("importer") && post.get("importer").equals("bmks")) {
if(!isAdmin) {
@ -197,22 +190,37 @@ public class import_ymark {
sb.tables.bookmarks.addBookmark(bmk_user, bmk_entry, merge, true);
prop.put("status", "1");
} catch (final MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
Log.logException(e);
} catch (final IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
Log.logException(e);
}
}
} else if(post.containsKey("importer") && post.get("importer").equals("dmoz")) {
if(!isAdmin) {
prop.authenticationRequired();
return prop;
}
try {
final File in = new File(sb.workPath, "content.rdf.u8.gz");
final InputStream gzip = new FileInputStream(in);
final InputStream content = new GZIPInputStream(gzip);
final InputStreamReader reader = new InputStreamReader(content, "UTF-8");
final BufferedReader breader = new BufferedReader(reader);
final MonitoredReader mreader = new MonitoredReader(breader, 1024*1024, in.length());
final String source = post.get("source", "");
final YMarkDMOZImporter DMOZImporter = new YMarkDMOZImporter(mreader, queueSize, root, source);
mreader.addChangeListener(sb.tables.bookmarks.getProgressListener("DMOZImporter"));
DMOZImporter.setDepth(6);
InstantBusyThread.oneTimeJob(DMOZImporter, 0);
InstantBusyThread.oneTimeJob(DMOZImporter.getConsumer(sb, bmk_user, autoTaggingQueue, autotag, empty, indexing, medialink), 0);
prop.put("status", "1");
} catch (Exception e) {
Log.logException(e);
}
}
if(post.containsKey("autotag") && !post.get("autotag", "off").equals("off")) {
try {
autoTaggingQueue.put(YMarkAutoTagger.POISON);
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "Importer inserted poison pill in autoTagging queue");
} catch (final InterruptedException e) {
Log.logException(e);
}
}
} else {
prop.put(serverObjects.ACTION_AUTHENTICATE, YMarkTables.USER_AUTHENTICATE_MSG);
}
@ -234,15 +242,13 @@ public class import_ymark {
autoTaggingQueue.put(url);
}
}
// fill crawler
if (indexing.equals("single")) {
crawlStart(sb, new DigestURI(url), CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, 0, true, medialink);
bmk.crawl(YMarkCrawlStart.CRAWLSTART.SINGLE, medialink, sb);
} else if (indexing.equals("onelink")) {
crawlStart(sb, new DigestURI(url), CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, 1, true, medialink);
bmk.crawl(YMarkCrawlStart.CRAWLSTART.ONE_LINK, medialink, sb);
} else if (indexing.equals("fulldomain")) {
final DigestURI u = new DigestURI(url);
crawlStart(sb, u, CrawlProfile.mustMatchFilterFullDomain(u), CrawlProfile.MATCH_NEVER_STRING, 99, false, medialink);
bmk.crawl(YMarkCrawlStart.CRAWLSTART.FULL_DOMAIN, medialink, sb);
}
}
} catch (final IOException e) {
@ -251,43 +257,4 @@ public class import_ymark {
Log.logException(e);
}
}
/**
 * Creates a crawl profile for {@code startURL}, registers it as active and stacks the URL as a
 * crawl root.
 *
 * @param sb              switchboard providing the crawler, the crawl stacker and the own seed
 * @param startURL        URL to start the crawl from
 * @param urlMustMatch    must-match filter handed to the profile
 * @param urlMustNotMatch must-not-match filter handed to the profile
 * @param depth           depth value handed to the profile
 * @param crawlingQ       flag handed to the profile unchanged
 * @param medialink       flag handed to the profile unchanged
 * @return the value returned by {@code sb.crawlStacker.stackCrawl(...)}
 */
public static String crawlStart(
        final Switchboard sb,
        final DigestURI startURL,
        final String urlMustMatch,
        final String urlMustNotMatch,
        final int depth,
        final boolean crawlingQ, final boolean medialink) {
    // the profile is named after the host; URLs without a host fall back to their normal form
    final String profileName;
    if (startURL.getHost() == null) {
        profileName = startURL.toNormalform(true, false);
    } else {
        profileName = startURL.getHost();
    }

    final CrawlProfile profile = new CrawlProfile(
            profileName, null,
            urlMustMatch,
            urlMustNotMatch,
            CrawlProfile.MATCH_ALL_STRING,
            CrawlProfile.MATCH_NEVER_STRING,
            "", depth, medialink,
            CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, crawlingQ,
            true, true, true, false, true, true, true,
            CacheStrategy.IFFRESH,
            "robot_import");

    // the profile is registered under the bytes of its own handle
    sb.crawler.putActive(profile.handle().getBytes(), profile);

    final Request rootRequest = new Request(
            sb.peers.mySeed().hash.getBytes(),
            startURL,
            null,
            "CRAWLING-ROOT",
            new Date(),
            profile.handle(), 0, 0, 0, 0
            );
    return sb.crawlStacker.stackCrawl(rootRequest);
}
}

@ -56,8 +56,8 @@ public class manage_tags {
if(qtype.equals("_tags")) {
if(query.isEmpty())
query = tags;
final String[] tagArray = YMarkUtil.cleanTagsString(query).split(YMarkUtil.TAGS_SEPARATOR);
row_iter = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
final String tagsString = YMarkUtil.cleanTagsString(query);
row_iter = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagsString);
} else if(qtype.equals("_folder")) {
row_iter = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, query);
} else {
@ -67,8 +67,8 @@ public class manage_tags {
row_iter = sb.tables.iterator(bmk_table, Pattern.compile(query));
}
} else {
final String[] tagArray = YMarkUtil.cleanTagsString(tags).split(YMarkUtil.TAGS_SEPARATOR);
row_iter = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
final String tagsString = YMarkUtil.cleanTagsString(tags);
row_iter = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagsString);
// row_iter = sb.tables.iterator(bmk_table);
}
sb.tables.bookmarks.replaceTags(row_iter, bmk_user, tags, replace);

@ -62,7 +62,7 @@ function bm_action(com,grid) {
$("#bm_desc").setValue($('.trSelected',grid).find('p.desc').text().trim());
$('#bm_tags').importTags($('.trSelected',grid).find('p.tags').text().trim().replace(/,\s/g,","));
/* $("#bm_tags").setValue($('.trSelected',grid).find('p.tags').text().trim().replace(/,\s/g,",")); */
$("#bm_path").setValue($('.trSelected',grid).find('p.folders').text().replace(/,\s/g,","));
$("#bm_path").setValue($('.trSelected',grid).find('p.folders').text().replace(/, \s/g,","));
$("#bm_public").setValue($('.trSelected',grid).find('img').attr('alt'));
$("#ymarks_add_dialog").dialog('open');
} else if (com=='Crawl') {

@ -8,7 +8,7 @@ $(document).ready(function() {
/* Initialize Bookmark Dialog */
bm_dialog();
/* Initialize Flexigrid */
$('#ymarks_flexigrid').flexigrid({
url: '/api/ymarks/get_ymark.json',
@ -103,10 +103,22 @@ $(document).ready(function() {
$("input[name='root']").attr("disabled","disabled");
} else if ($("input[name=importer]:checked").val() == 'bmks') {
$("input[name='bmkfile']").attr("disabled","disabled");
} else if ($("input[name=importer]:checked").val() == 'dmoz') {
$("input[name='bmkfile']").attr("disabled","disabled");
$("input[name='root']").setValue("/DMOZ");
$("input[name='source']").removeAttr("disabled");
$("input[name='source']").setValue("Top/");
alert("The DMOZ RDF dump is exspected on your YaCy peer at DATA/WORK/content.rdf.u8.gz" +
"\nYou can download the file from http://rdf.dmoz.org/rdf/content.rdf.u8.gz (ca. 320 MB)." +
"\n\nPlease check http://www.dmoz.org/license.html before you import any DMOZ data into YaCy!" +
"\n\nDue to the large number of links contained in the dmoz file it is recommended" +
"\nto limit the import volume with an appropriate value for the source folder (e.g. Top/Games).")
} else {
$("input[name='bmkfile']").removeAttr("disabled");
$("input[name='root']").removeAttr("disabled");
$("input[name='root']").setValue("/Imported Bookmarks");
$("input[name='source']").attr("disabled","disabled");
$("input[name='source']").setValue("");
}
});
@ -155,6 +167,38 @@ $(document).ready(function() {
minWidth: 200,
maxWidth: 200,
header: "",
multiple: false,
selectedList: 1
});
$("#ymarks_importer").multiselect({
noneSelectedText: "Select an Importer ...",
minWidth: 200,
maxWidth: 200,
header: "",
multiple: false,
selectedList: 1
});
$("#ymarks_autotag").multiselect({
noneSelectedText: "Select an option ...",
minWidth: 200,
maxWidth: 200,
header: "",
multiple: false,
selectedList: 1
});
$("#ymarks_indexing").multiselect({
position: {
my: 'left bottom',
at: 'left top'
},
noneSelectedText: "Select an option ...",
minWidth: 200,
maxWidth: 200,
header: "",
multiple: false,
selectedList: 1
});
@ -254,12 +298,12 @@ function loadTagCloud() {
};
function loadTreeView() {
$("#ymarks_treeview").empty();
$("#ymarks_treeview").empty();
$("#ymarks_treeview").treeview({
url: "/api/ymarks/get_treeview.json?bmtype=href",
unique: true,
unique: false,
persist: "location"
});
});
$("#ymarks_treeview").bind("click", function(event) {
if ($(event.target).is("li") || $(event.target).parents("li").length) {
@ -270,7 +314,8 @@ function loadTreeView() {
newp: 1
});
$('#ymarks_flexigrid').flexReload();
return false;
}
}
return false;
});
return false;
}

@ -0,0 +1,102 @@
package de.anomic.data.ymark;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
/**
* This class monitors the read progress
*
*/
public class MonitoredReader extends FilterReader {
private volatile long mark = 0;
private volatile long location = 0;
private final int threshold;
private final long maxProgress;
private long lastTriggeredLocation = 0;
private ChangeListener listener = null;
public MonitoredReader(Reader in, int threshold, long maxProgress) {
super(in);
this.threshold = threshold;
this.maxProgress = maxProgress;
}
public void addChangeListener(ChangeListener l) {
this.listener = l;
}
protected void triggerChanged(final long location) {
if ( threshold > 0 && Math.abs( location-lastTriggeredLocation ) < threshold )
return;
lastTriggeredLocation = location;
if (listener == null)
return;
listener.stateChanged(new ChangeEvent(this));
}
public long getProgress() {
return this.location;
}
public long maxProgress() {
return this.maxProgress;
}
@Override
public int read() throws IOException {
final int i = super.read();
if ( i != -1 )
triggerChanged(location++);
return i;
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
final int i = super.read(cbuf, off, len);
if ( i != -1 )
triggerChanged(location+=i);
return i;
}
@Override
public int read(char[] cbuf) throws IOException {
final int i = super.read(cbuf);
if ( i != -1 )
triggerChanged(location+=i);
return i;
}
@Override
public int read(CharBuffer target) throws IOException {
final int i = super.read(target);
if ( i != -1 )
triggerChanged(location+=i);
return i;
}
@Override
public long skip(long n) throws IOException {
final long i = super.skip(n);
if ( i != -1 )
triggerChanged(location+=i);
return i;
}
@Override
public synchronized void mark(int readlimit) throws IOException {
super.mark(readlimit);
mark = location;
}
@Override
public synchronized void reset() throws IOException {
super.reset();
if ( location != mark )
triggerChanged(location = mark);
}
}

@ -58,7 +58,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
this.merge = true;
}
private static Document loadDocument(final String url, final LoaderDispatcher loader) {
private static Document loadDocument(final String url, final LoaderDispatcher loader) throws IOException {
DigestURI uri;
Response response;
try {
@ -67,12 +67,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to malformed url: "+url);
return null;
}
try {
response = loader.load(loader.request(uri, true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay);
} catch (final IOException e) {
Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to IOException for url: "+url);
return null;
}
response = loader.load(loader.request(uri, true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay);
try {
return Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Failure e) {
@ -214,8 +209,18 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
}
/**
 * Loads the document behind {@code url} and derives a tag string from it.
 * <p>
 * If the document cannot be loaded, a folder path starting with "/IOExceptions" is returned
 * instead, so callers can recognise the failure with {@code startsWith("/IOExceptions")}.
 * When the exception message contains a quoted part, the text from nine characters after the
 * first quote up to the next quote is appended as a sub folder.
 * NOTE(review): the offset of nine presumably skips a fixed prefix in the loader's message - confirm.
 *
 * @param url    address of the document to tag
 * @param loader loader used to fetch the document
 * @param max    passed through to {@code autoTag(Document, int, TreeMap)}
 * @param tags   passed through to {@code autoTag(Document, int, TreeMap)}
 * @return the tag string, or a "/IOExceptions..." folder path if no document could be loaded
 */
public static String autoTag(final String url, final LoaderDispatcher loader, final int max, final TreeMap<String, YMarkTag> tags) {
    // default result for any load failure; only refined when the message can be parsed
    String failureFolder = "/IOExceptions";
    Document document = null;
    try {
        document = loadDocument(url, loader);
    } catch (final IOException e) {
        final String message = e.getMessage();
        if (message != null) {
            final int firstQuote = message.indexOf('\'');
            if (firstQuote >= 0) {
                final int start = firstQuote + 9;
                final int end = (start < message.length()) ? message.indexOf('\'', start) : -1;
                if (end > start) {
                    failureFolder = "/IOExceptions/" + message.substring(start, end);
                }
            }
        }
    }
    return (document != null) ? autoTag(document, max, tags) : failureFolder;
}
public static boolean isDigitSpace(String str) {
@ -233,17 +238,15 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
@Override
public void run() {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "autoTagger run()");
Thread.currentThread().setUncaughtExceptionHandler(this);
String url = null;
String tagString;
Iterator<String> tit;
try {
final TreeMap<String, YMarkTag> tags = this.ymarks.getTags(this.bmk_user);
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "autoTagger queue size: "+this.bmkQueue.size());
while((url = this.bmkQueue.take()) != POISON) {
tagString = autoTag(url, this.loader, 5, tags);
if (tagString.equals("/IOExceptions")) {
if (tagString.startsWith("/IOExceptions")) {
this.ymarks.addFolder(this.bmk_user, url, tagString);
tagString = "";
}
@ -261,7 +264,6 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
}
}
}
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "autoTagger has been poisoned");
} catch (final InterruptedException e) {
Log.logException(e);
} catch (final IOException e) {

@ -1,6 +1,6 @@
// YMarkCrawlStart.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
// (C) 2012 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2011 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
@ -33,9 +33,13 @@ import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlSwitchboard;
import de.anomic.crawler.retrieval.Request;
import de.anomic.data.WorkTables;
public class YMarkCrawlStart extends HashMap<String,String>{
@ -48,6 +52,10 @@ public class YMarkCrawlStart extends HashMap<String,String>{
private String apicall_pk;
private String url;
/**
 * Crawl scopes that can be requested for a bookmark.
 * NOTE(review): the import servlet passes SINGLE, ONE_LINK and FULL_DOMAIN to
 * {@code YMarkEntry.crawl(...)} for its "single", "onelink" and "fulldomain" indexing options;
 * the exact depth and filter each constant stands for is decided there - confirm.
 */
public static enum CRAWLSTART {
SINGLE, ONE_LINK, FULL_DOMAIN
}
public YMarkCrawlStart(final WorkTables worktables) {
super();
this.date_recording = new Date(0);
@ -82,7 +90,10 @@ public class YMarkCrawlStart extends HashMap<String,String>{
}
/**
 * Tells whether this crawl start has a pending scheduled execution.
 *
 * @return {@code true} if this map is not empty and {@code date_next_exec} lies after the
 *         current time
 */
public boolean hasSchedule() {
    return !this.isEmpty() && this.date_next_exec.after(new Date());
}
public boolean isRunning(final CrawlSwitchboard crawler) {
@ -158,4 +169,37 @@ public class YMarkCrawlStart extends HashMap<String,String>{
// TODO Auto-generated catch block
}
}
/**
 * Creates a crawl profile for {@code startURL}, registers it as active and stacks the URL as a
 * crawl root.
 *
 * @param sb              switchboard providing the crawler, the crawl stacker and the own seed
 * @param startURL        URL to start the crawl from
 * @param urlMustMatch    must-match filter handed to the profile
 * @param urlMustNotMatch must-not-match filter handed to the profile
 * @param depth           depth value handed to the profile
 * @param crawlingQ       flag handed to the profile unchanged
 * @param medialink       flag handed to the profile unchanged
 * @return the value returned by {@code sb.crawlStacker.stackCrawl(...)}
 */
public static String crawlStart(
        final Switchboard sb,
        final DigestURI startURL,
        final String urlMustMatch,
        final String urlMustNotMatch,
        final int depth,
        final boolean crawlingQ, final boolean medialink) {
    // the profile is named after the host; URLs without a host fall back to their normal form
    final String profileName;
    if (startURL.getHost() == null) {
        profileName = startURL.toNormalform(true, false);
    } else {
        profileName = startURL.getHost();
    }

    final CrawlProfile profile = new CrawlProfile(
            profileName, null,
            urlMustMatch,
            urlMustNotMatch,
            CrawlProfile.MATCH_ALL_STRING,
            CrawlProfile.MATCH_NEVER_STRING,
            "",
            depth,
            medialink,
            CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE),
            -1,
            crawlingQ,
            true, true, true, false, true, true, true,
            CacheStrategy.IFFRESH,
            "robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA); // TODO: make this a default profile in CrawlSwitchboard

    // the profile is registered under the bytes of its own handle
    sb.crawler.putActive(profile.handle().getBytes(), profile);

    final Request rootRequest = new Request(
            sb.peers.mySeed().hash.getBytes(),
            startURL,
            null,
            "CRAWLING-ROOT",
            new Date(),
            profile.handle(), 0, 0, 0, 0
            );
    return sb.crawlStacker.stackCrawl(rootRequest);
}
}

@ -0,0 +1,152 @@
// YMarkDMOZImporter.java
// (C) 2012 by Stefan Foerster (apfelmaennchen), sof@gmx.de, Norderstedt, Germany
// first published 2012 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import net.yacy.cora.lod.vocabulary.DMOZ;
import net.yacy.cora.lod.vocabulary.DublinCore;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Importer that reads a DMOZ XML dump with a SAX parser and queues one
 * {@link YMarkEntry} per <code>ExternalPage</code> element whose topic lies
 * below the configured source folder.
 */
public class YMarkDMOZImporter extends YMarkImporter {

    // Statics
    public static String IMPORTER = "DMOZ";

    // Importer Variables
    private final XMLReader xmlReader;
    /** maximum number of '/' separated levels kept in an imported folder path */
    private int depth;

    /**
     * @param dmoz_file    reader delivering the DMOZ XML
     * @param queueSize    capacity of the bookmark queue
     * @param targetFolder folder that is prepended to every imported topic
     * @param sourceFolder only topics starting with this folder are imported
     * @throws SAXException if the XML reader cannot be created or configured
     */
    public YMarkDMOZImporter(final MonitoredReader dmoz_file, final int queueSize, final String targetFolder, final String sourceFolder) throws SAXException {
        super(dmoz_file, queueSize, sourceFolder, targetFolder);
        setImporter(IMPORTER);
        this.xmlReader = XMLReaderFactory.createXMLReader();
        this.xmlReader.setFeature(XML_NAMESPACE_PREFIXES, false);
        this.xmlReader.setFeature(XML_NAMESPACES, false);
        this.xmlReader.setFeature(XML_VALIDATION, false);
        this.xmlReader.setContentHandler(new DMOZParser());
        // no depth limit unless setDepth() is called
        this.depth = Integer.MAX_VALUE;
    }

    /**
     * Runs the SAX parser over the import file.
     */
    public void parse() throws Exception {
        xmlReader.parse(new InputSource(bmk_file));
    }

    /**
     * Limits the depth of imported folder paths.
     *
     * @param d number of levels to keep in addition to the levels of the target folder
     */
    public void setDepth(int d) {
        this.depth = d + this.targetFolder.split(YMarkUtil.FOLDERS_SEPARATOR).length-1;
    }

    /**
     * SAX handler that collects title, description and topic of each
     * <code>ExternalPage</code> element into a bookmark entry.
     */
    public class DMOZParser extends DefaultHandler {

        private YMarkEntry bmk;
        /** true while inside an ExternalPage element that has not been rejected */
        private boolean isNewEntry;
        /** true once the topic of the current entry matched the source folder */
        private boolean isSubtopic;
        /** bookmark key the buffered characters belong to, or null if the element is ignored */
        private String tag;
        private final StringBuilder buffer;

        public DMOZParser() {
            this.bmk = new YMarkEntry();
            this.isNewEntry = false;
            this.isSubtopic = false;
            this.buffer = new StringBuilder(512);
        }

        @Override
        public void startElement(final String uri, String localName, final String qName, final Attributes attributes) throws SAXException {
            // get rid of namespace prefixes
            if (localName.isEmpty()) {
                localName = qName.substring(qName.indexOf(':')+1);
            }
            this.tag = null;
            if (localName.equals(DMOZ.ExternalPage.name())) {
                this.bmk = new YMarkEntry();
                // the first attribute of the element is taken as the bookmark URL
                this.bmk.put(YMarkEntry.BOOKMARK.URL.key(), attributes.getValue(0));
                this.isNewEntry = true;
            }
            if (isNewEntry && localName.equals(DublinCore.Title.name())) {
                this.tag = YMarkEntry.BOOKMARK.TITLE.key();
            }
            if (isNewEntry && localName.equals(DublinCore.Description.name())) {
                this.tag = YMarkEntry.BOOKMARK.DESC.key();
            }
            if (isNewEntry && localName.equals(DMOZ.topic.name())) {
                this.tag = YMarkEntry.BOOKMARK.FOLDERS.key();
                // the folder path always starts with the target folder
                buffer.append(targetFolder);
                buffer.append(YMarkUtil.FOLDERS_SEPARATOR);
            }
        }

        @Override
        public void endElement(final String uri, String localName, final String qName) throws SAXException {
            // get rid of namespace prefixes
            if (localName.isEmpty()) {
                localName = qName.substring(qName.indexOf(':')+1);
            }
            if (this.isNewEntry && this.isSubtopic && localName.equals(DMOZ.ExternalPage.name())) {
                try {
                    bookmarks.put(this.bmk);
                } catch (InterruptedException e) {
                    // parsing continues with the next element; the entry is dropped
                    e.printStackTrace();
                } finally {
                    this.isSubtopic = false;
                    this.isNewEntry = false;
                }
            } else if (localName.equals(DMOZ.topic.name())) {
                // cut the folder path once it exceeds the configured depth
                int d = 0;
                for (int i = 0; i < this.buffer.length(); i++) {
                    if (this.buffer.charAt(i) == '/') {
                        d++;
                        if (d > depth) {
                            this.buffer.setLength(i);
                            break;
                        }
                    }
                }
                // The topic is only usable if it was recorded for the current entry
                // (tag != null) and the buffer still reaches past the target folder
                // prefix; otherwise substring() would throw and abort the import.
                final int prefixLength = targetFolder.length() + 1;
                final boolean matchesSource = this.tag != null
                        && this.buffer.length() >= prefixLength
                        && this.buffer.substring(prefixLength).startsWith(sourceFolder);
                if (matchesSource) {
                    this.isSubtopic = true;
                    this.bmk.put(this.tag, YMarkUtil.cleanFoldersString(buffer));
                } else {
                    this.isSubtopic = false;
                    this.isNewEntry = false;
                }
            } else if (this.tag != null) {
                this.bmk.put(this.tag, buffer.toString());
            }
            this.tag = null;
            this.buffer.setLength(0);
        }

        @Override
        public void characters(final char ch[], final int start, final int length) throws SAXException {
            // no processing here, as the SAX Parser characters method could be called more than once per tag!
            if (this.tag != null) {
                buffer.append(ch, start, length);
            }
        }
    }
}

@ -1,5 +1,33 @@
// YMarkEntry.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2011 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.net.MalformedURLException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
@ -7,11 +35,15 @@ import java.util.TreeMap;
import net.yacy.document.content.DCEntry;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.crawler.CrawlProfile;
public class YMarkEntry extends TreeMap<String, String> {
private static final long serialVersionUID = 2179622977348536148L;
public static final YMarkEntry POISON = new YMarkEntry();
public static final YMarkEntry EMPTY = new YMarkEntry();
public static final String BOOKMARKS_ID = "id";
@ -19,22 +51,22 @@ public class YMarkEntry extends TreeMap<String, String> {
public static final String FOLDERS_IMPORTED = "/imported";
public static enum BOOKMARK {
// key dc_attrb dflt html_attrb xbel_attrb json_attrb type
URL ("url", "dc:identifier", "", "href", "href", "uri", "link"),
TITLE ("title", "dc:title", "", "", "", "title", "meta"),
DESC ("desc", "dc:description", "", "", "", "", "comment"),
DATE_ADDED ("date_added", "", "", "add_date", "added", "dateAdded", "date"),
DATE_MODIFIED ("date_modified", "", "", "last_modified", "modified", "lastModified", "date"),
DATE_VISITED ("date_visited", "", "", "last_visited", "visited", "", "date"),
PUBLIC ("public", "", "false", "private", "yacy:public", "", "lock"),
TAGS ("tags", "dc:subject", "unsorted", "shortcuturl", "yacy:tags", "keyword", "tag"),
VISITS ("visits", "", "0", "", "yacy:visits", "", "stat"),
FOLDERS ("folders", "", "/unsorted", "", "", "", "folder"),
FILTER ("filter", "", "", "", "yacy:filter", "", "filter"),
OAI ("oai", "", "", "", "yacy:oai", "", "oai"),
URLHASH ("urlhash", "", "", "", "yacy:urlhash", "", "urlhash"),
STARRATING ("starrating", "", "", "", "yacy:starrating", "", "stat");
// key dc_attrb dflt html_attrb xbel_attrb json_attrb type index separator
URL ("url", "dc:identifier", "", "href", "href", "uri", "link", false, YMarkUtil.EMPTY_STRING),
TITLE ("title", "dc:title", "", "", "", "title", "meta", false, YMarkUtil.EMPTY_STRING),
DESC ("desc", "dc:description", "", "", "", "", "comment", false, YMarkUtil.EMPTY_STRING),
DATE_ADDED ("date_added", "", "", "add_date", "added", "dateAdded", "date", false, YMarkUtil.EMPTY_STRING),
DATE_MODIFIED ("date_modified", "", "", "last_modified", "modified", "lastModified", "date", false, YMarkUtil.EMPTY_STRING),
DATE_VISITED ("date_visited", "", "", "last_visited", "visited", "", "date", false, YMarkUtil.EMPTY_STRING),
PUBLIC ("public", "", "false", "private", "yacy:public", "", "lock", false, YMarkUtil.EMPTY_STRING),
TAGS ("tags", "dc:subject", "unsorted", "shortcuturl", "yacy:tags", "keyword", "tag", true, YMarkUtil.TAGS_SEPARATOR),
VISITS ("visits", "", "0", "", "yacy:visits", "", "stat", false, YMarkUtil.EMPTY_STRING),
FOLDERS ("folders", "", "/unsorted", "", "", "", "folder", true, YMarkUtil.TAGS_SEPARATOR),
FILTER ("filter", "", "", "", "yacy:filter", "", "filter", false, YMarkUtil.EMPTY_STRING),
OAI ("oai", "", "", "", "yacy:oai", "", "oai", false, YMarkUtil.EMPTY_STRING),
URLHASH ("urlhash", "", "", "", "yacy:urlhash", "", "urlhash", false, YMarkUtil.EMPTY_STRING),
STARRATING ("starrating", "", "", "", "yacy:starrating", "", "stat", false, YMarkUtil.EMPTY_STRING);
private String key;
private String dc_attrb;
private String dflt;
@ -42,16 +74,23 @@ public class YMarkEntry extends TreeMap<String, String> {
private String xbel_attrb;
private String json_attrb;
private String type;
private boolean index;
private String seperator;
private static final Map<String,BOOKMARK> lookup = new HashMap<String,BOOKMARK>();
private static final Map<String,String> indexColumns = new HashMap<String,String>();
static {
for(BOOKMARK b : EnumSet.allOf(BOOKMARK.class))
lookup.put(b.key(), b);
for(BOOKMARK b : EnumSet.allOf(BOOKMARK.class)) {
lookup.put(b.key, b);
if(b.index) {
indexColumns.put(b.key, b.seperator);
}
}
}
private static StringBuilder buffer = new StringBuilder(25);
private BOOKMARK(final String k, final String d, final String s, final String a, final String x, final String j, final String t) {
private BOOKMARK(final String k, final String d, final String s, final String a, final String x, final String j, final String t, final boolean index, final String separator) {
this.key = k;
this.dc_attrb = d;
this.dflt = s;
@ -59,16 +98,21 @@ public class YMarkEntry extends TreeMap<String, String> {
this.xbel_attrb = x;
this.json_attrb = j;
this.type = t;
this.index = index;
this.seperator = separator;
}
public static Map<String,String> indexColumns() {
return Collections.unmodifiableMap(indexColumns);
}
public static BOOKMARK get(String key) {
return lookup.get(key);
public static BOOKMARK get(String key) {
return lookup.get(key);
}
public static boolean contains(String key) {
return lookup.containsKey(key);
}
public String key() {
return this.key;
}
}
public String deflt() {
return this.dflt;
}
@ -97,12 +141,19 @@ public class YMarkEntry extends TreeMap<String, String> {
public String type() {
return this.type;
}
public boolean index() {
return this.index;
}
public String seperator() {
return this.seperator;
}
}
public YMarkEntry() {
this(true);
}
public YMarkEntry(final boolean setDefaults) {
super();
if(setDefaults) {
@ -110,10 +161,11 @@ public class YMarkEntry extends TreeMap<String, String> {
setCurrentTimeMillis(BOOKMARK.DATE_MODIFIED);
setDefaults();
}
}
}
public YMarkEntry(final DCEntry dc) {
for (BOOKMARK b : BOOKMARK.values()) {
super();
for (BOOKMARK b : BOOKMARK.values()) {
if(dc.containsKey(b.dc_attrb)) {
this.put(b.key(), dc.get(b.dc_attrb));
}
@ -122,15 +174,16 @@ public class YMarkEntry extends TreeMap<String, String> {
setCurrentTimeMillis(BOOKMARK.DATE_MODIFIED);
setDefaults();
}
public YMarkEntry(final Tables.Row bmk_row) {
for (BOOKMARK b : BOOKMARK.values()) {
super();
for (BOOKMARK b : BOOKMARK.values()) {
if(bmk_row.containsKey(b.key())) {
this.put(b.key(), bmk_row.get(b.key(), b.deflt()));
}
}
}
private void setCurrentTimeMillis(BOOKMARK b) {
switch(b) {
case DATE_ADDED:
@ -140,9 +193,9 @@ public class YMarkEntry extends TreeMap<String, String> {
break;
default:
break;
}
}
}
public void setDefaults() {
for (BOOKMARK b : BOOKMARK.values()) {
if(!b.deflt().isEmpty() && !this.containsKey(b.key())) {
@ -150,7 +203,17 @@ public class YMarkEntry extends TreeMap<String, String> {
}
}
}
/**
 * Computes the bookmark id for the URL stored in this entry.
 *
 * @return the id produced by {@link YMarkUtil#getBookmarkId}, or null if this
 *         entry has no URL or the URL is malformed (a warning is logged in
 *         the latter case)
 */
public byte[] getUrlHash() {
    final String urlKey = YMarkEntry.BOOKMARK.URL.key();
    if (!this.containsKey(urlKey)) {
        return null;
    }
    try {
        return YMarkUtil.getBookmarkId(this.get(urlKey));
    } catch (MalformedURLException e) {
        Log.logWarning(YMarkTables.BOOKMARKS_LOG, "getUrlHash - MalformedURLException for YMarkEntry: "+this.get(urlKey));
        return null;
    }
}
public DCEntry getDCEntry() {
final DCEntry dc = new DCEntry();
for (BOOKMARK b : BOOKMARK.values()) {
@ -160,7 +223,7 @@ public class YMarkEntry extends TreeMap<String, String> {
}
return dc;
}
public Tables.Data getData() {
final Tables.Data data = new Tables.Data();
for (BOOKMARK b : BOOKMARK.values()) {
@ -172,4 +235,21 @@ public class YMarkEntry extends TreeMap<String, String> {
}
return data;
}
/**
 * Starts a crawl for the URL of this bookmark.
 *
 * @param type      SINGLE crawls with depth 0, ONE_LINK with depth 1, and
 *                  FULL_DOMAIN with depth 99 restricted by the full-domain
 *                  must-match filter; any other value does nothing
 * @param medialink passed through to the crawl start
 * @param sb        switchboard used to start the crawl
 * @throws MalformedURLException if the stored URL cannot be parsed
 */
public void crawl(final YMarkCrawlStart.CRAWLSTART type, final boolean medialink, final Switchboard sb) throws MalformedURLException {
    final DigestURI url = new DigestURI(this.get(BOOKMARK.URL.key()));

    // pick the crawl parameters for the requested type
    final String mustMatch;
    final int crawlDepth;
    final boolean crawlingQ;
    switch (type) {
        case SINGLE:
            mustMatch = CrawlProfile.MATCH_ALL_STRING;
            crawlDepth = 0;
            crawlingQ = true;
            break;
        case ONE_LINK:
            mustMatch = CrawlProfile.MATCH_ALL_STRING;
            crawlDepth = 1;
            crawlingQ = true;
            break;
        case FULL_DOMAIN:
            mustMatch = CrawlProfile.mustMatchFilterFullDomain(url);
            crawlDepth = 99;
            crawlingQ = false;
            break;
        default:
            return;
    }
    YMarkCrawlStart.crawlStart(sb, url, mustMatch, CrawlProfile.MATCH_NEVER_STRING, crawlDepth, crawlingQ, medialink);
}
}

@ -26,10 +26,6 @@
package de.anomic.data.ymark;
import java.io.IOException;
import java.io.Reader;
import java.util.concurrent.ArrayBlockingQueue;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
@ -37,17 +33,13 @@ import javax.swing.text.html.parser.ParserDelegator;
import net.yacy.kelondro.logging.Log;
public class YMarkHTMLImporter extends HTMLEditorKit.ParserCallback implements Runnable {
public class YMarkHTMLImporter extends YMarkImporter {
// Importer Variables
private final ArrayBlockingQueue<YMarkEntry> bookmarks;
private final Reader bmk_file;
private final String RootFolder;
private final StringBuilder folderstring;
private YMarkEntry bmk;
private final ParserDelegator htmlParser;
// Statics
public static String IMPORTER = "HTML";
public static enum STATE {
NOTHING,
BOOKMARK,
@ -56,153 +48,137 @@ public class YMarkHTMLImporter extends HTMLEditorKit.ParserCallback implements R
FOLDER_DESC
}
public static final String MILLIS = "000";
// Parser variables
private STATE state;
private HTML.Tag prevTag;
public YMarkHTMLImporter(final Reader bmk_file, final int queueSize, final String root) {
this.bookmarks = new ArrayBlockingQueue<YMarkEntry>(queueSize);
this.bmk_file = bmk_file;
this.RootFolder = root;
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(this.RootFolder);
this.bmk = new YMarkEntry();
this.htmlParser = new ParserDelegator();
this.state = STATE.NOTHING;
this.prevTag = null;
}
public void run() {
try {
this.htmlParser.parse(this.bmk_file, this, true);
} catch (IOException e) {
Log.logException(e);
} finally {
try {
this.bookmarks.put(YMarkEntry.POISON);
} catch (InterruptedException e) {
Log.logException(e);
}
try {
this.bmk_file.close();
} catch (IOException e) {
Log.logException(e);
}
}
/**
 * Creates an importer for HTML bookmark files.
 *
 * @param bmk_file     reader delivering the HTML bookmark file
 * @param queueSize    capacity of the bookmark queue
 * @param targetFolder folder the imported bookmarks are placed below
 * @param sourceFolder source folder handed to the base importer
 */
public YMarkHTMLImporter(final MonitoredReader bmk_file, final int queueSize, final String targetFolder, final String sourceFolder) {
    // YMarkImporter expects (reader, queueSize, sourceFolder, targetFolder);
    // passing the folders in the other order swaps target and source
    super(bmk_file, queueSize, sourceFolder, targetFolder);
    setImporter(IMPORTER);
    this.htmlParser = new ParserDelegator();
}
public void handleText(char[] data, int pos) {
switch (state) {
case NOTHING:
break;
case BOOKMARK:
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), new String(data));
this.bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folderstring.toString());
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), YMarkEntry.BOOKMARK.PUBLIC.deflt());
this.bmk.put(YMarkEntry.BOOKMARK.VISITS.key(), YMarkEntry.BOOKMARK.VISITS.deflt());
break;
case FOLDER:
this.folderstring.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folderstring.append(data);
break;
case FOLDER_DESC:
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folderstring+" desc: " + new String(data));
break;
case BMK_DESC:
this.bmk.put(YMarkEntry.BOOKMARK.DESC.key(), new String(data));
break;
default:
break;
}
public YMarkHTMLImporter (final MonitoredReader bmk_file, final int queueSize, final String targetFolder) {
this(bmk_file, queueSize, targetFolder, "");
}
public void parse() throws Exception {
htmlParser.parse(bmk_file, new HTMLParser(), true);
}
public class HTMLParser extends HTMLEditorKit.ParserCallback {
private YMarkEntry bmk;
private final StringBuilder folderstring;
private STATE state;
private HTML.Tag prevTag;
public HTMLParser() {
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(targetFolder);
this.bmk = new YMarkEntry();
this.state = STATE.NOTHING;
this.prevTag = null;
}
public void handleText(char[] data, int pos) {
switch (state) {
case NOTHING:
break;
case BOOKMARK:
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), new String(data));
this.bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folderstring.toString());
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), YMarkEntry.BOOKMARK.PUBLIC.deflt());
this.bmk.put(YMarkEntry.BOOKMARK.VISITS.key(), YMarkEntry.BOOKMARK.VISITS.deflt());
break;
case FOLDER:
this.folderstring.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folderstring.append(data);
break;
case FOLDER_DESC:
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folderstring+" desc: " + new String(data));
break;
case BMK_DESC:
this.bmk.put(YMarkEntry.BOOKMARK.DESC.key(), new String(data));
break;
default:
break;
}
}
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
if (t == HTML.Tag.A) {
if (!this.bmk.isEmpty()) {
try {
this.bookmarks.put(this.bmk);
bmk = new YMarkEntry();
} catch (InterruptedException e) {
Log.logException(e);
}
}
final String url = (String)a.getAttribute(HTML.Attribute.HREF);
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
final StringBuilder sb = new StringBuilder(255);
for (YMarkEntry.BOOKMARK bmk : YMarkEntry.BOOKMARK.values()) {
sb.setLength(0);
if (a.isDefined(bmk.html_attrb())) {
sb.append((String)a.getAttribute(bmk.html_attrb()));
Log.logInfo(YMarkTables.BOOKMARKS_LOG, bmk.key()+" : "+sb.toString());
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
if (t == HTML.Tag.A) {
if (!this.bmk.isEmpty()) {
try {
bookmarks.put(this.bmk);
bmk = new YMarkEntry();
} catch (InterruptedException e) {
Log.logException(e);
}
}
switch(bmk) {
case TAGS:
// sb already contains the mozilla shortcuturl
// add delicious.com tags that are stored in the tags attribute
if (a.isDefined(YMarkEntry.BOOKMARK.TAGS.key())) {
sb.append(YMarkUtil.TAGS_SEPARATOR);
sb.append((String)a.getAttribute(YMarkEntry.BOOKMARK.TAGS.key()));
}
this.bmk.put(bmk.key(), YMarkUtil.cleanTagsString(sb.toString()));
break;
case PUBLIC:
// look for delicious.com private attribute
if(sb.toString().equals("0"))
this.bmk.put(bmk.key(), "true");
break;
case DATE_ADDED:
case DATE_MODIFIED:
case DATE_VISITED:
sb.append(MILLIS);
this.bmk.put(bmk.key(), sb.toString());
break;
default:
break;
}
}
state = STATE.BOOKMARK;
} else if (t == HTML.Tag.H3) {
state = STATE.FOLDER;
} else if (t == HTML.Tag.DD && this.prevTag == HTML.Tag.A) {
state = STATE.BMK_DESC;
} else {
state = STATE.NOTHING;
}
this.prevTag = t;
}
final String url = (String)a.getAttribute(HTML.Attribute.HREF);
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(), url);
final StringBuilder sb = new StringBuilder(255);
for (YMarkEntry.BOOKMARK bmk : YMarkEntry.BOOKMARK.values()) {
sb.setLength(0);
if (a.isDefined(bmk.html_attrb())) {
sb.append((String)a.getAttribute(bmk.html_attrb()));
Log.logInfo(YMarkTables.BOOKMARKS_LOG, bmk.key()+" : "+sb.toString());
}
switch(bmk) {
case TAGS:
// sb already contains the mozilla shortcuturl
// add delicious.com tags that are stored in the tags attribute
if (a.isDefined(YMarkEntry.BOOKMARK.TAGS.key())) {
sb.append(YMarkUtil.TAGS_SEPARATOR);
sb.append((String)a.getAttribute(YMarkEntry.BOOKMARK.TAGS.key()));
}
this.bmk.put(bmk.key(), YMarkUtil.cleanTagsString(sb.toString()));
break;
case PUBLIC:
// look for delicious.com private attribute
if(sb.toString().equals("0"))
this.bmk.put(bmk.key(), "true");
break;
case DATE_ADDED:
case DATE_MODIFIED:
case DATE_VISITED:
sb.append(MILLIS);
this.bmk.put(bmk.key(), sb.toString());
break;
default:
break;
}
}
state = STATE.BOOKMARK;
} else if (t == HTML.Tag.H3) {
state = STATE.FOLDER;
} else if (t == HTML.Tag.DD && this.prevTag == HTML.Tag.A) {
state = STATE.BMK_DESC;
} else {
state = STATE.NOTHING;
}
this.prevTag = t;
}
public void handleEndTag(HTML.Tag t, int pos) {
// write the last bookmark, as no more <a> tags are following
if (t == HTML.Tag.HTML) {
if (!this.bmk.isEmpty()) {
try {
this.bookmarks.put(this.bmk);
} catch (InterruptedException e) {
Log.logException(e);
public void handleEndTag(HTML.Tag t, int pos) {
// write the last bookmark, as no more <a> tags are following
if (t == HTML.Tag.HTML) {
if (!this.bmk.isEmpty()) {
try {
bookmarks.put(this.bmk);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
if (t == HTML.Tag.H3) {
state = STATE.FOLDER_DESC;
} else if (t == HTML.Tag.DL) {
//TODO: get rid of .toString.equals()
if(!this.folderstring.toString().equals(targetFolder)) {
folderstring.setLength(folderstring.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
} else {
state = STATE.NOTHING;
}
}
if (t == HTML.Tag.H3) {
state = STATE.FOLDER_DESC;
} else if (t == HTML.Tag.DL) {
//TODO: get rid of .toString.equals()
if(!this.folderstring.toString().equals(this.RootFolder)) {
folderstring.setLength(folderstring.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
} else {
state = STATE.NOTHING;
}
}
public YMarkEntry take() {
try {
return this.bookmarks.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
}

@ -0,0 +1,156 @@
// YMarkImporter.java
// (C) 2012 by Stefan Foerster (apfelmaennchen), sof@gmx.de, Norderstedt, Germany
// first published 2012 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.IOException;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
/**
 * Base class of the bookmark importers. An importer is a producer that parses
 * its input in {@link #run()} and puts the resulting entries into a bounded
 * queue, terminated by {@link YMarkEntry#POISON}. A {@link Consumer} takes
 * the entries from that queue and stores them.
 */
public abstract class YMarkImporter implements Runnable {

    // Statics
    public final static String XML_NAMESPACE_PREFIXES = "http://xml.org/sax/features/namespace-prefixes";
    public final static String XML_NAMESPACES = "http://xml.org/sax/features/namespaces";
    public final static String XML_VALIDATION = "http://xml.org/sax/features/validation";

    protected String importer;
    protected ArrayBlockingQueue<YMarkEntry> bookmarks;
    protected final MonitoredReader bmk_file;
    protected final String targetFolder;
    protected final String sourceFolder;

    /**
     * @param bmk_file     reader delivering the file to import
     * @param queueSize    capacity of the bookmark queue
     * @param sourceFolder source folder, stored after cleaning with YMarkUtil.cleanFoldersString
     * @param targetFolder target folder, stored after cleaning with YMarkUtil.cleanFoldersString
     */
    public YMarkImporter(final MonitoredReader bmk_file, final int queueSize, final String sourceFolder, final String targetFolder) {
        this.bookmarks = new ArrayBlockingQueue<YMarkEntry>(queueSize);
        this.bmk_file = bmk_file;
        this.sourceFolder = YMarkUtil.cleanFoldersString(sourceFolder);
        this.targetFolder = YMarkUtil.cleanFoldersString(targetFolder);
    }

    /**
     * Parses the input and afterwards, whether or not parsing succeeded,
     * inserts the poison pill that tells the consumer to stop.
     */
    @Override
    public void run() {
        try {
            parse();
        } catch (Exception e) {
            Log.logException(e);
        } finally {
            try {
                Log.logInfo(YMarkTables.BOOKMARKS_LOG, this.importer+" Importer inserted poison pill in queue");
                this.bookmarks.put(YMarkEntry.POISON);
            } catch (InterruptedException e1) {
                Log.logException(e1);
            }
        }
    }

    /**
     * Takes the next entry from the queue, waiting if necessary.
     *
     * @return the next entry, or null if the wait was interrupted
     */
    public YMarkEntry take() {
        try {
            return this.bookmarks.take();
        } catch (InterruptedException e) {
            Log.logException(e);
            return null;
        }
    }

    public void setImporter(final String importer) {
        this.importer = importer;
    }

    public long getProgress() {
        return this.bmk_file.getProgress();
    }

    public long maxProgress() {
        return this.bmk_file.maxProgress();
    }

    /**
     * Parses the import file and puts the resulting entries into the queue.
     */
    public abstract void parse() throws Exception;

    public Consumer getConsumer(final Switchboard sb, final String bmk_user, final ArrayBlockingQueue<String> autoTaggingQueue,
            final boolean autotag, final boolean empty, final String indexing, final boolean medialink) {
        return new Consumer(sb, bmk_user, autoTaggingQueue, autotag, empty, indexing, medialink);
    }

    /**
     * Takes entries from the importer queue until the poison pill arrives,
     * stores each http bookmark, optionally queues it for auto tagging and
     * optionally starts a crawl for it.
     */
    public class Consumer implements Runnable {
        private final Switchboard sb;
        private final String bmk_user;
        private final ArrayBlockingQueue<String> autoTaggingQueue;
        private final String indexing;
        private final boolean autotag;
        private final boolean empty;
        private final boolean medialink;

        public Consumer(final Switchboard sb, final String bmk_user, final ArrayBlockingQueue<String> autoTaggingQueue,
                final boolean autotag, final boolean empty, final String indexing, final boolean medialink) {
            this.sb = sb;
            this.bmk_user = bmk_user;
            this.autoTaggingQueue = autoTaggingQueue;
            this.autotag = autotag;
            this.empty = empty;
            this.indexing = indexing;
            this.medialink = medialink;
        }

        @Override
        public void run() {
            YMarkEntry bmk;
            while ((bmk = take()) != YMarkEntry.POISON) {
                if (bmk == null) {
                    // take() was interrupted: stop consuming instead of failing on
                    // the null entry, so the auto-tagging poison pill below is still sent
                    break;
                }
                try {
                    final String url = bmk.get(YMarkEntry.BOOKMARK.URL.key());
                    // other protocols could cause problems
                    if (url != null && url.startsWith("http")) {
                        this.sb.tables.bookmarks.addBookmark(this.bmk_user, bmk, true, true);
                        if (this.autotag) {
                            if (!this.empty) {
                                this.autoTaggingQueue.put(url);
                            } else if (!bmk.containsKey(YMarkEntry.BOOKMARK.TAGS.key()) || bmk.get(YMarkEntry.BOOKMARK.TAGS.key()).equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
                                // only bookmarks without tags (or with the default tag) are auto tagged
                                this.autoTaggingQueue.put(url);
                            }
                        }
                        // fill crawler; literal-first comparison tolerates a null indexing mode
                        if ("single".equals(this.indexing)) {
                            bmk.crawl(YMarkCrawlStart.CRAWLSTART.SINGLE, this.medialink, this.sb);
                        } else if ("onelink".equals(this.indexing)) {
                            bmk.crawl(YMarkCrawlStart.CRAWLSTART.ONE_LINK, this.medialink, this.sb);
                        } else if ("fulldomain".equals(this.indexing)) {
                            bmk.crawl(YMarkCrawlStart.CRAWLSTART.FULL_DOMAIN, this.medialink, this.sb);
                        }
                    }
                } catch (final IOException e) {
                    Log.logException(e);
                } catch (final InterruptedException e) {
                    Log.logException(e);
                }
            }
            if (this.autotag) {
                try {
                    this.autoTaggingQueue.put(YMarkAutoTagger.POISON);
                    Log.logInfo(YMarkTables.BOOKMARKS_LOG, YMarkImporter.this.importer+" inserted poison pill into autoTagging queue");
                } catch (final InterruptedException e) {
                    Log.logException(e);
                }
            }
        }
    }
}

@ -0,0 +1,161 @@
package de.anomic.data.ymark;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.lod.vocabulary.AnnoteaA;
import net.yacy.cora.lod.vocabulary.AnnoteaB;
import net.yacy.cora.lod.vocabulary.DCElements;
import net.yacy.cora.lod.vocabulary.Rdf;
import net.yacy.kelondro.blob.Tables;
import com.hp.hpl.jena.rdf.model.Bag;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
public class YMarkRDF {
public final Model model;
public final static String USER = "USER";
public final static String TYPE = "TYPE";
public final static String SUBTOPIC = "SUBTOPIC";
private final Map<String, Property> property;
public final static String BOOKMARK = "/YMarks.rdf?id=";
private final StringBuilder resourceURI;
private final int len;
public YMarkRDF(final String peerURI) {
this.model = ModelFactory.createDefaultModel();
this.property = new HashMap<String, Property>();
this.len = peerURI.length()+BOOKMARK.length();
this.resourceURI = new StringBuilder(len+20);
this.resourceURI.append(peerURI);
this.resourceURI.append(BOOKMARK);
model.setNsPrefix(Rdf.PREFIX, Rdf.IDENTIFIER);
model.setNsPrefix(DCElements.PREFIX, DCElements.IDENTIFIER);
model.setNsPrefix(AnnoteaA.PREFIX, AnnoteaA.NAMESPACE);
model.setNsPrefix(AnnoteaB.PREFIX, AnnoteaB.NAMESPACE);
this.property.put(YMarkEntry.BOOKMARK.URL.key(), this.model.createProperty(AnnoteaB.recalls.getNamespace(), AnnoteaB.recalls.name()));
this.property.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.model.createProperty(AnnoteaB.hasTopic.getNamespace(), AnnoteaB.hasTopic.name()));
this.property.put(YMarkEntry.BOOKMARK.TITLE.key(), this.model.createProperty(DCElements.title.getNamespace(), DCElements.title.name()));
this.property.put(YMarkEntry.BOOKMARK.DESC.key(), this.model.createProperty(DCElements.description.getNamespace(), DCElements.description.name()));
this.property.put(YMarkEntry.BOOKMARK.DATE_ADDED.key(), this.model.createProperty(AnnoteaA.created.getNamespace(), AnnoteaA.created.name()));
this.property.put(YMarkEntry.BOOKMARK.DATE_MODIFIED.key(), this.model.createProperty(DCElements.date.getNamespace(), DCElements.date.name()));
this.property.put(YMarkEntry.BOOKMARK.TAGS.key(), this.model.createProperty(DCElements.subject.getNamespace(), DCElements.subject.name()));
this.property.put(USER, this.model.createProperty(DCElements.creator.getNamespace(), DCElements.creator.name()));
this.property.put(TYPE, this.model.createProperty(Rdf.type.getNamespace(), Rdf.type.name()));
this.property.put(SUBTOPIC, this.model.createProperty(AnnoteaB.subTopicOf.getNamespace(), AnnoteaB.subTopicOf.name()));
}
/**
* @param format {RDF/XML, RDF/XML-ABBREV, N-TRIPLE, N3, N3-PP, N3-PLAIN, N3-TRIPLE, TURTLE}
* @return RDF
*/
public String getRDF(final String format) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
this.model.write(baos, format);
try {
return baos.toString("UTF-8");
} catch (UnsupportedEncodingException e) {
return new String();
}
}
public void addTopic(final String bmk_user, final String folder) {
this.resourceURI.append(bmk_user);
this.resourceURI.append(":f:");
this.resourceURI.append(UTF8.String(YMarkUtil.getKeyId(folder)));
final Resource topic = this.model.createResource(this.resourceURI.toString());
this.resourceURI.setLength(this.len);
topic.addProperty(this.property.get(YMarkEntry.BOOKMARK.DATE_MODIFIED.key()), YMarkUtil.EMPTY_STRING);
topic.addProperty(this.property.get(YMarkEntry.BOOKMARK.DATE_ADDED.key()), YMarkUtil.EMPTY_STRING);
topic.addProperty(this.property.get(USER), bmk_user);
topic.addProperty(this.property.get(YMarkEntry.BOOKMARK.DESC.key()), YMarkUtil.EMPTY_STRING);
final int i = folder.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR);
if(i>0)
topic.addProperty(this.property.get(SUBTOPIC), folder.substring(0, i));
topic.addProperty(this.property.get(YMarkEntry.BOOKMARK.TITLE.key()), folder);
topic.addProperty(this.property.get(TYPE), AnnoteaB.Topic.getPredicate());
}
/**
 * Adds one bookmark row to the RDF model as an Annotea bookmark resource.
 * Nothing is added when the row is null or its PUBLIC column equals "false".
 *
 * @param bmk_user owner of the bookmark; used in the resource URI and as USER property
 * @param bmk_row  table row holding the bookmark columns
 */
public void addBookmark (final String bmk_user, final Tables.Row bmk_row) {
    if (bmk_row == null) {
        return;
    }
    if (bmk_row.get(YMarkEntry.BOOKMARK.PUBLIC.key(), YMarkEntry.BOOKMARK.PUBLIC.deflt()).equals("false")) {
        return;
    }

    // compose "<base><user>:b:<primary key>" in the shared buffer, create the
    // resource, then cut the buffer back to this.len for the next caller
    this.resourceURI.append(bmk_user).append(":b:").append(UTF8.String(bmk_row.getPK()));
    final Resource bookmark = this.model.createResource(this.resourceURI.toString());
    this.resourceURI.setLength(this.len);

    bookmark.addProperty(this.property.get(USER), bmk_user);

    for (final YMarkEntry.BOOKMARK column : YMarkEntry.BOOKMARK.values()) {
        switch (column) {
            case FOLDERS: {
                final String[] parts = bmk_row.get(column.key(), column.deflt()).split(column.seperator());
                if (parts.length <= 1) {
                    // single folder: plain literal, plus the matching topic
                    bookmark.addProperty(this.property.get(column.key()), parts[0]);
                    this.addTopic(bmk_user, parts[0]);
                } else {
                    // several folders: collect them in a bag and register every one as topic
                    final Bag topics = this.model.createBag();
                    for (final String part : parts) {
                        topics.add(part);
                        this.addTopic(bmk_user, part);
                    }
                    bookmark.addProperty(this.property.get(column.key()), topics);
                }
                break;
            }
            case TAGS: {
                final String[] parts = bmk_row.get(column.key(), column.deflt()).split(column.seperator());
                if (parts.length <= 1) {
                    bookmark.addProperty(this.property.get(column.key()), parts[0]);
                } else {
                    final Bag subjects = this.model.createBag();
                    for (final String part : parts) {
                        subjects.add(part);
                    }
                    bookmark.addProperty(this.property.get(column.key()), subjects);
                }
                break;
            }
            case DATE_ADDED:
            case DATE_MODIFIED: {
                // dates are written in ISO 8601 notation
                final YMarkDate stamp = new YMarkDate(bmk_row.get(column.key()));
                bookmark.addProperty(this.property.get(column.key()), stamp.toISO8601());
                break;
            }
            // listed explicitly (instead of relying on default) for better readable RDF output
            case DESC:
            case URL:
            case TITLE:
                bookmark.addProperty(this.property.get(column.key()), bmk_row.get(column.key(), column.deflt()));
                break;
            default:
                // every other column is exported only when a property is registered for it
                if (this.property.containsKey(column.key())) {
                    bookmark.addProperty(this.property.get(column.key()), bmk_row.get(column.key(), column.deflt()));
                }
                break;
        }
    }

    bookmark.addProperty(this.property.get(TYPE), AnnoteaB.Bookmark.getPredicate());
}
/**
 * Adds every row delivered by the iterator via {@link #addBookmark(String, Tables.Row)}.
 *
 * @param bmk_user owner of the bookmarks
 * @param riter    iterator over the bookmark rows; it is consumed completely
 */
public void addBookmarks(final String bmk_user, final Iterator<Tables.Row> riter) {
    for (; riter.hasNext(); ) {
        final Tables.Row row = riter.next();
        this.addBookmark(bmk_user, row);
    }
}
}

@ -34,9 +34,13 @@ import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.concurrent.ConcurrentHashMap;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.util.SpaceExceededException;
@ -44,6 +48,7 @@ import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.blob.TablesColumnIndex;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.LoaderDispatcher;
@ -91,22 +96,64 @@ public class YMarkTables {
public final static String USER_ADMIN = "admin";
public final static String USER_AUTHENTICATE_MSG = "Bookmark user authentication required!";
public final static String p1 = "(?:^|.*,)";
public final static String p4 = "(?:,.*|$)";
public final static String p5 = "((?:";
public final static String p6 = ")(?:,.*|$)){";
public final static String p7 = "/.*)";
public final static String p8 = "(?:,|$)";
public final static int BUFFER_LENGTH = 256;
private final WorkTables worktables;
private final Map<String, ChangeListener> progressListeners;
public boolean dirty = false;
/**
 * Wraps the given tables (cast to WorkTables), marks the instance dirty,
 * creates the empty progress listener registry and finally triggers
 * {@link #buildIndex()}.
 *
 * @param wt the tables instance; must be a WorkTables, otherwise the cast fails
 */
public YMarkTables(final Tables wt) {
    this.progressListeners = new ConcurrentHashMap<String, ChangeListener>();
    this.dirty = true;
    this.worktables = (WorkTables) wt;
    // must run last: buildIndex() reads this.worktables
    this.buildIndex();
}
/**
 * Creates a fresh {@link ProgressListener}, stores it in the listener map
 * under the given key (replacing any listener stored there before) and
 * returns it.
 *
 * @param thread key under which the listener is registered
 * @return the newly created listener
 */
public ChangeListener getProgressListener(String thread) {
    final ChangeListener listener = new ProgressListener();
    this.progressListeners.put(thread, listener);
    return listener;
}
/**
 * Drops the listener registered under the given key, if there is one.
 *
 * @param thread key that was used in {@link #getProgressListener(String)}
 */
public void removeProgressListener(String thread) {
    progressListeners.remove(thread);
}
/**
 * Change listener that tracks the read progress of a {@link MonitoredReader}
 * as a percentage.
 */
public class ProgressListener implements ChangeListener {
    // the progress in %
    private int progress = 0;

    /**
     * Recomputes the percentage from the reader that fired the event.
     * The event source must be a MonitoredReader.
     */
    @Override
    public void stateChanged(final ChangeEvent e) {
        final MonitoredReader mreader = (MonitoredReader) e.getSource();
        // Compute in floating point: dividing two integral values first
        // truncates the ratio to 0 for every state before completion.
        final double current = mreader.getProgress();
        final double max = mreader.maxProgress();
        // An unknown or empty maximum is reported as 0 % instead of dividing by zero.
        this.progress = (max > 0) ? (int) (100.0 * current / max) : 0;
    }

    /**
     * @return the last computed progress in percent
     */
    public int progress() {
        return this.progress;
    }
}
/**
 * Walks over all table names of the work tables and, for every table whose
 * name ends with the bookmarks base name, (re)builds the column index when
 * the index is of type RAM or still empty. Failures are logged per table
 * and do not stop the loop.
 */
public void buildIndex() {
    for (final Iterator<String> names = this.worktables.iterator(); names.hasNext(); ) {
        final String table = names.next();
        if (!table.endsWith(TABLES.BOOKMARKS.basename())) {
            continue;
        }
        try {
            final long started = System.currentTimeMillis();
            final TablesColumnIndex index = this.worktables.getIndex(table);
            final boolean needsBuild = index.getType() == TablesColumnIndex.INDEXTYPE.RAM || index.size() == 0;
            if (needsBuild) {
                Log.logInfo(YMarkTables.BOOKMARKS_LOG, "buildIndex() "+YMarkEntry.BOOKMARK.indexColumns().keySet().toString());
                index.buildIndex(YMarkEntry.BOOKMARK.indexColumns(), this.worktables.iterator(table));
                final StringBuilder msg = new StringBuilder();
                msg.append("build ").append(index.getType().name()).append(" index for columns ");
                msg.append(YMarkEntry.BOOKMARK.indexColumns().keySet().toString());
                msg.append(" of table ").append(table);
                msg.append(" containing ").append(this.worktables.size(table)).append(" bookmarks");
                msg.append(" (").append(System.currentTimeMillis() - started).append("ms)");
                Log.logInfo(YMarkTables.BOOKMARKS_LOG, msg.toString());
            }
        } catch (final Exception e) {
            // IOException and every other exception were handled identically
            Log.logException(e);
        }
    }
}
public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, SpaceExceededException {
@ -116,11 +163,18 @@ public class YMarkTables {
if(bmk_row != null) {
this.worktables.delete(bmk_table,urlHash);
}
this.dirty = true;
if(this.worktables.hasIndex(bmk_table, YMarkEntry.BOOKMARK.FOLDERS.key())) {
try {
this.worktables.getIndex(bmk_table).delete(urlHash);
} catch (Exception e) {
// nothing to do
}
}
}
/**
 * Deletes the bookmark that belongs to the given URL.
 * The URL is converted to its bookmark id once and the work is delegated
 * to {@link #deleteBookmark(String, byte[])}.
 *
 * @param bmk_user owner of the bookmark
 * @param url      URL of the bookmark to delete
 * @throws IOException            passed on from the delegate
 * @throws SpaceExceededException passed on from the delegate
 */
public void deleteBookmark(final String bmk_user, final String url) throws IOException, SpaceExceededException {
    final byte[] urlHash = YMarkUtil.getBookmarkId(url);
    // delegate exactly once; a second delegation would only repeat the lookup and delete
    this.deleteBookmark(bmk_user, urlHash);
}
public TreeMap<String, YMarkTag> getTags(final Iterator<Row> rowIterator) {
@ -147,32 +201,65 @@ public class YMarkTables {
/**
 * Returns all tags of a user's bookmark table.
 * When the table has an index on the TAGS column, the tags and their sizes
 * are read from that index; otherwise, or when reading the index fails,
 * all rows are iterated via {@link #getTags(Iterator)}.
 *
 * @param bmk_user owner of the bookmark table
 * @return map from tag name to its YMarkTag
 * @throws IOException if iterating the table fails
 */
public TreeMap<String, YMarkTag> getTags(final String bmk_user) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    if(this.worktables.hasIndex(bmk_table, YMarkEntry.BOOKMARK.TAGS.key())) {
        try {
            final TreeMap<String,YMarkTag> tags = new TreeMap<String,YMarkTag>();
            final TablesColumnIndex index = this.worktables.getIndex(bmk_table);
            final Iterator<String> iter = index.keySet(YMarkEntry.BOOKMARK.TAGS.key()).iterator();
            while(iter.hasNext()) {
                final String tag = iter.next();
                tags.put(tag, new YMarkTag(tag, index.get(YMarkEntry.BOOKMARK.TAGS.key(), tag).size()));
            }
            return tags;
        } catch (final Exception e) {
            // index not usable: record why, then fall back to the full table scan below
            Log.logException(e);
        }
    }
    return getTags(this.worktables.iterator(bmk_table));
}
public TreeSet<String> getFolders(final String bmk_user, final String root) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
public TreeSet<String> getFolders(final String bmk_user, String root) throws IOException {
final TreeSet<String> folders = new TreeSet<String>();
final StringBuilder path = new StringBuilder(200);
final StringBuffer patternBuilder = new StringBuffer(BUFFER_LENGTH);
patternBuilder.setLength(0);
patternBuilder.append(p1);
patternBuilder.append('(');
patternBuilder.append(root);
patternBuilder.append(p7);
patternBuilder.append(p8);
final Pattern r = Pattern.compile(patternBuilder.toString());
final Iterator<Tables.Row> bit = this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.FOLDERS.key(), r);
Tables.Row bmk_row = null;
final StringBuilder path = new StringBuilder(BUFFER_LENGTH);
final String r = root + YMarkUtil.FOLDERS_SEPARATOR;
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
// if exists, try the index first
if(this.worktables.hasIndex(bmk_table, YMarkEntry.BOOKMARK.FOLDERS.key())) {
TablesColumnIndex index;
try {
index = this.worktables.getIndex(bmk_table);
final Iterator<String> fiter = index.keySet(YMarkEntry.BOOKMARK.FOLDERS.key()).iterator();
while(fiter.hasNext()) {
final String folder = fiter.next();
if(folder.startsWith(r)) {
path.setLength(0);
path.append(folder);
while(path.length() > 0 && !path.toString().equals(root)){
final String p = path.toString();
if(folders.isEmpty() || !p.equals(folders.floor(p))) {
folders.add(p);
}
path.setLength(path.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
}
}
if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
return folders;
} catch (Exception e) {
Log.logException(e);
}
}
// by default iterate all bookmarks and extract folder information
final Iterator<Tables.Row> bit = this.worktables.iterator(bmk_table);
Tables.Row bmk_row = null;
while(bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row.containsKey(YMarkEntry.BOOKMARK.FOLDERS.key())) {
final String[] folderArray = (new String(bmk_row.get(YMarkEntry.BOOKMARK.FOLDERS.key()),"UTF8")).split(YMarkUtil.TAGS_SEPARATOR);
for (final String folder : folderArray) {
if(folder.length() > root.length() && folder.substring(0, root.length()+1).equals(root+'/')) {
if(folder.length() > root.length() && folder.substring(0, root.length()+1).equals(r)) {
if(!folders.contains(folder)) {
path.setLength(0);
path.append(folder);
@ -186,50 +273,23 @@ public class YMarkTables {
}
}
}
if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
return folders;
if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
return folders;
}
public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String folder) throws IOException {
public int getSize(final String bmk_user) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH);
patternBuilder.setLength(0);
patternBuilder.append(p1);
patternBuilder.append('(');
patternBuilder.append(Pattern.quote(folder));
patternBuilder.append(')');
patternBuilder.append(p4);
final Pattern p = Pattern.compile(patternBuilder.toString());
return this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.FOLDERS.key(), p);
return this.worktables.size(bmk_table);
}
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String[] tagArray) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH);
patternBuilder.setLength(0);
patternBuilder.append(p1);
patternBuilder.append(p5);
for (final String tag : tagArray) {
patternBuilder.append(Pattern.quote(tag));
patternBuilder.append('|');
}
patternBuilder.deleteCharAt(patternBuilder.length()-1);
patternBuilder.append(p6);
patternBuilder.append(tagArray.length);
patternBuilder.append('}');
final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE);
return this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.TAGS.key(), p);
/**
 * Looks up bookmarks of a user through the FOLDERS column index.
 *
 * @param bmk_user      owner of the bookmark table
 * @param foldersString folder key(s), passed to the index lookup together with the FOLDERS separator
 * @return iterator over the rows returned by the index lookup
 */
public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String foldersString) {
    return this.worktables.getByIndex(
            TABLES.BOOKMARKS.tablename(bmk_user),
            YMarkEntry.BOOKMARK.FOLDERS.key(),
            YMarkEntry.BOOKMARK.FOLDERS.seperator(),
            foldersString);
}
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, String regex) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH);
patternBuilder.setLength(0);
patternBuilder.append(regex);
final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE);
return this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.TAGS.key(), p);
/**
 * Looks up bookmarks of a user through the TAGS column index.
 *
 * @param bmk_user   owner of the bookmark table
 * @param tagsString tag key(s), passed to the index lookup together with the TAGS separator
 * @return iterator over the rows returned by the index lookup
 */
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String tagsString) {
    return this.worktables.getByIndex(
            TABLES.BOOKMARKS.tablename(bmk_user),
            YMarkEntry.BOOKMARK.TAGS.key(),
            YMarkEntry.BOOKMARK.TAGS.seperator(),
            tagsString);
}
public List<Row> orderBookmarksBy(final Iterator<Row> rowIterator, final String sortname, final String sortorder) {
@ -256,6 +316,7 @@ public class YMarkTables {
}
public void replaceTags(final Iterator<Row> rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException {
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
final HashSet<String> remove = YMarkUtil.keysStringToSet(YMarkUtil.cleanTagsString(tagString.toLowerCase()));
final StringBuilder t = new StringBuilder(200);
HashSet<String> tags;
@ -270,7 +331,14 @@ public class YMarkTables {
t.append(YMarkUtil.TAGS_SEPARATOR);
t.append(replaceString);
row.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(t.toString()));
this.worktables.update(TABLES.BOOKMARKS.tablename(bmk_user), row);
this.worktables.update(bmk_table, row);
if(this.worktables.hasIndex(bmk_table)) {
try {
this.worktables.getIndex(bmk_table).update(YMarkEntry.BOOKMARK.TAGS.key(), YMarkEntry.BOOKMARK.TAGS.seperator(), row);
} catch (Exception e) {
// nothing to do
}
}
}
this.dirty = true;
}
@ -361,6 +429,12 @@ public class YMarkTables {
bmk.put(YMarkEntry.BOOKMARK.DATE_MODIFIED.key(), date);
}
this.worktables.insert(bmk_table, urlHash, bmk.getData());
try {
if(this.worktables.hasIndex(bmk_table))
this.worktables.getIndex(bmk_table).add(YMarkEntry.BOOKMARK.indexColumns(), bmk, urlHash);
} catch (Exception e) {
// nothing to do
}
} else {
// modify and update existing entry
HashSet<String> oldSet;
@ -417,6 +491,12 @@ public class YMarkTables {
}
// update bmk_table
this.worktables.update(bmk_table, bmk_row);
try {
if(this.worktables.hasIndex(bmk_table))
this.worktables.getIndex(bmk_table).update(YMarkEntry.BOOKMARK.indexColumns(), bmk_row);
} catch (Exception e) {
// nothing to do
}
}
this.dirty = true;

@ -1,6 +1,6 @@
// YMarkUtil.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
// (C) 2011 by Stefan Foerster, sof@gmx.de, Norderstedt, Germany
// first published 2011 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
@ -30,12 +30,14 @@ import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
public class YMarkUtil {
public final static String TAGS_SEPARATOR = ",";
public final static String FOLDERS_SEPARATOR = "/";
public final static String SPACE = " ";
public final static String EMPTY_STRING = new String();
/**
@ -60,7 +62,7 @@ public class YMarkUtil {
}
/**
 * Serializes a key set to bytes by encoding the result of
 * {@link #keySetToString(HashSet)}.
 * The encoding goes through the UTF8 helper rather than
 * {@code String.getBytes()}, whose result depends on the platform
 * default charset.
 *
 * @param urlSet the keys to serialize
 * @return the encoded key string
 */
public final static byte[] keySetToBytes(final HashSet<String> urlSet) {
    return UTF8.getBytes(keySetToString(urlSet));
}
public final static String keySetToString(final HashSet<String> urlSet) {
@ -106,17 +108,21 @@ public class YMarkUtil {
ts.deleteCharAt(0);
if (ts.length()>0 && ts.charAt(ts.length()-1) == TAGS_SEPARATOR.charAt(0))
ts.deleteCharAt(ts.length()-1);
return ts.toString();
return new String(ts);
}
/**
 * Cleans a folders string, using the empty string as the default that is
 * handed to {@link #cleanFoldersString(String, String)}.
 *
 * @param foldersString the raw folders string
 * @return the result of the two-argument variant
 */
public final static String cleanFoldersString(final String foldersString) {
    final String dflt = EMPTY_STRING;
    return cleanFoldersString(foldersString, dflt);
}
public final static String cleanFoldersString(final String foldersString, final String dflt) {
if(foldersString.isEmpty())
if(foldersString.isEmpty()) {
return dflt;
StringBuilder fs = new StringBuilder(cleanTagsString(foldersString));
}
return cleanFoldersString(new StringBuilder(cleanTagsString(foldersString)));
}
public final static String cleanFoldersString(final StringBuilder fs) {
if(fs.length() == 0)
return YMarkEntry.BOOKMARK.FOLDERS.deflt();
for (int i = 0; i < fs.length()-1; i++) {
@ -132,7 +138,7 @@ public class YMarkUtil {
}
if (fs.charAt(fs.length()-1) == FOLDERS_SEPARATOR.charAt(0)) {
fs.deleteCharAt(fs.length()-1);
}
return fs.toString();
}
return new String(fs);
}
}

@ -1,5 +1,5 @@
// YMarkXBELImporter.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// (C) 2011 by Stefan Foerster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -26,35 +26,24 @@
package de.anomic.data.ymark;
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class YMarkXBELImporter extends DefaultHandler implements Runnable {
// Importer Variables
private final ArrayBlockingQueue<YMarkEntry> bookmarks;
private final Reader bmk_file;
private final String RootFolder;
private final StringBuilder folderstring;
private YMarkEntry bmk;
private final XMLReader xmlReader;
// Statics
public static enum XBEL {
public class YMarkXBELImporter extends YMarkImporter {
// Statics
public static String IMPORTER = "XBEL";
public static enum XBEL {
NOTHING (""),
XBEL ("<xbel"),
TITLE ("<title"),
@ -65,10 +54,10 @@ public class YMarkXBELImporter extends DefaultHandler implements Runnable {
ALIAS ("<alias"),
INFO ("<info"),
METADATA ("<metadata");
private static StringBuilder buffer = new StringBuilder(25);
private static StringBuilder buffer = new StringBuilder(25);
private String tag;
private XBEL(String t) {
this.tag = t;
}
@ -77,9 +66,9 @@ public class YMarkXBELImporter extends DefaultHandler implements Runnable {
}
public String endTag(boolean empty) {
buffer.setLength(0);
buffer.append(this.tag);
buffer.append(tag);
if(empty) {
buffer.append('/');
buffer.append('/');
} else {
buffer.insert(1, '/');
}
@ -88,232 +77,217 @@ public class YMarkXBELImporter extends DefaultHandler implements Runnable {
}
public String startTag(boolean att) {
buffer.setLength(0);
buffer.append(this.tag);
buffer.append(tag);
if(!att)
buffer.append('>');
return buffer.toString();
}
}
// Parser Variables
private final HashMap<String,YMarkEntry> bmkRef;
private final HashSet<YMarkEntry> aliasRef;
private final StringBuilder buffer;
private final StringBuilder folder;
private YMarkEntry ref;
private XBEL outer_state; // BOOKMARK, FOLDER, NOTHING
private XBEL inner_state; // DESC, TITLE, INFO, ALIAS, (METADATA), NOTHING
private boolean parse_value;
public YMarkXBELImporter (final Reader bmk_file, final int queueSize, final String root) throws SAXException {
this.bookmarks = new ArrayBlockingQueue<YMarkEntry>(queueSize);
this.bmk_file = bmk_file;
this.RootFolder = root;
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(this.RootFolder);
this.bmk = new YMarkEntry();
this.xmlReader = XMLReaderFactory.createXMLReader();
this.xmlReader.setContentHandler(this);
this.xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
this.xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
this.xmlReader.setFeature("http://xml.org/sax/features/validation", false);
this.bmkRef = new HashMap<String,YMarkEntry>();
this.aliasRef = new HashSet<YMarkEntry>();
this.buffer = new StringBuilder();
this.folder = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folder.append(this.RootFolder);
}
@Override
public void run() {
try {
this.xmlReader.parse(new InputSource(this.bmk_file));
} catch (SAXParseException e) {
Log.logException(e);
} catch (SAXException e) {
Log.logException(e);
} catch (IOException e) {
Log.logException(e);
} finally {
try {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "XBEL Importer inserted poison pill in queue");
this.bookmarks.put(YMarkEntry.POISON);
} catch (InterruptedException e1) {
Log.logException(e1);
}
}
}
@Override
public void endDocument() throws SAXException {
// put alias references in the bookmark queue to ensure that folders get updated
// we do that at endDocument to ensure all referenced bookmarks already exist
this.bookmarks.addAll(this.aliasRef);
this.aliasRef.clear();
this.bmkRef.clear();
}
@Override
public void startElement(final String uri, final String name, String tag, final Attributes atts) throws SAXException {
YMarkDate date = new YMarkDate();
if (tag == null) return;
tag = tag.toLowerCase();
if (XBEL.BOOKMARK.tag().equals(tag)) {
this.bmk = new YMarkEntry();
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.URL.xbel_attrb()));
//TODO: include a dynamic loop over all annotation tags
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.TAGS.xbel_attrb()));
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.PUBLIC.xbel_attrb()));
this.bmk.put(YMarkEntry.BOOKMARK.VISITS.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.VISITS.xbel_attrb()));
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_ADDED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_ADDED.key(), date.toString());
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_VISITED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), date.toString());
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_MODIFIED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_MODIFIED.key(), date.toString());
UpdateBmkRef(atts.getValue(uri, YMarkEntry.BOOKMARKS_ID), true);
this.outer_state = XBEL.BOOKMARK;
this.inner_state = XBEL.NOTHING;
this.parse_value = false;
} else if(XBEL.FOLDER.tag().equals(tag)) {
this.outer_state = XBEL.FOLDER;
this.inner_state = XBEL.NOTHING;
} else if (XBEL.DESC.tag().equals(tag)) {
this.inner_state = XBEL.DESC;
this.parse_value = true;
} else if (XBEL.TITLE.tag().equals(tag)) {
this.inner_state = XBEL.TITLE;
this.parse_value = true;
} else if (XBEL.INFO.tag().equals(tag)) {
this.inner_state = XBEL.INFO;
this.parse_value = false;
} else if (XBEL.METADATA.tag().equals(tag)) {
// Support for old YaCy BookmarksDB XBEL Metadata (non valid XBEL)
if(this.outer_state == XBEL.BOOKMARK) {
final boolean isMozillaShortcutURL = atts.getValue(uri, "owner").equals("Mozilla") && !atts.getValue(uri, "ShortcutURL").isEmpty();
final boolean isYacyPublic = atts.getValue(uri, "owner").equals("YaCy") && !atts.getValue(uri, "public").isEmpty();
if(isMozillaShortcutURL)
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(atts.getValue(uri, "ShortcutURL")));
if(isYacyPublic)
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), atts.getValue(uri, "public"));
}
} else if (XBEL.ALIAS.tag().equals(tag)) {
final String r = atts.getValue(uri, YMarkEntry.BOOKMARKS_REF);
UpdateBmkRef(r, false);
this.aliasRef.add(this.bmkRef.get(r));
}
else {
this.outer_state = XBEL.NOTHING;
this.inner_state = XBEL.NOTHING;
this.parse_value = false;
}
}
@Override
public void endElement(final String uri, final String name, String tag) {
if (tag == null) return;
tag = tag.toLowerCase();
if(XBEL.BOOKMARK.tag().equals(tag)) {
// write bookmark
if (!this.bmk.isEmpty()) {
this.bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folder.toString());
try {
this.bookmarks.put(this.bmk);
this.bmk = new YMarkEntry();
} catch (InterruptedException e) {
Log.logException(e);
// Importer Variables
private final XMLReader xmlReader;
/**
 * Creates an XBEL importer reading from the given monitored reader.
 * The SAX reader is configured without namespace processing and without
 * validation and uses a new {@link XBELParser} as content handler.
 *
 * @param bmk_file     reader delivering the XBEL document
 * @param queueSize    passed to the superclass constructor
 * @param targetFolder passed to the superclass constructor
 * @param sourceFolder passed to the superclass constructor
 * @throws SAXException if the XML reader cannot be created or configured
 */
public YMarkXBELImporter (final MonitoredReader bmk_file, final int queueSize, final String targetFolder, final String sourceFolder) throws SAXException {
    super(bmk_file, queueSize, targetFolder, sourceFolder);
    setImporter(IMPORTER);
    this.xmlReader = XMLReaderFactory.createXMLReader();
    this.xmlReader.setFeature(XML_NAMESPACE_PREFIXES, false);
    this.xmlReader.setFeature(XML_NAMESPACES, false);
    this.xmlReader.setFeature(XML_VALIDATION, false);
    // Imported files are user supplied. Do not let the document pull in
    // external entities or the external DTD subset (XXE, unwanted network access).
    disableFeatureIfSupported(this.xmlReader, "http://xml.org/sax/features/external-general-entities");
    disableFeatureIfSupported(this.xmlReader, "http://xml.org/sax/features/external-parameter-entities");
    this.xmlReader.setContentHandler(new XBELParser());
}

/**
 * Switches a SAX feature off; a parser that does not know or support the
 * feature is logged and otherwise tolerated so that the import still works.
 */
private static void disableFeatureIfSupported(final XMLReader reader, final String feature) {
    try {
        reader.setFeature(feature, false);
    } catch (final SAXException e) {
        Log.logInfo(YMarkTables.BOOKMARKS_LOG, "XBEL Importer: XML parser cannot disable feature " + feature + ": " + e.getMessage());
    }
}
/**
 * Convenience constructor for an import without a source folder.
 *
 * @param bmk_file     reader delivering the XBEL document
 * @param queueSize    passed on to the four-argument constructor
 * @param targetFolder folder handed on as target folder
 * @throws SAXException if the XML reader cannot be created or configured
 */
public YMarkXBELImporter (final MonitoredReader bmk_file, final int queueSize, final String targetFolder) throws SAXException {
    // The four-argument constructor expects (…, targetFolder, sourceFolder).
    // Pass the target folder in the target position and leave the source
    // folder empty, instead of the other way round.
    // NOTE(review): assumes an empty source folder is the intended default; confirm against YMarkImporter.
    this(bmk_file, queueSize, targetFolder, "");
}
/**
 * Runs the SAX reader over the bookmark file; the content handler set in
 * the constructor receives the parse events.
 *
 * @throws Exception whatever the XML reader throws while parsing
 */
public void parse() throws Exception {
    final InputSource source = new InputSource(bmk_file);
    this.xmlReader.parse(source);
}
public class XBELParser extends DefaultHandler {
// Parser Variables
private final StringBuilder folderstring;
private final HashMap<String,YMarkEntry> bmkRef;
private final HashSet<YMarkEntry> aliasRef;
private final StringBuilder buffer;
private final StringBuilder folder;
private YMarkEntry bmk;
private YMarkEntry ref;
private XBEL outer_state; // BOOKMARK, FOLDER, NOTHING
private XBEL inner_state; // DESC, TITLE, INFO, ALIAS, (METADATA), NOTHING
private boolean parse_value;
public XBELParser() {
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(targetFolder);
this.bmk = new YMarkEntry();
this.bmkRef = new HashMap<String,YMarkEntry>();
this.aliasRef = new HashSet<YMarkEntry>();
this.buffer = new StringBuilder();
this.folder = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folder.append(targetFolder);
}
public void endDocument() throws SAXException {
// put alias references in the bookmark queue to ensure that folders get updated
// we do that at endDocument to ensure all referenced bookmarks already exist
bookmarks.addAll(this.aliasRef);
this.aliasRef.clear();
this.bmkRef.clear();
}
public void startElement(final String uri, final String name, String tag, final Attributes atts) throws SAXException {
YMarkDate date = new YMarkDate();
if (tag == null) return;
tag = tag.toLowerCase();
if (XBEL.BOOKMARK.tag().equals(tag)) {
this.bmk = new YMarkEntry();
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.URL.xbel_attrb()));
//TODO: include a dynamic loop over all annotation tags
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.TAGS.xbel_attrb()));
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.PUBLIC.xbel_attrb()));
this.bmk.put(YMarkEntry.BOOKMARK.VISITS.key(), atts.getValue(uri, YMarkEntry.BOOKMARK.VISITS.xbel_attrb()));
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_ADDED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
}
this.outer_state = XBEL.FOLDER;
} else if (XBEL.FOLDER.tag().equals(tag)) {
// go up one folder
//TODO: get rid of .toString.equals()
if(!this.folder.toString().equals(this.RootFolder)) {
this.folder.setLength(this.folder.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
this.outer_state = XBEL.FOLDER;
} else if (XBEL.INFO.tag().equals(tag)) {
this.inner_state = XBEL.NOTHING;
} else if (XBEL.METADATA.tag().equals(tag)) {
this.inner_state = XBEL.INFO;
}
}
@Override
public void characters(final char ch[], final int start, final int length) {
if (this.parse_value) {
this.buffer.append(ch, start, length);
switch(this.outer_state) {
case BOOKMARK:
switch(this.inner_state) {
case DESC:
this.bmk.put(YMarkEntry.BOOKMARK.DESC.key(), this.buffer.toString().trim());
break;
case TITLE:
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), this.buffer.toString().trim());
break;
default:
break;
}
break;
case FOLDER:
switch(this.inner_state) {
case DESC:
break;
case TITLE:
this.folder.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folder.append(this.buffer);
break;
default:
break;
}
break;
default:
break;
}
this.buffer.setLength(0);
this.parse_value = false;
}
}
public YMarkEntry take() {
try {
return this.bookmarks.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_ADDED.key(), date.toString());
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_VISITED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), date.toString());
try {
date.parseISO8601(atts.getValue(uri, YMarkEntry.BOOKMARK.DATE_MODIFIED.xbel_attrb()));
} catch (ParseException e) {
// TODO: exception handling
}
this.bmk.put(YMarkEntry.BOOKMARK.DATE_MODIFIED.key(), date.toString());
UpdateBmkRef(atts.getValue(uri, YMarkEntry.BOOKMARKS_ID), true);
this.outer_state = XBEL.BOOKMARK;
this.inner_state = XBEL.NOTHING;
this.parse_value = false;
} else if(XBEL.FOLDER.tag().equals(tag)) {
this.outer_state = XBEL.FOLDER;
this.inner_state = XBEL.NOTHING;
} else if (XBEL.DESC.tag().equals(tag)) {
this.inner_state = XBEL.DESC;
this.parse_value = true;
} else if (XBEL.TITLE.tag().equals(tag)) {
this.inner_state = XBEL.TITLE;
this.parse_value = true;
} else if (XBEL.INFO.tag().equals(tag)) {
this.inner_state = XBEL.INFO;
this.parse_value = false;
} else if (XBEL.METADATA.tag().equals(tag)) {
// Support for old YaCy BookmarksDB XBEL Metadata (non valid XBEL)
if(this.outer_state == XBEL.BOOKMARK) {
final boolean isMozillaShortcutURL = atts.getValue(uri, "owner").equals("Mozilla") && !atts.getValue(uri, "ShortcutURL").isEmpty();
final boolean isYacyPublic = atts.getValue(uri, "owner").equals("YaCy") && !atts.getValue(uri, "public").isEmpty();
if(isMozillaShortcutURL)
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(atts.getValue(uri, "ShortcutURL")));
if(isYacyPublic)
this.bmk.put(YMarkEntry.BOOKMARK.PUBLIC.key(), atts.getValue(uri, "public"));
}
} else if (XBEL.ALIAS.tag().equals(tag)) {
final String r = atts.getValue(uri, YMarkEntry.BOOKMARKS_REF);
UpdateBmkRef(r, false);
this.aliasRef.add(this.bmkRef.get(r));
}
else {
this.outer_state = XBEL.NOTHING;
this.inner_state = XBEL.NOTHING;
this.parse_value = false;
}
}
private void UpdateBmkRef(final String id, final boolean url) {
this.folderstring.setLength(0);
public void endElement(final String uri, final String name, String tag) {
if (tag == null) return;
tag = tag.toLowerCase();
if(XBEL.BOOKMARK.tag().equals(tag)) {
// write bookmark
if (!this.bmk.isEmpty()) {
this.bmk.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folder.toString());
try {
bookmarks.put(this.bmk);
bmk = new YMarkEntry();
} catch (InterruptedException e) {
Log.logException(e);
}
}
this.outer_state = XBEL.FOLDER;
} else if (XBEL.FOLDER.tag().equals(tag)) {
// go up one folder
//TODO: get rid of .toString.equals()
if(!this.folder.toString().equals(targetFolder)) {
folder.setLength(folder.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
this.outer_state = XBEL.FOLDER;
} else if (XBEL.INFO.tag().equals(tag)) {
this.inner_state = XBEL.NOTHING;
} else if (XBEL.METADATA.tag().equals(tag)) {
this.inner_state = XBEL.INFO;
}
}
if(this.bmkRef.containsKey(id)) {
this.folderstring.append(this.bmkRef.get(id).get(YMarkEntry.BOOKMARK.FOLDERS.key()));
this.folderstring.append(',');
this.ref = this.bmkRef.get(id);
} else {
this.ref = new YMarkEntry();
}
this.folderstring.append(this.folder);
if(url)
this.ref.put(YMarkEntry.BOOKMARK.URL.key(), this.bmk.get(YMarkEntry.BOOKMARK.URL.key()));
this.ref.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folderstring.toString());
this.bmkRef.put(id, this.ref);
}
public void characters(final char ch[], final int start, final int length) {
// TODO move string processing to endElement as characters() could be called more than once per tag
if (parse_value) {
buffer.append(ch, start, length);
switch(outer_state) {
case BOOKMARK:
switch(inner_state) {
case DESC:
this.bmk.put(YMarkEntry.BOOKMARK.DESC.key(), buffer.toString().trim());
break;
case TITLE:
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(), buffer.toString().trim());
break;
default:
break;
}
break;
case FOLDER:
switch(inner_state) {
case DESC:
break;
case TITLE:
this.folder.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folder.append(this.buffer);
break;
default:
break;
}
break;
default:
break;
}
this.buffer.setLength(0);
this.parse_value = false;
}
}
private void UpdateBmkRef(final String id, final boolean url) {
this.folderstring.setLength(0);
if(this.bmkRef.containsKey(id)) {
this.folderstring.append(this.bmkRef.get(id).get(YMarkEntry.BOOKMARK.FOLDERS.key()));
this.folderstring.append(',');
this.ref = this.bmkRef.get(id);
} else {
this.ref = new YMarkEntry();
}
this.folderstring.append(this.folder);
if(url)
this.ref.put(YMarkEntry.BOOKMARK.URL.key(), this.bmk.get(YMarkEntry.BOOKMARK.URL.key()));
this.ref.put(YMarkEntry.BOOKMARK.FOLDERS.key(), this.folderstring.toString());
this.bmkRef.put(id, ref);
}
}
}

@ -0,0 +1,92 @@
/**
* AnnoteaA
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 16.12.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
/**
* Annotea [Annotea] is a W3C Semantic Web Advanced Development project that
* provides a framework for rich communication about Web pages through shared RDF metadata.
*
* The Annotea Annotation schema [AnnotationNS] defines properties for identifying
* the document being annotated, a specific context within that document to which
* the body of the annotation refers, the author of the annotation, and more.
*
* http://www.w3.org/2003/07/Annotea/BookmarkSchema-20030707
*/
public enum AnnoteaA implements Vocabulary {

    /** The target type of an annotation resource. */
    Annotation,

    /** Relates an Annotation to the resource to which the Annotation applies. The inverse relation is 'hasAnnotation'. */
    annotates,

    /** The name of the person or organization most responsible for creating the Annotation. Sub property of dc:creator. */
    author,

    /** Relates the resource representing the 'content' of an Annotation to the Annotation resource. Sub property of related. */
    body,

    /** The context within the resource named in 'annotates' to which the Annotation most directly applies. */
    context,

    /** The date and time on which the Annotation was created. yyyy-mm-ddThh:mm:ssZ format recommended. Sub property of dc:date. */
    created,

    /** The date and time on which the Annotation was modified. yyyy-mm-ddThh:mm:ssZ format recommended. Sub property of dc:date. */
    modified,

    /**
     * A relationship between an annotation and additional resources that is less specific than 'body'.
     * The 'related' property is expected to be subclassed by more specific relationships.
     */
    related;

    public final static String NAMESPACE = "http://www.w3.org/2000/10/annotation-ns#";
    public final static String PREFIX = "a";

    /** Full predicate: namespace followed by the constant's name. */
    private final String predicate;

    AnnoteaA() {
        this.predicate = NAMESPACE + name();
    }

    /** @return the namespace shared by all terms of this vocabulary */
    @Override
    public String getNamespace() {
        return NAMESPACE;
    }

    /** @return the short prefix used for this namespace */
    @Override
    public String getNamespacePrefix() {
        return PREFIX;
    }

    /** @return always {@code null}; this vocabulary defines no literals */
    @Override
    public Set<Literal> getLiterals() {
        return null;
    }

    /** @return the namespace concatenated with the name of this term */
    @Override
    public String getPredicate() {
        return this.predicate;
    }

    /** @return the prefixed form {@code prefix:name} of this term */
    @Override
    public String getURIref() {
        return new StringBuilder(PREFIX).append(':').append(name()).toString();
    }
}

@ -0,0 +1,97 @@
/**
* AnnoteaB
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 16.12.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
/**
* Annotea [Annotea] is a W3C Semantic Web Advanced Development project that
* provides a framework for rich communication about Web pages through shared RDF metadata.
*
* The Annotea Bookmark schema [BookmarkNS] provides the basic concepts found in common browser bookmark implementations.
* These basic concepts are also captured in the XML Bookmark Exchange Language [XBEL].
* The use of RDF in Annotea permits bookmarks to express additional semantics.
* XBEL can be easily mapped into this schema.
*
* http://www.w3.org/2003/07/Annotea/BookmarkSchema-20030707
*/
public enum AnnoteaB implements Vocabulary {

    /** The class to which all bookmarks belong. */
    Bookmark,

    /**
     * Specifies a behavior; when the object of type 'Shortcut' is activated, the client follows the 'recalls' property
     * and activates the object at the end of that 'recalls' property. The target object may be another Bookmark or may be a Topic.
     */
    Shortcut,

    /** A topic; bookmarks are related to topics through 'hasTopic'. */
    Topic,

    /**
     * This corresponds to XBEL:href; an object of type Bookmark is expected to have a 'recalls' relationship
     * to the document being bookmarked. The 'bookmarks' property is an older name for the 'recalls' relationship.
     */
    bookmarks,

    /**
     * Relates a bookmark to a topic. A bookmark must have at least one hasTopic property.
     * The typical user operation of following a bookmark link will use the value of the b:recalls property.
     */
    hasTopic,

    /** Connects a Shortcut to the bookmark or topic that is being included by reference in some other topic. */
    leadsTo,

    /**
     * Relates a bookmark with the resource that has been bookmarked. This corresponds to XBEL:href;
     * an object of type Bookmark is expected to have a 'recalls' relationship to the document being bookmarked.
     */
    recalls,

    /**
     * Describes a relationship between Topics. When a topic T is a sub-topic of a topic U then all bookmarks
     * that have topic T are also considered to have topic U.
     * A topic may be a sub-topic of one or more topics; trivially, every topic is a sub-topic of itself.
     * More formally; for all B, T, and U: B b:hasTopic T, T b:subTopicOf U implies B b:hasTopic U.
     */
    subTopicOf;

    public final static String NAMESPACE = "http://www.w3.org/2002/01/bookmark#";
    public final static String PREFIX = "b";

    /** Full predicate: namespace followed by the constant's name. */
    private final String predicate;

    AnnoteaB() {
        this.predicate = NAMESPACE + name();
    }

    /** @return the namespace shared by all terms of this vocabulary */
    @Override
    public String getNamespace() {
        return NAMESPACE;
    }

    /** @return the short prefix used for this namespace */
    @Override
    public String getNamespacePrefix() {
        return PREFIX;
    }

    /** @return always {@code null}; this vocabulary defines no literals */
    @Override
    public Set<Literal> getLiterals() {
        return null;
    }

    /** @return the namespace concatenated with the name of this term */
    @Override
    public String getPredicate() {
        return this.predicate;
    }

    /** @return the prefixed form {@code prefix:name} of this term */
    @Override
    public String getURIref() {
        return new StringBuilder(PREFIX).append(':').append(name()).toString();
    }
}

@ -0,0 +1,76 @@
/**
* DCElements
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 16.12.2011 at http://yacy.net
*
* $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $
* $LastChangedRevision: 7653 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
/**
 * Terms of the vocabulary identified by {@link #IDENTIFIER}, addressed with the prefix {@link #PREFIX}.
 * Both the predicate and the prefixed reference are derived from the lower-cased constant name.
 */
public enum DCElements implements Vocabulary {

    creator,
    date,
    description,
    subject,
    title;

    public final static String IDENTIFIER = "http://purl.org/dc/elements/1.1/";
    public final static String PREFIX = "dc";

    /** Namespace identifier followed by the lower-cased term name. */
    private final String predicate;

    /** Prefix, a colon and the lower-cased term name. */
    private final String uriref;

    DCElements() {
        final String term = name().toLowerCase();
        this.predicate = IDENTIFIER + term;
        this.uriref = PREFIX + ':' + term;
    }

    /** @return the namespace shared by all terms of this vocabulary */
    @Override
    public String getNamespace() {
        return IDENTIFIER;
    }

    /** @return the short prefix used for this namespace */
    @Override
    public String getNamespacePrefix() {
        return PREFIX;
    }

    /** @return always {@code null}; this vocabulary defines no literals */
    @Override
    public Set<Literal> getLiterals() {
        return null;
    }

    /** @return the namespace concatenated with the lower-cased name of this term */
    @Override
    public String getPredicate() {
        return this.predicate;
    }

    /** @return the prefixed form {@code prefix:name} of this term, name in lower case */
    @Override
    public String getURIref() {
        return this.uriref;
    }
}

@ -0,0 +1,104 @@
/**
* DMOZ
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 16.12.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.lod.vocabulary;
import java.util.Set;
import net.yacy.cora.lod.Literal;
import net.yacy.cora.lod.Vocabulary;
/**
* The Open Directory Project is the largest, most comprehensive human-edited directory of the Web.
* It is constructed and maintained by a vast, global community of volunteer editors.
*
* RDF dumps of the Open Directory database are available for download at http://www.dmoz.org/rdf.html
* An overview of the vocabulary can be found at http://rdf.dmoz.org/rdf/tags.html
*/
public enum DMOZ implements Vocabulary {

    // --- Content ---
    ExternalPage,
    atom,
    link,
    link1,
    mediadate,
    pdf,
    pdf1,
    priority,
    rss,
    rss1,
    topic,
    type,

    // --- Structure ---
    Alias,
    Target,
    Topic,
    altlang,
    altlang1,
    catid,
    editor,
    lastUpdate,
    letterbar,
    narrow,
    narrow1,
    narrow2,
    newsgroup,
    related,
    symbolic,
    symbolic1,
    symbolic2;

    public final static String NAMESPACE = "http://dmoz.org/rdf/";
    public final static String PREFIX = "dmoz";

    /** Namespace followed by the lower-cased constant name. */
    private final String predicate;

    DMOZ() {
        final String term = name().toLowerCase();
        this.predicate = NAMESPACE + term;
    }

    /** @return the namespace shared by all terms of this vocabulary */
    @Override
    public String getNamespace() {
        return NAMESPACE;
    }

    /** @return the short prefix used for this namespace */
    @Override
    public String getNamespacePrefix() {
        return PREFIX;
    }

    /** @return always {@code null}; this vocabulary defines no literals */
    @Override
    public Set<Literal> getLiterals() {
        return null;
    }

    /** @return the namespace concatenated with the lower-cased name of this term */
    @Override
    public String getPredicate() {
        return this.predicate;
    }

    /**
     * @return the prefixed form {@code prefix:name} of this term; unlike the
     *         predicate, the name keeps its original case here
     */
    @Override
    public String getURIref() {
        return new StringBuilder(PREFIX).append(':').append(name()).toString();
    }
}

@ -35,7 +35,8 @@ public enum Rdf implements Vocabulary {
Description,
Bag,
Seq,
Alt;
Alt,
type;
public final static String IDENTIFIER = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
public final static String PREFIX = "rdf";

@ -1,6 +1,5 @@
package net.yacy.interaction.contentcontrol;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
@ -67,29 +66,23 @@ public class ContentControlFilterUpdateThread {
Switchboard sb = Switchboard.getSwitchboard();
Iterator<Tables.Row> it;
try {
it = sb.tables.bookmarks.getBookmarksByTag(
sb.getConfig(
"contentcontrol.bookmarklist",
"contentcontrol"),
"^((?!sc:"
+ sb
.getConfig(
"contentcontrol.mandatoryfilterlist",
"") + ").*)$");
while (it.hasNext()) {
Row b = it.next();
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
}
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
it = sb.tables.bookmarks.getBookmarksByTag(
sb.getConfig(
"contentcontrol.bookmarklist",
"contentcontrol"),
"^((?!sc:"
+ sb
.getConfig(
"contentcontrol.mandatoryfilterlist",
"") + ").*)$");
while (it.hasNext()) {
Row b = it.next();
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
}
}
return newfilter;
}

@ -34,10 +34,12 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
@ -50,16 +52,25 @@ import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.LookAheadIterator;
import de.anomic.data.ymark.YMarkUtil;
public class Tables implements Iterable<String> {
private static final String suffix = ".bheap";
public final static String p1 = "(?:^|.*,)";
public final static String p2 = "((?:";
public final static String p3 = ")(?:,.*|$)){";
public final static String CIDX = "_cidx";
public final static int NOINDEX = 50000;
public final static int RAMINDEX = 100000;
private static final String suffix = ".bheap";
private static final String system_table_pkcounter = "pkcounter";
private static final String system_table_pkcounter_counterName = "pk";
private final File location;
private final ConcurrentHashMap<String, BEncodedHeap> tables;
private final ConcurrentHashMap<String, TablesColumnIndex> cidx;
int keymaxlen;
// use our own formatter to prevent concurrency locks with other processes
@ -82,6 +93,143 @@ public class Tables implements Iterable<String> {
}
}
}
this.cidx = new ConcurrentHashMap<String, TablesColumnIndex>();
}
/**
 * Creates a new, empty column index of the requested type for a table.
 * The index is returned to the caller only; it is not registered in the
 * internal index cache by this method.
 *
 * @param tableName name of the table; for BLOB indexes the backing heap is
 *                  the table named {@code tableName + CIDX}
 * @param indexType kind of index to create
 * @return the newly created index
 * @throws Exception if the index type is not supported or the backing heap
 *                   cannot be obtained
 */
public TablesColumnIndex getIndex(final String tableName, TablesColumnIndex.INDEXTYPE indexType) throws Exception {
    if (indexType == TablesColumnIndex.INDEXTYPE.RAM) {
        return new TablesColumnRAMIndex();
    }
    if (indexType == TablesColumnIndex.INDEXTYPE.BLOB) {
        final BEncodedHeap heap = this.getHeap(tableName + CIDX);
        return new TablesColumnBLOBIndex(heap);
    }
    throw new Exception("Unsupported TableColumnIndex: "+indexType.name());
}
/**
 * Returns the cached column index of a table, creating and caching one if
 * none exists yet. The kind of index depends on the table size: below
 * {@code NOINDEX} rows no index is provided, below {@code RAMINDEX} rows a
 * RAM index is used, and above that a BLOB index backed by the table
 * {@code tableName + CIDX} (falling back to a RAM index if that heap cannot
 * be opened).
 *
 * @param tableName name of the table
 * @return the index registered for the table
 * @throws Exception if the table has fewer than {@code NOINDEX} rows; a
 *                   table whose size cannot be read counts as empty
 */
public TablesColumnIndex getIndex(final String tableName) throws Exception {
    // reuse an index that was created earlier
    if (this.cidx.containsKey(tableName)) {
        return this.cidx.get(tableName);
    }

    // determine the table size; an unreadable table is treated as empty
    int rowCount;
    try {
        rowCount = this.size(tableName);
    } catch (IOException e) {
        rowCount = 0;
    }
    if (rowCount < NOINDEX) {
        throw new Exception("TableColumnIndex not available for tables with less than "+NOINDEX+" rows: "+tableName);
    }

    // large tables get a BLOB index when the backing heap is available
    TablesColumnIndex created = null;
    if (rowCount >= RAMINDEX) {
        BEncodedHeap heap = null;
        try {
            heap = this.getHeap(tableName + CIDX);
        } catch (IOException e) {
            Log.logException(e);
        }
        if (heap != null) {
            created = new TablesColumnBLOBIndex(heap);
        }
    }
    if (created == null) {
        created = new TablesColumnRAMIndex();
    }

    this.cidx.put(tableName, created);
    return created;
}
/**
 * Tells whether a column index has already been created and cached for a table.
 *
 * @param tableName name of the table
 * @return {@code true} if an index is registered under this table name
 */
public boolean hasIndex (final String tableName) {
    // ConcurrentHashMap.contains(Object) is the legacy Hashtable-style test for a
    // VALUE; the table name is a key of the cache, so containsKey must be used.
    return this.cidx.containsKey(tableName);
}
/**
 * Tells whether an index exists for a column of a table. A cached index is
 * asked first; without a cached index the table {@code tableName + CIDX} is
 * probed for an entry keyed by the column's key id.
 *
 * @param tableName  name of the table
 * @param columnName name of the column
 * @return {@code true} if an index for the column was found; {@code false}
 *         otherwise, including when probing the index table fails
 */
public boolean hasIndex (final String tableName, final String columnName) {
    if (this.cidx.containsKey(tableName)) {
        return this.cidx.get(tableName).hasIndex(columnName);
    }
    try {
        return this.has(tableName+CIDX, YMarkUtil.getKeyId(columnName));
    } catch (IOException e) {
        Log.logException(e);
        return false;
    }
}
/**
 * Selects the rows of a table whose column contains all of the given values.
 * If a column index exists it is used; otherwise the table is scanned, with a
 * regular expression when the column holds separated values or with an exact
 * match when it holds a single value.
 *
 * @param table       name of the table
 * @param whereColumn name of the column to search in
 * @param separator   separator of the values inside the column and inside
 *                    {@code whereValue} (used as a regular expression by
 *                    {@link String#split(String)}); empty if the column
 *                    holds a single value
 * @param whereValue  the value, or the separated list of values, a row must contain
 * @return an iterator over the matching rows; empty if nothing matches or
 *         the lookup fails
 */
public Iterator<Row> getByIndex(final String table, final String whereColumn, final String separator, final String whereValue) {
    // With an empty separator the column holds one value. String.split("") would
    // break the search value into single characters, so it must not be applied.
    final String[] values = separator.isEmpty() ? new String[] {whereValue} : whereValue.split(separator);
    if (values.length == 0) {
        // e.g. whereValue consists of separators only: nothing to search for
        // (the regular expression built below would be invalid in this case)
        return new HashSet<Row>().iterator();
    }

    if (this.hasIndex(table, whereColumn)) {
        final HashSet<Tables.Row> rows = new HashSet<Tables.Row>();
        final TreeSet<byte[]> matches = new TreeSet<byte[]>(TablesColumnIndex.NATURALORDER);
        try {
            final TablesColumnIndex index = this.getIndex(table);
            for (int i = 0; i < values.length; i++) {
                if (!index.containsKey(whereColumn, values[i])) {
                    // a row must contain ALL values; a value unknown to the index
                    // means that no row can match
                    matches.clear();
                    break;
                }
                final TreeSet<byte[]> current = new TreeSet<byte[]>(TablesColumnIndex.NATURALORDER);
                current.addAll(index.get(whereColumn, values[i]));
                if (i == 0) {
                    matches.addAll(current);
                } else {
                    matches.retainAll(current);
                }
                if (matches.isEmpty()) {
                    break; // the intersection cannot grow again
                }
            }
            for (final byte[] pk : matches) {
                rows.add(this.select(table, pk));
            }
        } catch (Exception e) {
            Log.logException(e);
            return new HashSet<Row>().iterator();
        }
        return rows.iterator();
    }

    try {
        if (separator.isEmpty()) {
            // single-valued column without index: exact match on the column value
            return this.iterator(table, whereColumn, UTF8.getBytes(whereValue));
        }
        // multi-valued column without index: one of the values, followed by a comma
        // or the end of the column, has to occur once per requested value
        final StringBuilder patternBuilder = new StringBuilder(256);
        patternBuilder.append(p1);
        patternBuilder.append(p2);
        for (final String value : values) {
            patternBuilder.append(Pattern.quote(value));
            patternBuilder.append('|');
        }
        patternBuilder.deleteCharAt(patternBuilder.length()-1);
        patternBuilder.append(p3);
        patternBuilder.append(values.length);
        patternBuilder.append('}');
        final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE);
        return this.iterator(table, whereColumn, p);
    } catch (IOException e) {
        Log.logException(e);
        return new HashSet<Row>().iterator();
    }
}
@Override

@ -0,0 +1,205 @@
// TablesColumnBLOBIndex.java
// (C) 2012 by Stefan Foerster, sof@gmx.de, Norderstedt, Germany
// first published 2012 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.ByteBuffer;
import de.anomic.data.ymark.YMarkUtil;
public class TablesColumnBLOBIndex extends TablesColumnIndex{
// Map<ColumnName, Map<ColumnValue, T<PrimaryKey>>>
private final BEncodedHeap index;
private final static byte SEPERATOR = (byte) ',';
public TablesColumnBLOBIndex(final BEncodedHeap bheap) {
super(TablesColumnIndex.INDEXTYPE.BLOB);
this.index = bheap;
}
public static Collection<byte[]> byteToCollection(final byte[] b) {
final Collection<byte[]> PKset = ByteBuffer.split(b, SEPERATOR);
return PKset;
}
public static byte[] CollectionToByte(final Collection<byte[]> bc) {
final ByteBuffer buf = new ByteBuffer(15 * bc.size());
final Iterator<byte[]> iter = bc.iterator();
while(iter.hasNext()) {
buf.append(iter.next());
buf.append(SEPERATOR);
}
return buf.getBytes();
}
public void deleteIndex(final String columnName) {
final byte[] column = YMarkUtil.getKeyId(columnName);
try {
this.index.remove(column);
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
protected void insertPK(final String columnName, final String columnValue, final byte[] pk) {
Map<String, byte[]> valueIdxMap;
Collection<byte[]> PKset;
final byte[] column = YMarkUtil.getKeyId(columnName);
try {
valueIdxMap = this.index.get(column);
if(valueIdxMap != null) {
if(valueIdxMap.containsKey(columnValue)) {
PKset = byteToCollection(valueIdxMap.get(columnValue));
if(!ByteBuffer.contains(PKset, pk)) {
PKset.add(pk);
}
} else {
PKset = new ArrayList<byte[]>(1);
PKset.add(pk);
valueIdxMap.put(columnValue, CollectionToByte(PKset));
}
} else {
PKset = new ArrayList<byte[]>(1);
PKset.add(pk);
valueIdxMap = new ConcurrentHashMap<String, byte[]>();
}
valueIdxMap.put(columnValue, CollectionToByte(PKset));
this.index.insert(column, valueIdxMap);
return;
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
protected void removePK(final byte[] pk) {
final Iterator<Map.Entry<byte[], Map<String, byte[]>>> niter = this.index.iterator();
while (niter.hasNext()) {
final Map.Entry<byte[], Map<String,byte[]>> entry = niter.next();
final Iterator<Map.Entry<String, byte[]>> viter = entry.getValue().entrySet().iterator();
while(viter.hasNext()) {
final Map.Entry<String, byte[]> columnValue = viter.next();
final Collection<byte[]> PKset = byteToCollection(columnValue.getValue());
ByteBuffer.remove(PKset, pk);
if(PKset.isEmpty()) {
viter.remove();
} else {
columnValue.setValue(CollectionToByte(PKset));
}
}
try {
this.index.insert(entry.getKey(), entry.getValue());
} catch (SpaceExceededException e) {
Log.logException(e);
} catch (IOException e) {
Log.logException(e);
}
}
}
public void clear() {
this.index.clear();
}
public Collection<String> columns() {
return this.index.columns();
}
public Set<String> keySet(final String columnName) {
final byte[] column = YMarkUtil.getKeyId(columnName);
// a TreeSet is used to get sorted set of keys (e.g. folders)
if(this.index.containsKey(column)) {
try {
return new TreeSet<String>(this.index.get(column).keySet());
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
return new TreeSet<String>();
}
public boolean containsKey(final String columnName, final String key) {
final byte[] column = YMarkUtil.getKeyId(columnName);
if(this.index.containsKey(column)) {
try {
return this.index.get(column).containsKey(key);
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
return false;
}
public boolean hasIndex(final String columnName) {
final byte[] column = YMarkUtil.getKeyId(columnName);
return this.index.containsKey(column);
}
public Collection<byte[]> get(final String columnName, final String key) {
final byte[] column = YMarkUtil.getKeyId(columnName);
// deserialize
try {
return byteToCollection(this.index.get(column).get(key));
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
return new ArrayList<byte[]>();
}
public int size(final String columnName) {
final byte[] column = YMarkUtil.getKeyId(columnName);
if(this.index.containsKey(column)) {
try {
return this.index.get(column).size();
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
return -1;
}
public int size() {
return this.index.size();
}
}

@ -0,0 +1,176 @@
// TablesColumnIndex.java
// (C) 2012 by Stefan Foerster, sof@gmx.de, Norderstedt, Germany
// first published 2012 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.order.NaturalOrder;
/**
* a mapping from a column name to maps with the value of the columns to the primary keys where the entry exist in the table
*/
public abstract class TablesColumnIndex {

    public static enum INDEXTYPE {RAM, BLOB}

    private INDEXTYPE type;

    // Map<ColumnName, Map<ColumnValue, T<PrimaryKey>>>
    // private final Map<String, Map<String, TreeSet<byte[]>>> index;

    protected final static Comparator<byte[]> NATURALORDER = new NaturalOrder(true);

    protected abstract void insertPK(final String columnName, final String columnValue, final byte[] pk);
    protected abstract void removePK(final byte[] pk);
    protected abstract void clear();

    public abstract Set<String> keySet(final String columnName);
    public abstract boolean containsKey(final String columnName, final String key);
    public abstract boolean hasIndex(final String columnName);
    public abstract Collection<byte[]> get(final String columnName, final String key);
    public abstract int size(final String columnName);
    public abstract int size();
    public abstract Collection<String> columns();
    public abstract void deleteIndex(final String columnName);

    /**
     * @param type the kind of storage the concrete index uses
     */
    public TablesColumnIndex(INDEXTYPE type) {
        this.type = type;
    }

    /**
     * @return the kind of storage this index uses
     */
    public INDEXTYPE getType() {
        return this.type;
    }

    /**
     * rebuild the index of one column: the existing index of the column is deleted
     * and every row delivered by the iterator is added again
     * @param columnName - name of the column you want to build an index for
     * @param separator - a string value used to split column values into an array; empty if the column holds a single value
     * @param table - an iterator over table rows which should be added to the index
     */
    public synchronized void buildIndex(final String columnName, final String separator, final Iterator<Tables.Row> table) {
        this.deleteIndex(columnName);
        // loop through all rows of the table
        while (table.hasNext()) {
            final Tables.Row row = table.next();
            this.add(columnName, separator, row);
        }
    }

    /**
     * rebuild the index of one column whose values are not split
     * @param columnName - name of the column you want to build an index for
     * @param table - an iterator over table rows which should be added to the index
     */
    public synchronized void buildIndex(final String columnName, final Iterator<Tables.Row> table) {
        this.buildIndex(columnName, "", table);
    }

    /**
     * rebuild the index for several columns: the whole index is cleared
     * and every row delivered by the iterator is added again
     * @param columns - a map from the column names to index to the separator used to split the values of that column (empty for no split)
     * @param table - an iterator over table rows which should be added to the index
     */
    public synchronized void buildIndex(final Map<String,String> columns, final Iterator<Tables.Row> table) {
        this.clear();
        // loop through all rows of the table
        while (table.hasNext()) {
            final Tables.Row row = table.next();
            this.add(columns, row);
        }
    }

    /**
     * insert the primary key once for each of the given column values
     */
    private void insertPK(final String columnName, final String[] columnValues, final byte[] pk) {
        for (int i = 0; i < columnValues.length; i++) {
            this.insertPK(columnName, columnValues[i], pk);
        }
    }

    /**
     * index one column value of a row: the value is indexed as a whole if the
     * separator is empty, otherwise each part of the split value is indexed
     */
    private void insertValue(final String columnName, final String separator, final String value, final byte[] pk) {
        if (separator.isEmpty()) {
            this.insertPK(columnName, value, pk);
        } else {
            this.insertPK(columnName, value.split(separator), pk);
        }
    }

    /**
     * remove a primary key from the index
     */
    public void delete(final byte[] pk) {
        this.removePK(pk);
    }

    /**
     * remove the primary key of a row from the index
     */
    public void delete(final Tables.Row row) {
        this.removePK(row.getPK());
    }

    /**
     * replace the index entries of a row: its primary key is removed from the
     * index, then the given column of the row is indexed again
     */
    public void update(final String columnName, final String separator, final Tables.Row row) {
        this.removePK(row.getPK());
        this.add(columnName, separator, row);
    }

    /**
     * replace the index entries of a row: its primary key is removed from the
     * index, then the given columns of the row are indexed again
     */
    public void update(final Map<String,String> columns, final Tables.Row row) {
        this.removePK(row.getPK());
        this.add(columns, row);
    }

    /**
     * index one column of a row given as a map of column names to string values
     */
    public void add(final String columnName, final String separator, final Map<String,String> map, final byte[] pk) {
        this.insertValue(columnName, separator, map.get(columnName), pk);
    }

    /**
     * index one column of a row given as table data and a separate primary key
     */
    public void add(final String columnName, final String separator, final Tables.Data row, final byte[] pk) {
        this.insertValue(columnName, separator, UTF8.String(row.get(columnName)), pk);
    }

    /**
     * index one column of a table row, using the primary key of the row
     */
    public void add(final String columnName, final String separator, final Tables.Row row) {
        this.insertValue(columnName, separator, UTF8.String(row.get(columnName)), row.getPK());
    }

    /**
     * index several columns of a row given as a map of column names to string values
     * @param columns - a map from column names to the separator of that column (empty for no split)
     */
    public void add(final Map<String,String> columns, final Map<String,String> map, final byte[] pk) {
        for (final String columnName : columns.keySet()) {
            this.insertValue(columnName, columns.get(columnName), map.get(columnName), pk);
        }
    }

    /**
     * index several columns of a row given as table data and a separate primary key
     * @param columns - a map from column names to the separator of that column (empty for no split)
     */
    public void add(final Map<String,String> columns, final Tables.Data row, final byte[] pk) {
        for (final String columnName : columns.keySet()) {
            this.insertValue(columnName, columns.get(columnName), UTF8.String(row.get(columnName)), pk);
        }
    }

    /**
     * index several columns of a table row, using the primary key of the row
     * @param columns - a map from column names to the separator of that column (empty for no split)
     */
    public void add(final Map<String,String> columns, final Tables.Row row) {
        this.add(columns, row, row.getPK());
    }
}

@ -0,0 +1,124 @@
// TablesColumnRAMIndex.java
// (C) 2012 by Stefan Foerster, sof@gmx.de, Norderstedt, Germany
// first published 2012 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.kelondro.order.NaturalOrder;
public class TablesColumnRAMIndex extends TablesColumnIndex{
// Map<ColumnName, Map<ColumnValue, T<PrimaryKey>>>
private final Map<String, Map<String, TreeSet<byte[]>>> index;
private final static Comparator<byte[]> NATURALORDER = new NaturalOrder(true);
public TablesColumnRAMIndex() {
super(TablesColumnIndex.INDEXTYPE.RAM);
this.index = new ConcurrentHashMap<String, Map<String, TreeSet<byte[]>>>();
}
public void deleteIndex(final String columnName) {
this.index.remove(columnName);
}
protected void insertPK(final String columnName, final String columnValue, final byte[] pk) {
Map<String, TreeSet<byte[]>> valueIdxMap;
TreeSet<byte[]> PKset;
if(this.index.containsKey(columnName)) {
valueIdxMap = this.index.get(columnName);
}
else {
valueIdxMap = new ConcurrentHashMap<String, TreeSet<byte[]>>();
this.index.put(columnName, valueIdxMap);
}
if(valueIdxMap.containsKey(columnValue)) {
PKset = valueIdxMap.get(columnValue);
}
else {
PKset = new TreeSet<byte[]>(NATURALORDER);
valueIdxMap.put(columnValue, PKset);
}
PKset.add(pk);
}
protected synchronized void removePK(final byte[] pk) {
for(Map.Entry<String, Map<String, TreeSet<byte[]>>> columnName : this.index.entrySet()) {
final Iterator<Map.Entry<String, TreeSet<byte[]>>> viter = columnName.getValue().entrySet().iterator();
while(viter.hasNext()) {
final Map.Entry<String, TreeSet<byte[]>> columnValue = viter.next();
columnValue.getValue().remove(pk);
if(columnValue.getValue().isEmpty())
viter.remove();
}
}
}
public void clear() {
this.index.clear();
}
public Collection<String> columns() {
return this.index.keySet();
}
public Set<String> keySet(final String columnName) {
// a TreeSet is used to get sorted set of keys (e.g. folders)
if(this.index.containsKey(columnName)) {
return new TreeSet<String>(this.index.get(columnName).keySet());
}
return new TreeSet<String>();
}
public boolean containsKey(final String columnName, final String columnValue) {
if(this.index.containsKey(columnName)) {
return this.index.get(columnName).containsKey(columnValue);
}
return false;
}
public boolean hasIndex(final String columnName) {
return this.index.containsKey(columnName);
}
public Collection<byte[]> get(final String columnName, final String key) {
return this.index.get(columnName).get(key);
}
public int size(final String columnName) {
if(this.index.containsKey(columnName)) {
return this.index.get(columnName).size();
}
return -1;
}
public int size() {
return this.index.size();
}
}
Loading…
Cancel
Save