diff --git a/htroot/api/ymarks/import_ymark.java b/htroot/api/ymarks/import_ymark.java
index 9097c27d7..40a647483 100644
--- a/htroot/api/ymarks/import_ymark.java
+++ b/htroot/api/ymarks/import_ymark.java
@@ -3,21 +3,27 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
+import java.util.Date;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Parser.Failure;
import net.yacy.document.content.SurrogateReader;
import net.yacy.kelondro.blob.Tables;
+import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import org.xml.sax.SAXException;
+import de.anomic.crawler.CrawlProfile;
+import de.anomic.crawler.CrawlSwitchboard;
+import de.anomic.crawler.retrieval.Request;
import de.anomic.data.BookmarksDB;
import de.anomic.data.UserDB;
import de.anomic.data.WorkTables;
@@ -54,6 +60,8 @@ public class import_ymark {
boolean autotag = false;
boolean merge = false;
boolean empty = false;
+ final String indexing = post.get("indexing", "off");
+ final boolean medialink = post.getBoolean("medialink", false);
if(post.containsKey("autotag") && !post.get("autotag", "off").equals("off")) {
autotag = true;
@@ -67,7 +75,7 @@ public class import_ymark {
t.start();
}
- if(isAdmin && post.containsKey("table") && post.get("table").length() > 0) {
+ if(isAdmin && post.containsKey("table") && post.get("table").length() > 0) {
bmk_user = post.get("table").substring(0, post.get("table").indexOf('_',0));
}
if(post.containsKey("redirect") && post.get("redirect").length() > 0) {
@@ -92,7 +100,7 @@ public class import_ymark {
t = new Thread(surrogateReader, "YMarks - Surrogate Reader");
t.start();
while ((bmk = new YMarkEntry(surrogateReader.take())) != YMarkEntry.POISON) {
- putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
+ putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
prop.put("status", "1");
} else {
@@ -110,7 +118,7 @@ public class import_ymark {
t = new Thread(htmlImporter, "YMarks - HTML Importer");
t.start();
while ((bmk = htmlImporter.take()) != YMarkEntry.POISON) {
- putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
+ putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
prop.put("status", "1");
} else if(post.get("importer").equals("xbel") && reader != null) {
@@ -127,7 +135,7 @@ public class import_ymark {
t = new Thread(xbelImporter, "YMarks - XBEL Importer");
t.start();
while ((bmk = xbelImporter.take()) != YMarkEntry.POISON) {
- putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
+ putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
prop.put("status", "1");
} else if(post.get("importer").equals("json") && reader != null) {
@@ -136,7 +144,7 @@ public class import_ymark {
t = new Thread(jsonImporter, "YMarks - JSON Importer");
t.start();
while ((bmk = jsonImporter.take()) != YMarkEntry.POISON) {
- putBookmark(sb.tables.bookmarks, bmk_user, bmk, autoTaggingQueue, autotag, empty);
+ putBookmark(sb, bmk_user, bmk, autoTaggingQueue, autotag, empty, indexing, medialink);
}
prop.put("status", "1");
}
@@ -219,13 +227,13 @@ public class import_ymark {
return prop;
}
- public static void putBookmark(final YMarkTables ymarks, final String bmk_user, final YMarkEntry bmk,
- final ArrayBlockingQueue
autoTaggingQueue, final boolean autotag, final boolean empty) {
+ public static void putBookmark(final Switchboard sb, final String bmk_user, final YMarkEntry bmk,
+ final ArrayBlockingQueue autoTaggingQueue, final boolean autotag, final boolean empty, final String indexing, final boolean medialink) {
try {
final String url = bmk.get(YMarkEntry.BOOKMARK.URL.key());
// other protocols could cause problems
if(url != null && url.startsWith("http")) {
- ymarks.addBookmark(bmk_user, bmk, true, true);
+ sb.tables.bookmarks.addBookmark(bmk_user, bmk, true, true);
if(autotag) {
if(!empty) {
autoTaggingQueue.put(url);
@@ -233,6 +241,16 @@ public class import_ymark {
autoTaggingQueue.put(url);
}
}
+
+ // fill crawler
+ if (indexing.equals("single")) {
+ crawlStart(sb, new DigestURI(url), CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, 0, true, medialink);
+ } else if (indexing.equals("onelink")) {
+ crawlStart(sb, new DigestURI(url), CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, 1, true, medialink);
+ } else if (indexing.equals("fulldomain")) {
+ final DigestURI u = new DigestURI(url);
+ crawlStart(sb, u, CrawlProfile.mustMatchFilterFullDomain(u), CrawlProfile.MATCH_NEVER_STRING, 99, false, medialink);
+ }
}
} catch (final IOException e) {
Log.logException(e);
@@ -242,6 +260,35 @@ public class import_ymark {
Log.logException(e);
}
}
+
+ /**
+  * Creates a crawl profile for a single start URL, registers it as an active
+  * profile on the switchboard's crawler and hands the start URL to the crawl stacker.
+  *
+  * @param sb              switchboard providing the crawler, crawl stacker and peer seed
+  * @param startURL        URL the crawl starts from; its host names the profile
+  * @param urlMustMatch    must-match filter forwarded to the crawl profile
+  * @param urlMustNotMatch must-not-match filter forwarded to the crawl profile
+  * @param depth           depth value forwarded to the crawl profile
+  * @param crawlingQ       forwarded to the crawl profile
+  *                        (NOTE(review): presumably "crawl URLs with a query part" - confirm)
+  * @param medialink       forwarded to the crawl profile
+  * @return whatever {@code stackCrawl} returns for the root request
+  */
+ public static String crawlStart(
+         final Switchboard sb,
+         final DigestURI startURL,
+         final String urlMustMatch,
+         final String urlMustNotMatch,
+         final int depth,
+         final boolean crawlingQ,
+         final boolean medialink) {
+     // the profile is named after the host; URLs without a host use their normal form
+     final String host = startURL.getHost();
+     final String profileName = (host != null) ? host : startURL.toNormalform(true, false);
+     final long recrawlDate = CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE);
+
+     final CrawlProfile profile = new CrawlProfile(
+             profileName, null,
+             urlMustMatch,
+             urlMustNotMatch,
+             CrawlProfile.MATCH_ALL_STRING,
+             CrawlProfile.MATCH_NEVER_STRING,
+             "", depth, medialink,
+             recrawlDate, -1, crawlingQ,
+             true, true, true, false, true, true, true,
+             CacheStrategy.IFFRESH);
+     sb.crawler.putActive(profile.handle().getBytes(), profile);
+
+     // the start URL is stacked as the root of the new crawl, initiated by this peer
+     final byte[] initiator = sb.peers.mySeed().hash.getBytes();
+     final Request rootRequest = new Request(
+             initiator,
+             startURL,
+             null,
+             "CRAWLING-ROOT",
+             new Date(),
+             profile.handle(), 0, 0, 0, 0);
+     return sb.crawlStacker.stackCrawl(rootRequest);
+ }
+
}
diff --git a/htroot/env/yacy-ymarks.css b/htroot/env/yacy-ymarks.css
index f0cc34c8e..50ab601bd 100644
--- a/htroot/env/yacy-ymarks.css
+++ b/htroot/env/yacy-ymarks.css
@@ -108,6 +108,10 @@ img.help {
margin: 5px 5px 5px 5px;
}
+/* outer spacing of the tags-input holder element with id bm_tags_tagsinput */
+#bm_tags_tagsinput {
+	margin: 0 0 2px 5px;
+}
+
/* YaCy Flexigrid ---------------------------*/
.flexigrid div.fbutton .burst {
diff --git a/htroot/jquery/css/jquery.tagsinput.css b/htroot/jquery/css/jquery.tagsinput.css
new file mode 100644
index 000000000..c595e249f
--- /dev/null
+++ b/htroot/jquery/css/jquery.tagsinput.css
@@ -0,0 +1,7 @@
+/* Styles for the jQuery tags-input widget: holder box, tag chips, inline text input. */
+div.tagsinput { border:1px solid #CCC; background: #FFF; padding:5px; width:300px; height:100px; overflow-y: auto;}
+/* vendor-prefixed radii are kept for old engines; the unprefixed property serves all others */
+div.tagsinput span.tag { border: 1px solid #a5d24a; -moz-border-radius:2px; -webkit-border-radius:2px; border-radius:2px; display: block; float: left; padding: 5px; text-decoration:none; background: #cde69c; color: #638421; margin-right: 5px; margin-bottom:5px;font-family: helvetica; font-size:13px;}
+div.tagsinput span.tag a { font-weight: bold; color: #82ad2b; text-decoration:none; font-size: 11px; }
+div.tagsinput input { width:80px; margin:0px; font-family: helvetica; font-size: 13px; border:1px solid transparent; padding:5px; background: transparent; color: #000; outline:0px; margin-right:5px; margin-bottom:5px; }
+div.tagsinput div { display:block; float: left; }
+.tags_clear { clear: both; width: 100%; height: 0px; }
+/* highlight applied to the input when a tag is rejected */
+.not_valid {background: #FBD8DB !important; color: #90111A !important;}
diff --git a/htroot/jquery/js/jquery.tagsinput.min.js b/htroot/jquery/js/jquery.tagsinput.min.js
new file mode 100644
index 000000000..edc0d5f93
--- /dev/null
+++ b/htroot/jquery/js/jquery.tagsinput.min.js
@@ -0,0 +1 @@
+(function(a){var b=new Array;var c=new Array;a.fn.doAutosize=function(b){var c=a(this).data("minwidth"),d=a(this).data("maxwidth"),e="",f=a(this),g=a("#"+a(this).data("tester_id"));if(e===(e=f.val())){return}var h=e.replace(/&/g,"&").replace(/\s/g," ").replace(//g,">");g.html(h);var i=g.width(),j=i+b.comfortZone>=c?i+b.comfortZone:c,k=f.width(),l=j=c||j>c&&j").css({position:"absolute",top:-9999,left:-9999,width:"auto",fontSize:f.css("fontSize"),fontFamily:f.css("fontFamily"),fontWeight:f.css("fontWeight"),letterSpacing:f.css("letterSpacing"),whiteSpace:"nowrap"}),h=a(this).attr("id")+"_autosize_tester";if(!a("#"+h).length>0){g.attr("id",h);g.appendTo("body")}f.data("minwidth",c);f.data("maxwidth",d);f.data("tester_id",h);f.css("width",c)};a.fn.addTag=function(d,e){e=jQuery.extend({focus:false,callback:true},e);this.each(function(){var f=a(this).attr("id");var g=a(this).val().split(b[f]);if(g[0]==""){g=new Array}d=jQuery.trim(d);if(e.unique){var h=a(g).tagExist(d);if(h==true){a("#"+f+"_tag").addClass("not_valid")}}else{var h=false}if(d!=""&&h!=true){a("").addClass("tag").append(a("").text(d).append(" "),a("",{href:"#",title:"Removing tag",text:"x"}).click(function(){return a("#"+f).removeTag(escape(d))})).insertBefore("#"+f+"_addTag");g.push(d);a("#"+f+"_tag").val("");if(e.focus){a("#"+f+"_tag").focus()}else{a("#"+f+"_tag").blur()}a.fn.tagsInput.updateTagsField(this,g);if(e.callback&&c[f]&&c[f]["onAddTag"]){var i=c[f]["onAddTag"];i.call(this,d)}if(c[f]&&c[f]["onChange"]){var j=g.length;var i=c[f]["onChange"];i.call(this,a(this),g[j-1])}}});return false};a.fn.removeTag=function(d){d=unescape(d);this.each(function(){var e=a(this).attr("id");var f=a(this).val().split(b[e]);a("#"+e+"_tagsinput .tag").remove();str="";for(i=0;i=0};a.fn.importTags=function(b){id=a(this).attr("id");a("#"+id+"_tagsinput .tag").remove();a.fn.tagsInput.importTags(this,b)};a.fn.tagsInput=function(d){var e=jQuery.extend({interactive:true,defaultText:"add a 
tag",minChars:0,width:"300px",height:"100px",autocomplete:{selectFirst:false},hide:true,delimiter:",",unique:true,removeWithBackspace:true,placeholderColor:"#666666",autosize:true,comfortZone:20,inputPadding:6*2},d);this.each(function(){if(e.hide){a(this).hide()}var d=a(this).attr("id");var f=jQuery.extend({pid:d,real_input:"#"+d,holder:"#"+d+"_tagsinput",input_wrapper:"#"+d+"_addTag",fake_input:"#"+d+"_tag"},e);b[d]=f.delimiter;if(e.onAddTag||e.onRemoveTag||e.onChange){c[d]=new Array;c[d]["onAddTag"]=e.onAddTag;c[d]["onRemoveTag"]=e.onRemoveTag;c[d]["onChange"]=e.onChange}var g='';a(g).insertAfter(this);a(f.holder).css("width",e.width);a(f.holder).css("height",e.height);if(a(f.real_input).val()!=""){a.fn.tagsInput.importTags(a(f.real_input),a(f.real_input).val())}if(e.interactive){a(f.fake_input).val(a(f.fake_input).attr("data-default"));a(f.fake_input).css("color",e.placeholderColor);a(f.fake_input).resetAutosize(e);a(f.holder).bind("click",f,function(b){a(b.data.fake_input).focus()});a(f.fake_input).bind("focus",f,function(b){if(a(b.data.fake_input).val()==a(b.data.fake_input).attr("data-default")){a(b.data.fake_input).val("")}a(b.data.fake_input).css("color","#000000")});if(e.autocomplete_url!=undefined){autocomplete_options={source:e.autocomplete_url};for(attrname in e.autocomplete){autocomplete_options[attrname]=e.autocomplete[attrname]}if(jQuery.Autocompleter!==undefined){a(f.fake_input).autocomplete(e.autocomplete_url,e.autocomplete);a(f.fake_input).bind("result",f,function(b,c,f){if(c){a("#"+d).addTag(c[0]+"",{focus:true,unique:e.unique})}})}else if(jQuery.ui.autocomplete!==undefined){a(f.fake_input).autocomplete(autocomplete_options);a(f.fake_input).bind("autocompleteselect",f,function(b,c){a(b.data.real_input).addTag(c.item.value,{focus:true,unique:e.unique});return false})}}else{a(f.fake_input).bind("blur",f,function(b){var 
c=a(this).attr("data-default");if(a(b.data.fake_input).val()!=""&&a(b.data.fake_input).val()!=c){if(b.data.minChars<=a(b.data.fake_input).val().length&&(!b.data.maxChars||b.data.maxChars>=a(b.data.fake_input).val().length))a(b.data.real_input).addTag(a(b.data.fake_input).val(),{focus:true,unique:e.unique})}else{a(b.data.fake_input).val(a(b.data.fake_input).attr("data-default"));a(b.data.fake_input).css("color",e.placeholderColor)}return false})}a(f.fake_input).bind("keypress",f,function(b){if(b.which==b.data.delimiter.charCodeAt(0)||b.which==13){b.preventDefault();if(b.data.minChars<=a(b.data.fake_input).val().length&&(!b.data.maxChars||b.data.maxChars>=a(b.data.fake_input).val().length))a(b.data.real_input).addTag(a(b.data.fake_input).val(),{focus:true,unique:e.unique});a(b.data.fake_input).resetAutosize(e);return false}else if(b.data.autosize){a(b.data.fake_input).doAutosize(e)}});f.removeWithBackspace&&a(f.fake_input).bind("keydown",function(b){if(b.keyCode==8&&a(this).val()==""){b.preventDefault();var c=a(this).closest(".tagsinput").find(".tag:last").text();var d=a(this).attr("id").replace(/_tag$/,"");c=c.replace(/[\s]+x$/,"");a("#"+d).removeTag(escape(c));a(this).trigger("focus")}});a(f.fake_input).blur();if(f.unique){a(f.fake_input).keydown(function(b){if(b.keyCode==8||String.fromCharCode(b.which).match(/\w+|[áéíóúÁÉÍÓÚñÑ,/]+/)){a(this).removeClass("not_valid")}})}}return false});return this};a.fn.tagsInput.updateTagsField=function(c,d){var e=a(c).attr("id");a(c).val(d.join(b[e]))};a.fn.tagsInput.importTags=function(d,e){a(d).val("");var f=a(d).attr("id");var g=e.split(b[f]);for(i=0;i implements M
/**
 * Computes the point in time that lies the given number of minutes before now.
 *
 * @param oldTimeMinutes number of minutes to go back from the current system time
 * @return the current system time in milliseconds minus {@code oldTimeMinutes} minutes
 */
public static long getRecrawlDate(final long oldTimeMinutes) {
    final long ageMillis = oldTimeMinutes * 60000L;
    final long now = System.currentTimeMillis();
    return now - ageMillis;
}
+
+ /**
+  * Builds the must-match URL filter (a regular expression) that restricts a crawl
+  * to the location of its start URL.
+  * <ul>
+  * <li>file URLs: everything below the start path</li>
+  * <li>smb and ftp URLs: everything on the same host</li>
+  * <li>all other URLs: everything on the same host via http or https; if the host
+  *     does not start with "www.", the "www." variant of the host is accepted too</li>
+  * </ul>
+  * Host and path are inserted as quoted literals, so characters such as '.' in a
+  * host name only match themselves and not arbitrary characters.
+  *
+  * @param crawlingStartURL the URL the crawl starts from
+  * @return the filter expression; {@code CrawlProfile.MATCH_NEVER_STRING} if a
+  *         non-file URL has no host, because there is no domain to stay within
+  */
+ public static String mustMatchFilterFullDomain(final MultiProtocolURI crawlingStartURL) {
+     if (crawlingStartURL.isFile()) {
+         // String.valueOf keeps the former text for a null path instead of failing in quote()
+         return "file://" + java.util.regex.Pattern.quote(String.valueOf(crawlingStartURL.getPath())) + ".*";
+     }
+
+     final String host = crawlingStartURL.getHost();
+     if (host == null) {
+         // previously a NullPointerException (http) or a filter on the literal text "null" (smb/ftp)
+         return CrawlProfile.MATCH_NEVER_STRING;
+     }
+     final String quotedHost = java.util.regex.Pattern.quote(host);
+
+     if (crawlingStartURL.isSMB()) {
+         return "smb://" + quotedHost + ".*";
+     }
+     if (crawlingStartURL.isFTP()) {
+         return "ftp://" + quotedHost + ".*";
+     }
+     if (host.startsWith("www.")) {
+         return "https?://" + quotedHost + ".*";
+     }
+     // if the www is not given we accept that also
+     return "https?://(www\\.)?" + quotedHost + ".*";
+ }
}