|
|
@ -115,10 +115,10 @@ public class plasmaCrawlProfile {
|
|
|
|
public class profileIterator implements Iterator {
|
|
|
|
public class profileIterator implements Iterator {
|
|
|
|
// the iterator iterates all keys, which are byte[] objects
|
|
|
|
// the iterator iterates all keys, which are byte[] objects
|
|
|
|
kelondroDyn.dynKeyIterator handleIterator;
|
|
|
|
kelondroDyn.dynKeyIterator handleIterator;
|
|
|
|
entry next;
|
|
|
|
String lastkey;
|
|
|
|
/**
 * Creates an iterator over the keys of the profile table.
 *
 * @param up passed through as the first argument of {@code profileTable.keys};
 *           presumably selects ascending key order - confirm against kelondroDyn
 * @throws IOException declared because obtaining the key iterator may fail
 */
public profileIterator(boolean up) throws IOException {
    handleIterator = profileTable.keys(up, false);
    // nothing has been handed out yet, so remove() has nothing to delete
    lastkey = null;
}
|
|
|
|
public boolean hasNext() {
|
|
|
|
public boolean hasNext() {
|
|
|
|
try {
|
|
|
|
try {
|
|
|
@ -130,16 +130,16 @@ public class plasmaCrawlProfile {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public Object next() {
|
|
|
|
public Object next() {
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
return getEntry((String) handleIterator.next());
|
|
|
|
lastkey = (String) handleIterator.next();
|
|
|
|
|
|
|
|
return getEntry(lastkey);
|
|
|
|
} catch (kelondroException e) {
|
|
|
|
} catch (kelondroException e) {
|
|
|
|
resetDatabase();
|
|
|
|
resetDatabase();
|
|
|
|
return null;
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public void remove() {
|
|
|
|
public void remove() {
|
|
|
|
if (next != null) try {
|
|
|
|
if (lastkey != null) try {
|
|
|
|
Object handle = next.handle();
|
|
|
|
removeEntry(lastkey);
|
|
|
|
if (handle != null) removeEntry((String) handle);
|
|
|
|
|
|
|
|
} catch (kelondroException e) {
|
|
|
|
} catch (kelondroException e) {
|
|
|
|
resetDatabase();
|
|
|
|
resetDatabase();
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -221,7 +221,22 @@ public class plasmaCrawlProfile {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Per-domain bookkeeping record: remembers the referrer and depth given when
 * the domain was first registered and counts how often it was registered.
 */
public class DomProfile {

    /** referrer passed in when this record was created */
    public String referrer;

    /** depth passed in when this record was created */
    public int depth;

    /** number of registrations; starts at 1 and grows with every {@link #inc()} */
    public int count;

    /**
     * Creates a record for a newly seen domain with a count of one.
     *
     * @param ref the referrer to store
     * @param d   the depth to store
     */
    public DomProfile(String ref, int d) {
        this.referrer = ref;
        this.depth = d;
        this.count = 1;
    }

    /** Increases the registration count by one. */
    public void inc() {
        this.count++;
    }

}
|
|
|
|
|
|
|
|
|
|
|
|
public class entry {
|
|
|
|
public class entry {
|
|
|
|
// this is a simple record structure that holds all properties of a single crawl start
|
|
|
|
// this is a simple record structure that holds all properties of a single crawl start
|
|
|
@ -387,33 +402,44 @@ public class plasmaCrawlProfile {
|
|
|
|
mem.put(propName, newValue);
|
|
|
|
mem.put(propName, newValue);
|
|
|
|
profileTable.set(handle(), mem);
|
|
|
|
profileTable.set(handle(), mem);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public void domInc(String domain) {
|
|
|
|
public void domInc(String domain, String referrer, int depth) {
|
|
|
|
Integer c = (Integer) doms.get(domain);
|
|
|
|
DomProfile dp = (DomProfile) doms.get(domain);
|
|
|
|
if (c == null) {
|
|
|
|
if (dp == null) {
|
|
|
|
// new domain
|
|
|
|
// new domain
|
|
|
|
doms.put(domain, new Integer(1));
|
|
|
|
doms.put(domain, new DomProfile(referrer, depth));
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
// increase counter
|
|
|
|
// increase counter
|
|
|
|
doms.put(domain, new Integer(c.intValue() + 1));
|
|
|
|
dp.inc();
|
|
|
|
|
|
|
|
doms.put(domain, dp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
domsCache.put(this.mem.get("handle"), doms);
|
|
|
|
domsCache.put(this.mem.get("handle"), doms);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public int domCount(String domain) {
|
|
|
|
public int domCount(String domain) {
|
|
|
|
Integer c = (Integer) doms.get(domain);
|
|
|
|
DomProfile dp = (DomProfile) doms.get(domain);
|
|
|
|
if (c == null) {
|
|
|
|
if (dp == null) {
|
|
|
|
return 0;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
return c.intValue();
|
|
|
|
return dp.count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public int domSize() {
|
|
|
|
public int domSize() {
|
|
|
|
return doms.size();
|
|
|
|
return doms.size();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public boolean domExists(String domain) {
|
|
|
|
public boolean domExists(String domain) {
|
|
|
|
|
|
|
|
if (domFilterDepth() == Integer.MAX_VALUE) return true;
|
|
|
|
return doms.containsKey(domain);
|
|
|
|
return doms.containsKey(domain);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
public Iterator domNames() {
|
|
|
|
public String domNames(boolean attr) {
|
|
|
|
return doms.keySet().iterator();
|
|
|
|
Iterator domnamesi = doms.entrySet().iterator();
|
|
|
|
|
|
|
|
String domnames="";
|
|
|
|
|
|
|
|
Map.Entry ey;
|
|
|
|
|
|
|
|
DomProfile dp;
|
|
|
|
|
|
|
|
while (domnamesi.hasNext()) {
|
|
|
|
|
|
|
|
ey = (Map.Entry) domnamesi.next();
|
|
|
|
|
|
|
|
dp = (DomProfile) ey.getValue();
|
|
|
|
|
|
|
|
domnames += ((String) ey.getKey()) + ((attr) ? ("/d=" + dp.depth + ",c=" + dp.count + " ") : " ");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return domnames;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|