*) Bugfix for minimizeUrlDB

- The function didn't work correctly because of the new URL hash structure.
   See: http://www.yacy-forum.de/viewtopic.php?p=12753#12753

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1080 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 9913049009
commit 3c11d7b81c

@ -186,6 +186,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
*/ */
return new Entry( return new Entry(
oldEntry.url(), oldEntry.url(),
oldEntry.hash(),
oldEntry.descr(), oldEntry.descr(),
oldEntry.moddate(), oldEntry.moddate(),
oldEntry.loaddate(), oldEntry.loaddate(),
@ -388,7 +389,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
private String descr; private String descr;
private Date moddate; private Date moddate;
private Date loaddate; private Date loaddate;
private String urlHash; String urlHash;
private String referrerHash; private String referrerHash;
private int copyCount; private int copyCount;
private String flags; private String flags;
@ -399,11 +400,40 @@ public final class plasmaCrawlLURL extends plasmaURL {
private int wordCount; private int wordCount;
private String snippet; private String snippet;
public Entry(URL url, String descr, Date moddate, Date loaddate, public Entry(
String referrerHash, int copyCount, boolean localNeed, URL url,
int quality, String language, char doctype, long size, int wordCount) { String descr,
Date moddate,
Date loaddate,
String referrerHash,
int copyCount,
boolean localNeed,
int quality,
String language,
char doctype,
long size,
int wordCount
) {
this(url,null,descr,moddate,loaddate,referrerHash,copyCount,localNeed,quality,language,doctype,size,wordCount);
}
Entry(
URL url,
String theUrlHash,
String descr,
Date moddate,
Date loaddate,
String referrerHash,
int copyCount,
boolean localNeed,
int quality,
String language,
char doctype,
long size,
int wordCount
) {
// create new entry and store it into database // create new entry and store it into database
this.urlHash = urlHash(url); this.urlHash = (theUrlHash == null) ? urlHash(url) : theUrlHash;
this.url = url; this.url = url;
this.descr = (descr==null)?this.url.toString():descr; this.descr = (descr==null)?this.url.toString():descr;
this.moddate = moddate; this.moddate = moddate;
@ -417,7 +447,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
this.size = size; this.size = size;
this.wordCount = wordCount; this.wordCount = wordCount;
this.snippet = null; this.snippet = null;
store(); store();
} }
public Entry(String urlHash) { public Entry(String urlHash) {

@ -855,13 +855,13 @@ public final class yacy {
if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) { if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) {
urlCounter++; urlCounter++;
plasmaCrawlLURL.Entry urlEntry = currentUrlDB.getEntry(urlHash); plasmaCrawlLURL.Entry urlEntry = currentUrlDB.getEntry(urlHash);
minimizedUrlDB.newEntry(urlEntry); plasmaCrawlLURL.Entry newEntry = minimizedUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) { if (urlCounter % 500 == 0) {
log.logInfo(urlCounter + " URLs found so far."); log.logInfo(urlCounter + " URLs found so far.");
} }
} }
} }
// we have read all elements, now delete the entity // we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null; wordIdxEntity.close(); wordIdxEntity = null;
if (wordCounter%500 == 0) { if (wordCounter%500 == 0) {

Loading…
Cancel
Save