*) Bugfix for minimizeUrlDB

- The function didn't work correctly because of the new URL hash structure
   See: http://www.yacy-forum.de/viewtopic.php?p=12753#12753

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1080 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 9913049009
commit 3c11d7b81c

@ -186,6 +186,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
*/
return new Entry(
oldEntry.url(),
oldEntry.hash(),
oldEntry.descr(),
oldEntry.moddate(),
oldEntry.loaddate(),
@ -388,7 +389,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
private String descr;
private Date moddate;
private Date loaddate;
private String urlHash;
String urlHash;
private String referrerHash;
private int copyCount;
private String flags;
@ -399,11 +400,40 @@ public final class plasmaCrawlLURL extends plasmaURL {
private int wordCount;
private String snippet;
public Entry(URL url, String descr, Date moddate, Date loaddate,
String referrerHash, int copyCount, boolean localNeed,
int quality, String language, char doctype, long size, int wordCount) {
/**
 * Creates an entry without an explicitly supplied URL hash.
 *
 * Delegates to the constructor that additionally accepts a URL hash,
 * passing {@code null} in the hash position; every other argument is
 * forwarded unchanged and in the same order.
 */
public Entry(
URL url,
String descr,
Date moddate,
Date loaddate,
String referrerHash,
int copyCount,
boolean localNeed,
int quality,
String language,
char doctype,
long size,
int wordCount
) {
// null in the second position means "no precomputed URL hash supplied"
this(url,null,descr,moddate,loaddate,referrerHash,copyCount,localNeed,quality,language,doctype,size,wordCount);
}
Entry(
URL url,
String theUrlHash,
String descr,
Date moddate,
Date loaddate,
String referrerHash,
int copyCount,
boolean localNeed,
int quality,
String language,
char doctype,
long size,
int wordCount
) {
// create new entry and store it into database
this.urlHash = urlHash(url);
this.urlHash = (theUrlHash == null) ? urlHash(url) : theUrlHash;
this.url = url;
this.descr = (descr==null)?this.url.toString():descr;
this.moddate = moddate;
@ -417,7 +447,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
this.size = size;
this.wordCount = wordCount;
this.snippet = null;
store();
store();
}
public Entry(String urlHash) {

@ -855,13 +855,13 @@ public final class yacy {
if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) {
urlCounter++;
plasmaCrawlLURL.Entry urlEntry = currentUrlDB.getEntry(urlHash);
minimizedUrlDB.newEntry(urlEntry);
plasmaCrawlLURL.Entry newEntry = minimizedUrlDB.newEntry(urlEntry);
if (urlCounter % 500 == 0) {
log.logInfo(urlCounter + " URLs found so far.");
}
}
}
// we have read all elements, now delete the entity
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;
if (wordCounter%500 == 0) {

Loading…
Cancel
Save