orbiter 20 years ago
parent ed978c8564
commit 6594541ef5

@ -230,19 +230,25 @@ public class plasmaWordIndexDistribution {
// first check if we know all urls
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
while (urlEnum.hasMoreElements()) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
if ((lurl == null) || (lurl.toString() == null)) {
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
if (lurl.toString() == null) {
urlPool.loadedURL.remove(indexEntry.getUrlHash());
try {
while (urlEnum.hasMoreElements()) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
if ((lurl == null) || (lurl.toString() == null)) {
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
if (lurl.toString() == null) {
urlPool.loadedURL.remove(indexEntry.getUrlHash());
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
}
}
}
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
@ -258,21 +264,27 @@ public class plasmaWordIndexDistribution {
tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
while ((urlEnum.hasMoreElements()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
if (lurl == null) {
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
if (lurl.toString() == null) {
urlPool.loadedURL.remove(indexEntry.getUrlHash());
try {
while ((urlEnum.hasMoreElements()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
if (lurl == null) {
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
tmpEntity.addEntry(indexEntry);
count--;
if (lurl.toString() == null) {
urlPool.loadedURL.remove(indexEntry.getUrlHash());
unknownURLEntries.add(indexEntry.getUrlHash());
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
tmpEntity.addEntry(indexEntry);
count--;
}
}
}
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();

Loading…
Cancel
Save