With the file:// protocol, two host queues access the same cache file concurrently (http://mantis.tokeek.de/view.php?id=668). The reason seems to be a different hosthash key of the host queues on reopen. The internal queue key and the external representation (the directory name, currently hostname.port) must be adjusted to fix it (not done yet). pull/62/head
parent
16420e5507
commit
fcc29c36f0
@ -0,0 +1,87 @@
|
||||
package net.yacy.crawler;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.crawler.retrieval.Request;
|
||||
import net.yacy.crawler.robots.RobotsTxt;
|
||||
import net.yacy.data.WorkTables;
|
||||
import static net.yacy.kelondro.util.FileUtils.deletedelete;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class HostBalancerTest {
|
||||
|
||||
final File queuesRoot = new File("test/DATA/INDEX/QUEUES");
|
||||
final File datadir = new File("test/DATA");
|
||||
|
||||
/**
|
||||
* Test of reopen existing HostBalancer cache to test/demonstrate issue with
|
||||
* HostQueue for file: protocol
|
||||
*/
|
||||
@Test
|
||||
public void testReopen() throws IOException, SpaceExceededException, InterruptedException {
|
||||
boolean exceed134217727 = true;
|
||||
int onDemandLimit = 1000;
|
||||
String hostDir = "C:\\filedirectory";
|
||||
|
||||
// prepare one urls for push test
|
||||
String urlstr = "file:///" + hostDir;
|
||||
DigestURL url = new DigestURL(urlstr);
|
||||
Request req = new Request(url, null);
|
||||
|
||||
deletedelete(queuesRoot); // start clean test
|
||||
|
||||
HostBalancer hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
|
||||
Thread.sleep(100); // wait for file operation
|
||||
hb.clear();
|
||||
|
||||
Thread.sleep(100);
|
||||
assertEquals("After clear", 0, hb.size());
|
||||
|
||||
WorkTables wt = new WorkTables(datadir);
|
||||
RobotsTxt rob = new RobotsTxt(wt, null);
|
||||
|
||||
String res = hb.push(req, null, rob); // push url
|
||||
assertNull(res); // should have no error text
|
||||
assertTrue(hb.has(url.hash())); // check existence
|
||||
assertEquals("first push of one url", 1, hb.size()); // expected size=1
|
||||
|
||||
res = hb.push(req, null, rob); // push same url (should be rejected = double occurence)
|
||||
assertNotNull(res); // should state double occurrence
|
||||
assertTrue(hb.has(url.hash()));
|
||||
assertEquals("second push of same url", 1, hb.size());
|
||||
|
||||
hb.close(); // close
|
||||
|
||||
Thread.sleep(200); // wait a bit for file operation
|
||||
|
||||
hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727); // reopen balancer
|
||||
Thread.sleep(200); // wait a bit for file operation
|
||||
|
||||
assertEquals("size after reopen (with one existing url)", 1, hb.size()); // expect size=1 from previous push
|
||||
assertTrue("check existance of pushed url", hb.has(url.hash())); // check url exists (it fails as after reopen internal queue.hosthash is wrong)
|
||||
|
||||
res = hb.push(req, null, rob); // push same url as before (should be rejected, but isn't due to hosthash mismatch afte reopen)
|
||||
assertNotNull("should state double occurence", res);
|
||||
assertEquals("first push of same url after reopen", 1, hb.size()); // should stay size=1
|
||||
assertTrue("check existance of pushed url", hb.has(url.hash()));
|
||||
|
||||
res = hb.push(req, null, rob);
|
||||
assertNotNull("should state double occurence", res);
|
||||
assertTrue("check existance of pushed url", hb.has(url.hash()));
|
||||
assertEquals("second push of same url after reopen", 1, hb.size()); // double check, should stay size=1
|
||||
|
||||
// list all urls in hostbalancer
|
||||
Iterator<Request> it = hb.iterator();
|
||||
while (it.hasNext()) {
|
||||
Request rres = it.next();
|
||||
System.out.println(rres.toString());
|
||||
}
|
||||
hb.close();
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue