You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
3.5 KiB
88 lines
3.5 KiB
8 years ago
|
package net.yacy.crawler;
|
||
|
|
||
|
import java.io.File;
|
||
|
import java.io.IOException;
|
||
|
import java.util.Iterator;
|
||
|
import net.yacy.cora.document.id.DigestURL;
|
||
|
import net.yacy.cora.util.SpaceExceededException;
|
||
|
import net.yacy.crawler.retrieval.Request;
|
||
|
import net.yacy.crawler.robots.RobotsTxt;
|
||
|
import net.yacy.data.WorkTables;
|
||
|
import static net.yacy.kelondro.util.FileUtils.deletedelete;
|
||
|
import org.junit.Test;
|
||
|
import static org.junit.Assert.*;
|
||
|
|
||
|
public class HostBalancerTest {
|
||
|
|
||
|
final File queuesRoot = new File("test/DATA/INDEX/QUEUES");
|
||
|
final File datadir = new File("test/DATA");
|
||
|
|
||
|
/**
|
||
|
* Test of reopen existing HostBalancer cache to test/demonstrate issue with
|
||
|
* HostQueue for file: protocol
|
||
|
*/
|
||
|
@Test
|
||
|
public void testReopen() throws IOException, SpaceExceededException, InterruptedException {
|
||
|
boolean exceed134217727 = true;
|
||
|
int onDemandLimit = 1000;
|
||
|
String hostDir = "C:\\filedirectory";
|
||
|
|
||
|
// prepare one urls for push test
|
||
|
String urlstr = "file:///" + hostDir;
|
||
|
DigestURL url = new DigestURL(urlstr);
|
||
|
Request req = new Request(url, null);
|
||
|
|
||
|
deletedelete(queuesRoot); // start clean test
|
||
|
|
||
|
HostBalancer hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
|
||
|
Thread.sleep(100); // wait for file operation
|
||
|
hb.clear();
|
||
|
|
||
|
Thread.sleep(100);
|
||
|
assertEquals("After clear", 0, hb.size());
|
||
|
|
||
|
WorkTables wt = new WorkTables(datadir);
|
||
|
RobotsTxt rob = new RobotsTxt(wt, null);
|
||
|
|
||
|
String res = hb.push(req, null, rob); // push url
|
||
|
assertNull(res); // should have no error text
|
||
|
assertTrue(hb.has(url.hash())); // check existence
|
||
|
assertEquals("first push of one url", 1, hb.size()); // expected size=1
|
||
|
|
||
|
res = hb.push(req, null, rob); // push same url (should be rejected = double occurence)
|
||
|
assertNotNull(res); // should state double occurrence
|
||
|
assertTrue(hb.has(url.hash()));
|
||
|
assertEquals("second push of same url", 1, hb.size());
|
||
|
|
||
|
hb.close(); // close
|
||
|
|
||
|
Thread.sleep(200); // wait a bit for file operation
|
||
|
|
||
|
hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727); // reopen balancer
|
||
|
Thread.sleep(200); // wait a bit for file operation
|
||
|
|
||
|
assertEquals("size after reopen (with one existing url)", 1, hb.size()); // expect size=1 from previous push
|
||
|
assertTrue("check existance of pushed url", hb.has(url.hash())); // check url exists (it fails as after reopen internal queue.hosthash is wrong)
|
||
|
|
||
|
res = hb.push(req, null, rob); // push same url as before (should be rejected, but isn't due to hosthash mismatch afte reopen)
|
||
|
assertNotNull("should state double occurence", res);
|
||
|
assertEquals("first push of same url after reopen", 1, hb.size()); // should stay size=1
|
||
|
assertTrue("check existance of pushed url", hb.has(url.hash()));
|
||
|
|
||
|
res = hb.push(req, null, rob);
|
||
|
assertNotNull("should state double occurence", res);
|
||
|
assertTrue("check existance of pushed url", hb.has(url.hash()));
|
||
|
assertEquals("second push of same url after reopen", 1, hb.size()); // double check, should stay size=1
|
||
|
|
||
|
// list all urls in hostbalancer
|
||
|
Iterator<Request> it = hb.iterator();
|
||
|
while (it.hasNext()) {
|
||
|
Request rres = it.next();
|
||
|
System.out.println(rres.toString());
|
||
|
}
|
||
|
hb.close();
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|