Merge bitcoin/bitcoin#23737: test: make feature_init more robust

8904f17ea7 test: enable txindex in feature_init (James O'Beirne) 93db6d8422 test: feature_init: retain debug.log and improve detection (James O'Beirne) 24fcf6e435 test: feature_init: tweak all .ldb files of a certain type (James O'Beirne) a8ffbc01db test: add TestNode.wait_for_debug_log (James O'Beirne) a2fb62b632 test: introduce TestNode.debug_log_bytes (James O'Beirne) Pull request description: Hopefully fixes #23646. This makes a few changes to feature_init (along the lines of the discussion here: https://github.com/bitcoin/bitcoin/pull/23289#discussion_r766615073). The changes are detailed in the specific commits, but a summary might be: - more robust log-line detection (at the expense of maybe being slower) - retain all debug.log content - perturb .ldb files in a more complete way Top commit has no ACKs. Tree-SHA512: 9c8900998fdb96d3cc92bd30c4eeda2a47e9787e6432546d458c17f24eb9e2b6b3eb573db225172888ca71d7a62260f497f83827597b982b9e35c5452cb6e514
3 years ago · d69af93223
parent 31f385c138 8904f17ea7
commit d69af93223
2 changed files with 67 additions and 52 deletions
--- a/test/functional/feature_init.py
+++ b/test/functional/feature_init.py
@ -44,14 +44,9 @@ class InitStressTest(BitcoinTestFramework):
        def sigterm_node():
            node.process.terminate()
            node.process.wait()
-            node.debug_log_path.unlink()
-            node.debug_log_path.touch()

        def check_clean_start():
            """Ensure that node restarts successfully after various interrupts."""
-            # TODO: add -txindex=1 to fully test index initiatlization.
-            # See https://github.com/bitcoin/bitcoin/pull/23289#discussion_r735159180 for
-            # a discussion of the related bug.
            node.start()
            node.wait_for_rpc_connection()
            assert_equal(200, node.getblockcount())
@ -71,56 +66,33 @@ class InitStressTest(BitcoinTestFramework):
            'net thread start',
            'addcon thread start',
            'loadblk thread start',
-            # TODO: reenable - see above TODO
-            # 'txindex thread start',
-            'msghand thread start'
+            'txindex thread start',
+            'msghand thread start',
+            'net thread start',
+            'addcon thread start',
        ]
        if self.is_wallet_compiled():
            lines_to_terminate_after.append('Verifying wallet')

        for terminate_line in lines_to_terminate_after:
            self.log.info(f"Starting node and will exit after line '{terminate_line}'")
-            node.start(
-                # TODO: add -txindex=1 to fully test index initiatlization.
-                # extra_args=['-txindex=1'],
-            )
-            logfile = open(node.debug_log_path, 'r', encoding='utf8')
-
-            MAX_SECS_TO_WAIT = 30
-            start = time.time()
-            num_lines = 0
-
-            while True:
-                line = logfile.readline()
-                if line:
-                    num_lines += 1
-
-                if line and terminate_line.lower() in line.lower():
-                    self.log.debug(f"Terminating node after {num_lines} log lines seen")
-                    sigterm_node()
-                    break
-
-                if (time.time() - start) > MAX_SECS_TO_WAIT:
-                    raise AssertionError(
-                        f"missed line {terminate_line}; terminating now after {num_lines} lines")
+            node.start(extra_args=['-txindex=1'])

-                if node.process.poll() is not None:
-                    raise AssertionError(f"node failed to start (line: '{terminate_line}')")
+            num_total_logs = node.wait_for_debug_log([terminate_line], ignore_case=True)
+            self.log.debug(f"Terminating node after {num_total_logs} log lines seen")
+            sigterm_node()

        check_clean_start()
-        num_total_logs = len(node.debug_log_path.read_text().splitlines())
        self.stop_node(0)

        self.log.info(
            f"Terminate at some random point in the init process (max logs: {num_total_logs})")

        for _ in range(40):
-            terminate_after = random.randint(1, num_total_logs)
-            self.log.debug(f"Starting node and will exit after {terminate_after} lines")
-            node.start(
-                # TODO: add -txindex=1 to fully test index initiatlization.
-                # extra_args=['-txindex=1'],
-            )
+            num_logs = len(Path(node.debug_log_path).read_text().splitlines())
+            additional_lines = random.randint(1, num_total_logs)
+            self.log.debug(f"Starting node and will exit after {additional_lines} lines")
+            node.start(extra_args=['-txindex=1'])
            logfile = open(node.debug_log_path, 'r', encoding='utf8')

            MAX_SECS_TO_WAIT = 10
@ -132,7 +104,8 @@ class InitStressTest(BitcoinTestFramework):
                if line:
                    num_lines += 1

-                if num_lines >= terminate_after or (time.time() - start) > MAX_SECS_TO_WAIT:
+                if num_lines >= (num_logs + additional_lines) or \
+                        (time.time() - start) > MAX_SECS_TO_WAIT:
                    self.log.debug(f"Terminating node after {num_lines} log lines seen")
                    sigterm_node()
                    break
@ -152,11 +125,12 @@ class InitStressTest(BitcoinTestFramework):
        }

        for file_patt, err_fragment in files_to_disturb.items():
-            target_file = list(node.chain_path.glob(file_patt))[0]
+            target_files = list(node.chain_path.glob(file_patt))

-            self.log.info(f"Tweaking file to ensure failure {target_file}")
-            bak_path = str(target_file) + ".bak"
-            target_file.rename(bak_path)
+            for target_file in target_files:
+                self.log.info(f"Tweaking file to ensure failure {target_file}")
+                bak_path = str(target_file) + ".bak"
+                target_file.rename(bak_path)

            # TODO: at some point, we should test perturbing the files instead of removing
            # them, e.g.
@ -170,14 +144,16 @@ class InitStressTest(BitcoinTestFramework):
            # investigate doing this later.

            node.assert_start_raises_init_error(
-                # TODO: add -txindex=1 to fully test index initiatlization.
-                # extra_args=['-txindex=1'],
+                extra_args=['-txindex=1'],
                expected_msg=err_fragment,
                match=ErrorMatch.PARTIAL_REGEX,
            )

-            self.log.info(f"Restoring file from {bak_path} and restarting")
-            Path(bak_path).rename(target_file)
+            for target_file in target_files:
+                bak_path = str(target_file) + ".bak"
+                self.log.debug(f"Restoring file from {bak_path} and restarting")
+                Path(bak_path).rename(target_file)
+
            check_clean_start()
            self.stop_node(0)

--- a/test/functional/test_framework/test_node.py
+++ b/test/functional/test_framework/test_node.py
@ -389,14 +389,17 @@ class TestNode():
    def debug_log_path(self) -> Path:
        return self.chain_path / 'debug.log'

+    def debug_log_bytes(self) -> int:
+        with open(self.debug_log_path, encoding='utf-8') as dl:
+            dl.seek(0, 2)
+            return dl.tell()
+
    @contextlib.contextmanager
    def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
        if unexpected_msgs is None:
            unexpected_msgs = []
        time_end = time.time() + timeout * self.timeout_factor
-        with open(self.debug_log_path, encoding='utf-8') as dl:
-            dl.seek(0, 2)
-            prev_size = dl.tell()
+        prev_size = self.debug_log_bytes()

        yield

@ -419,6 +422,42 @@ class TestNode():
            time.sleep(0.05)
        self._raise_assertion_error('Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(str(expected_msgs), print_log))

+    def wait_for_debug_log(self, expected_msgs, timeout=10, ignore_case=False) -> int:
+        """
+        Block until we see a particular debug log message fragment or until we exceed the timeout.
+        Return:
+            the number of log lines we encountered when matching
+        """
+        time_end = time.time() + timeout * self.timeout_factor
+        prev_size = self.debug_log_bytes()
+        re_flags = re.MULTILINE | (re.IGNORECASE if ignore_case else 0)
+
+        while True:
+            found = True
+            with open(self.debug_log_path, encoding='utf-8') as dl:
+                dl.seek(prev_size)
+                log = dl.read()
+
+            for expected_msg in expected_msgs:
+                if re.search(re.escape(expected_msg), log, flags=re_flags) is None:
+                    found = False
+
+            if found:
+                num_logs = len(log.splitlines())
+                return num_logs
+
+            if time.time() >= time_end:
+                print_log = " - " + "\n - ".join(log.splitlines())
+                break
+
+            # No sleep here because we want to detect the message fragment as fast as
+            # possible.
+
+        self._raise_assertion_error(
+            'Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(
+                str(expected_msgs), print_log))
+        return -1  # useless return to satisfy linter
+
    @contextlib.contextmanager
    def profile_with_perf(self, profile_name: str):
        """