From ea547baf1e092846589c7cb68e9e2bfd88cec613 Mon Sep 17 00:00:00 2001 From: Brandon Date: Sat, 5 Oct 2024 12:05:34 -0700 Subject: [PATCH] refactor: use threads to read git stdout/stderr (#132) --- PyGitUp/git_wrapper.py | 91 ++++++++++-------------- PyGitUp/tests/__init__.py | 3 +- PyGitUp/tests/test_fetch_large_output.py | 48 +++++++++++++ 3 files changed, 89 insertions(+), 53 deletions(-) create mode 100644 PyGitUp/tests/test_fetch_large_output.py diff --git a/PyGitUp/git_wrapper.py b/PyGitUp/git_wrapper.py index 98daf1f..5371fb0 100644 --- a/PyGitUp/git_wrapper.py +++ b/PyGitUp/git_wrapper.py @@ -20,10 +20,14 @@ import subprocess import platform from contextlib import contextmanager +from io import BufferedReader +from threading import Thread +from typing import IO, Optional # 3rd party libs from termcolor import colored # Assume, colorama is already initialized from git import GitCommandError, CheckoutError as OrigCheckoutError, Git +from git.cmd import Git as GitCmd # PyGitUp libs from PyGitUp.utils import find @@ -135,8 +139,8 @@ def stash(): )) try: self._run('stash') - except GitError as e: - raise StashError(stderr=e.stderr, stdout=e.stdout) + except GitError as git_error: + raise StashError(stderr=git_error.stderr, stdout=git_error.stdout) stashed[0] = True @@ -175,67 +179,51 @@ def rebase(self, target_branch): def fetch(self, *args, **kwargs): """ Fetch remote commits. """ - # Unlike the other git commands, we want to output `git fetch`'s - # output in real time. Therefore we use a different implementation - # from `GitWrapper._run` which buffers all output. - # In theory this may deadlock if `git fetch` prints more than 8 KB - # to stderr which is here assumed to not happen in day-to-day use. - - stdout = b'' - # Execute command cmd = self.git.fetch(as_process=True, *args, **kwargs) - # Capture output - while True: - output = cmd.stdout.read(1) - - sys.stdout.write(output.decode('utf-8')) - sys.stdout.flush() - - stdout += output - - # Check for EOF - if output == b"": - break - - # Wait for the process to quit - try: - cmd.wait() - except GitCommandError as error: - # Add more meta-information to errors - message = "'{}' returned exit status {}".format( - ' '.join(str(c) for c in error.command), - error.status - ) - - raise GitError(message, stderr=error.stderr, stdout=stdout) - - return stdout.strip() + return self.run_cmd(cmd) def push(self, *args, **kwargs): - ''' Push commits to remote ''' - stdout = b'' - + """ Push commits to remote """ # Execute command cmd = self.git.push(as_process=True, *args, **kwargs) - # Capture output - while True: - output = cmd.stdout.read(1) + return self.run_cmd(cmd) - sys.stdout.write(output.decode('utf-8')) - sys.stdout.flush() - - stdout += output - - # Check for EOF - if output == b"": + @staticmethod + def stream_reader(input_stream: BufferedReader, output_stream: Optional[IO], result_list: list) -> None: + """ + Helper method to read from a stream and write to another stream. + """ + captured_bytes = b"" + while True: + read_byte = input_stream.read(1) + captured_bytes += read_byte + if output_stream is not None: + output_stream.write(read_byte.decode('utf-8')) + output_stream.flush() + if read_byte == b"": break + result_list.append(captured_bytes) + + @staticmethod + def run_cmd(cmd: GitCmd.AutoInterrupt) -> bytes: + """ Run a command and return stdout. """ + std_outs = [] + std_errs = [] + stdout_thread = Thread(target=GitWrapper.stream_reader, + args=(cmd.stdout, sys.stdout, std_outs)) + stderr_thread = Thread(target=GitWrapper.stream_reader, + args=(cmd.stderr, None, std_errs)) # Wait for the process to quit try: + stdout_thread.start() + stderr_thread.start() cmd.wait() + stdout_thread.join() + stderr_thread.join() except GitCommandError as error: # Add more meta-information to errors message = "'{}' returned exit status {}".format( @@ -243,9 +231,8 @@ def push(self, *args, **kwargs): error.status ) - raise GitError(message, stderr=error.stderr, stdout=stdout) - - return stdout.strip() + raise GitError(message, stderr=error.stderr, stdout=std_outs[0]) + return std_outs[0].strip() def config(self, key): """ Return `git config key` output or None. """ diff --git a/PyGitUp/tests/__init__.py b/PyGitUp/tests/__init__.py index c58713e..6d0be07 100644 --- a/PyGitUp/tests/__init__.py +++ b/PyGitUp/tests/__init__.py @@ -65,9 +65,10 @@ def update_file(repo, commit_message='', counter=[0], filename=testfile_name): return path_file + def mkrepo(path): """ - Make a repository in 'path', create the the dir, if it doesn't exist. + Make a repository in 'path', create the dir, if it doesn't exist. """ return Repo.init(path) diff --git a/PyGitUp/tests/test_fetch_large_output.py b/PyGitUp/tests/test_fetch_large_output.py new file mode 100644 index 0000000..8c221e4 --- /dev/null +++ b/PyGitUp/tests/test_fetch_large_output.py @@ -0,0 +1,48 @@ +# System imports +from os import sep, chdir +from os.path import join +import io + +from git import * + +from PyGitUp.tests import basepath, init_master + +TEST_NAME = 'fetch-large-output' +REPO_PATH = join(basepath, TEST_NAME + sep) + + +def setup(): + master_path, master = init_master(TEST_NAME) + + # Prepare master repo + master.git.checkout(b=TEST_NAME) + + # Clone to test repo + path = join(basepath, TEST_NAME) + + master.clone(path, b=TEST_NAME) + repo = Repo(path, odbt=GitCmdObjectDB) + + assert repo.working_dir == path + + # Generate lots of branches + total_branch_name_bytes = 0 + for i in range(0, 1500): + branch_name = 'branch-name-%d' % i + total_branch_name_bytes += len(branch_name) + master.git.checkout(b=branch_name) + + +def test_fetch_large_output(): + """ Run 'git up' with a fetch that outputs lots of data """ + # Arrange + chdir(REPO_PATH) + from PyGitUp.gitup import GitUp + gitup = GitUp(testing=True) + + # Act + gitup.run() + + # Assert + assert len(gitup.states) == 1 + assert gitup.states[0] == 'up to date'