From 8115141304309b250d613938d3982cb83e14926a Mon Sep 17 00:00:00 2001 From: James Gebbie-Rayet Date: Fri, 23 Aug 2024 15:34:45 +0100 Subject: [PATCH] adding first pass --- code_quality/1_code_quality.md | 34 ++++ code_quality/2_pylint.md | 95 +++++++++++ code_quality/3_black.md | 102 ++++++++++++ code_quality/4_isort.md | 101 ++++++++++++ code_quality/5_precommit.md | 194 +++++++++++++++++++++++ code_quality/6_final_remarks.md | 10 ++ code_quality/examples/1_random_code.py | 20 +++ code_quality/examples/2_random_code.py | 20 +++ code_quality/examples/3_random_import.py | 22 +++ 9 files changed, 598 insertions(+) create mode 100644 code_quality/1_code_quality.md create mode 100644 code_quality/2_pylint.md create mode 100644 code_quality/3_black.md create mode 100644 code_quality/4_isort.md create mode 100644 code_quality/5_precommit.md create mode 100644 code_quality/6_final_remarks.md create mode 100644 code_quality/examples/1_random_code.py create mode 100644 code_quality/examples/2_random_code.py create mode 100644 code_quality/examples/3_random_import.py diff --git a/code_quality/1_code_quality.md b/code_quality/1_code_quality.md new file mode 100644 index 0000000..67ccb5c --- /dev/null +++ b/code_quality/1_code_quality.md @@ -0,0 +1,34 @@ +# Code Quality 101 + +This course is an introduction to using pylint to increase the quality of software that you may write in the future. + + +### Prerequisites + +* Access to a computer that has Python installed +* Familiar with any programming language - beneficial if you have basic Python +* Basic familiarity with how to start command-line based tools + + +### Summary + +As software engineers or scientists that write software scripts, whether this is for proper distributable software packages and tools or simply to share simulation setup protocols with your colleagues, 
it is best practice from the outset to write code with quality, sharing and maintainability in mind, even if at the start you do not wish to develop for the purpose of sharing. + +In this course we will focus on tools for code quality in the Python and git toolchains, but you can find similar tools for other toolchains out in the wild. + + +## Code Quality - Why do we care? + +Imagine you are a new enthusiastic researcher, you have just landed a new research post (PhD or PDRA) and on your first day you are introduced to your project. The project up until now has been described as working on an exciting piece of science to extend previous works in a new cutting edge direction. + +On your first day you are given a compressed file archive and told "this is the software we have for doing this research, it has been developed by several folks over the years". You are probably thinking "ok - awesome, can't wait to get started". You go away and start looking through the code and you start to find it is very poorly structured, difficult to read and has been coded in several different styles all smashed together - and here is the kicker, there's also no version control! + +To stop this endless cycle of drama with software development, software engineers have for many years put significant effort into standardising the way that we write our software so that such issues become a thing of the past. This has required a significant community effort to standardise the ways in which we write software so that we can make this happen. + +One such standard that belongs to the Python community is the PEP 8 standard (or style guide). [PEPs](https://peps.python.org/) are Python community parlance for ratified and agreed ways of doing things that all developers of tools in the language should adhere to. There are many useful PEPs but in this particular workshop, we will focus on [PEP 8](https://peps.python.org/pep-0008/). 
+ +Activity: Have a look over the two links above just to appreciate what information they contain. + +The reason we use standards, is that they lead to cleaner code that is more easy to share, if we all write and learn to read code in a particular format, then contributing to other projects becomes very easy. Which means our code is a lot easier to maintain as it grows, both in its size and in community. + +This course is designed to give you an introduction to some of the types of tools that are available to us as developers and how to get started using them. Often the difficult bit is simply getting going rather than learning about the advanced features! diff --git a/code_quality/2_pylint.md b/code_quality/2_pylint.md new file mode 100644 index 0000000..0a29516 --- /dev/null +++ b/code_quality/2_pylint.md @@ -0,0 +1,95 @@ +# Introducing Pylint + +You can find a much more exhaustive look at Pylint in the [Documentation](https://pylint.readthedocs.io/en/stable/) + +Now PEP 8 is a rather large document and we are not going to get very far in improving our ways of working if we require all developers to memorise a standard like that!! So this is where pylint comes in, we use pylint to test our compliance against a standard and by default this will be PEP 8. So every time you are working on python, you can use Pylint to check your code for compliance before you version control it, and this means you don't have to hold the whole of PEP 8 in your head! + + +### Installing Pylint + +Installing pylint is fairly trivial, we can simply use Python pip. + +```bash +pip install pylint +``` + +If you are working in a conda environment then you can use pip to install it into your conda environment. You must have activated the environment before running "pip install pylint" otherwise it will not be installed into the correct path. This is particularly useful if you have many projects, and some might have different python package version and/or dependency requirements. 
+ + +### Getting started with Pylint + +The above installation should have gone smoothly, since it is just a pip-installed package. + +You should then be able to check it is installed by running: + +```bash +pylint --version +``` + +This is a good way to also see what specific version is installed. We can get some help on the command-line with regards to basic functionality by running: + +```bash +pylint --help +``` + +It is often more helpful to run the longer help output to get a quick idea of what you can do with pylint on the command-line without having to go into the documentation: + + +```bash +pylint --long-help +``` + +A useful part of the help output is towards the end of the help message. It shows the following output, and this gives you an idea of what the messages that pylint returns are picking up on. + +```bash +Output: + Using the default text output, the message format is : + MESSAGE_TYPE: LINE_NUM:[OBJECT:] MESSAGE + There are 5 kind of message types : + * (C) convention, for programming standard violation + * (R) refactor, for bad code smell + * (W) warning, for python specific problems + * (E) error, for probable bugs in the code + * (F) fatal, if an error occurred which prevented pylint from doing + further processing. +``` + +For example if pylint had this as one of its outputs "C0114: Missing module docstring (missing-module-docstring)" the "C" is telling you it is a convention related issue so is likely to be a violation of the PEP 8 standard, if you don't yet know how to fix it you can go to the PEP 8 document linked above and actually look at the correct way. The other letters will typically indicate code smells, warnings or errors in the code that should be fixed. + +You should however consult the documentation linked above, for very specific information about Pylint, and of course for advanced information not covered in this introduction. 
+ + +### Using Pylint on an example + +Enough with the intro, let's get going on taking Pylint for a spin. We have provided a code example randomly discovered on the internet, since they are often badly written and this one will not disappoint! + +In your terminal, change into the examples directory for the code quality part of the workshop, for example if you are already in the repository path in your terminal then: + +```bash +cd code_quality/examples +``` + +It is worth having a look at the example programme to see what the code looks like, but you don't need to figure out exactly what it does for this tutorial. The cat command will dump the contents of the file to your terminal: + +```bash +cat 1_random_code.py +``` + +Now run Pylint on this file: + +```bash +pylint 1_random_code.py +``` + +What do you see and what does it mean? + +You will notice that there are a bunch of bad indentation warnings and a few other warnings listed, and you will also see that it grades the code out of 10, this one is pretty shocking! If you are not sure what these mean and how to fix them, then consult the [PEP 8](https://peps.python.org/pep-0008/) standard where it will tell you specifically what the code should have looked like. + +Activity: Use the PEP 8 standard and Pylint together to fix this code. Can you get this to zero warnings and a code score of 10? + +Activity: Can you find a random program on the internet or even one you have written yourself and apply Pylint to it? + +That is all there is to using Pylint. As you can imagine, this was a small piece of code. Imagine you had just inherited thousands of lines or tens of thousands or even a million lines of code. You aren't going to fix it all by hand any time fast, and this effort is what we call technical debt. Technical debt is a concept in programming where we measure the effort required versus the benefit of doing it; for massive code bases it would be a big effort to standardise a badly written code base. 
+ +This is where the next tool comes in! + diff --git a/code_quality/3_black.md b/code_quality/3_black.md new file mode 100644 index 0000000..f6cd09d --- /dev/null +++ b/code_quality/3_black.md @@ -0,0 +1,102 @@ +# Introducing Black + +You can find a much more exhaustive look at Black in the [Documentation](https://black.readthedocs.io/en/stable/) + +Wouldn't it be great if we could automate some of the changes that we need to make in order to simplify making our codes of better quality? + +Black is a really useful software utility that enables us to make some of these changes automatically. It will mainly sort out code formatting issues that would crop up as convention issues within Pylint. + + +### Installing Black + +Again, like with Pylint, we can very quickly and easily install black with Python pip. + +```bash +pip install black +``` + +Also, like with Pylint, you can pip install this into any conda environment that you may be using should you have many projects with different dependencies. + + +### Getting started with Black + +As with Pylint, you can check the package is installed correctly and find out what version you have by running: + +```bash +black --version +``` + +You can also get some pointers on the command-line for how to use the tool by running: + +```bash +black --help +``` + +Black is generally run in one of two ways. + +The first way is you can have it run through your code and only do checks to report what it found, like this: + +```bash +black --check --target-version=py35 +``` + +or the second way is you let it reformat your code so that you have automated code quality improvements. Like this: + +```bash +black --target-version=py35 +``` + +There is an important caution that needs to be made here. Black is not always right!!! It can sometimes make changes that although might meet the language spec of something like PEP 8, it would look really bad to a human working on the source. 
You tend to see these kinds of artifacts when processing certain types of formatted lists or strings where formatting effort has been designed to make them readable but not necessarily standard, but this is still rare. + +It is important to still run a linter on the changes that Black makes, so in the case of Python, you would run Pylint after running Black. + +### Example of using Black + +Make sure your terminal is still in the examples directory, if you are in the repository main directory then: + +```bash +cd code_quality/examples +``` + +Once here, let's run Pylint again on the second file example. This is actually the same program you had in the Pylint example, but with the original mistakes. + +```bash +pylint 2_random_code.py +``` + +You will recognise the errors and warnings from the previous example (hopefully). Ok so this time we are not going to fix it by hand. We are going to use Black to fix as much of it as possible as a first pass. + +```bash +black --check --target-version=py35 2_random_code.py +``` + +Hopefully you will see that Black has its usual "Oh no!" message when it has found issues with your code and said it would reformat it. Now this is not very useful if you wanted to see what it would do beforehand. If you want to see what it will change, then use the --diff flag on the command-line: + +```bash +black --check --diff --target-version=py35 2_random_code.py +``` + +You will see a typical diff that Linux diff utils usually present with lines beginning with a "-" denoting lines that have been removed and lines beginning with a "+" denoting lines added, in this case it is dealing mostly with bad indentation and white space issues as per the PEP 8 standard so you will see the lines changing in those ways. + +Now if you are happy with what you see, you can run the same command again without the --check and --diff flags and it will change the file. 
+ +```bash +black --target-version=py35 2_random_code.py +``` + +We don't really always need to check and look at the diff, we are doing this for your benefit so you can see what the tool is doing to the code. In reality this is fully automatable like Pylint is, and we only check deeper when issues arise, since it is mostly rare. + +Now lets check that file with Pylint again, since we should always check Pylint after tools have modified code: + +```bash +pylint 2_random_code.py +``` + +What do you see now, compared to the very first time? + +You are probably noticing it has fixed a whole bunch of things but a few minor things remain. Usually things that remain will be things like the docstrings etc, since Black cannot interpret what your code is supposed to do and add documentation (though this might change in the post-chatGPT era!!!). + +Activity: Can you find a random program on the internet or even one you have written yourself and apply Black to it? + +The upshot is that you would have a lot less work to do on minor issues than if you only used Pylint and manual fixing, so the technical debt of maintaining high quality code is even lower. + diff --git a/code_quality/4_isort.md b/code_quality/4_isort.md new file mode 100644 index 0000000..4afbdfa --- /dev/null +++ b/code_quality/4_isort.md @@ -0,0 +1,101 @@ +# Introduction to isort + +You can find a much more exhaustive look at isort in the [Documentation](https://pycqa.github.io/isort/) + +Another area of code that is often overlooked but is extremely important in automating the process of making your code better and more readable. + +isort is a tool for dealing with the python module imports part of a script, it will sort the imports alphabetically and also separate different types of imports into groups of like types such that it makes it very simple to keep track of them. 
+ +### Installing isort + +Like with the other tools we can simply install isort using the python pip installer like this: + +```bash +pip install isort +``` + +You can also install it with pip into a conda environment as we have discussed with the other tools. + + +### Getting started with isort + +As with Pylint and black, you can check the package is installed correctly and find out what version you have by running: + +```bash +isort --version +``` + +You can also get some pointers on the command-line for how to use the tool by running: + +```bash +isort --help +``` + +It is possible to run isort in a number of ways and the usual basic syntax goes something like this: + + +```bash +isort +``` + +Or for all files in the current directory you could just do: + +```bash +isort . +``` + +Like with black it is possible to run isort to show a diff of what it will change before letting it do it, and you do this again with the --diff flag. + +```bash +isort --diff +``` + +When running isort automatically without checking the outputs (which is what you want), you can instruct it to only make changes if there are no syntax errors introduced, and we do this by setting the --atomic flag. + +```bash +isort --atomic +``` + +### Example with isort + +isort is a pretty simple utility that only acts on the imports in a Python application. This is an example from the isort documentation, but it illustrates how isort works perfectly! + +Again, lets make sure you are in the correct directory path, if you are in the repository root then: + +```bash +cd code_quality/examples +``` + +Firstly lets have a look at the contents of the file we are about to sort out: + +```bash +cat 3_random_import.py +``` + +You will see that there is not much going on in this program, and if you tried to run it then it probably wouldn't actually run anything, it is just for the purpose of this sort of example. 
As you will see this file is all over the place, imports that import functions from the same package are on different lines and there is no sorting or aggregation of similar imports. This is bad practice because if you were maintaining this package it would take you a lot longer to identify imports to change if your code changes if they are not organised properly. This gets a lot, lot worse for bigger projects. + +So let's see what isort will actually change: + +```bash +isort --diff 3_random_import.py +``` + +As you can see, again like with Black, the diff will show which lines are removed (minus) and which ones are added (plus). You can see when comparing the minus lines with the plus lines that they are both better organised and shorter in general than the original file. + +To have isort run and change the file: + +```bash +isort 3_random_import.py +``` + +Then if you have a look in the file again with cat: + +```bash +cat 3_random_import.py +``` + +You will see it is much, much more organised and easier to read. + +Activity: Can you find a random program on the internet or even one you have written yourself and apply isort to it? + + diff --git a/code_quality/5_precommit.md b/code_quality/5_precommit.md new file mode 100644 index 0000000..0500c7e --- /dev/null +++ b/code_quality/5_precommit.md @@ -0,0 +1,194 @@ +# Fully Automating all of this + +We have seen now how to install various tools, a linter and two formatters to help us significantly up our game when it comes to code quality. The next step is how to hook this into our version control tooling so that it is all automated. + + +### Introducing pre-commit + +You can find a much more exhaustive look at pre-commit in the [Documentation](https://pre-commit.com/index.html) + +Git has an interface that allows developers to extend the functionality of the application to include features beyond just version controlling code. 
This interface is called "hooks", git hooks are basically just a set of scripts that run at various "hooked" stages within the git workflow. For example, you can hook the commit step of the version control workflow to run tools whenever you are about to commit a new code change to your repository. + +pre-commit is a python scripted interface to simplify this process, with pre-commit we can very easily setup git hooks that run linters or code formatters when running commit related version control steps. With this it means that we can make sure that code that is version controlled, is actually of a high quality at the point we wish to record its history. + +Now isn't that convenient!! + +The pre-commit project has also rather nicely compiled a list of quite a lot of other [projects](https://pre-commit.com/hooks.html) that offer hooks for different applications + + +### Installing pre-commit + +Again as you have seen with the other tools, pre-commit is available via the Python pip installer: + +```bash +pip install pre-commit +``` + +Check your installation worked ok: + +```bash +pre-commit --version +``` + +To use pre-commit we are going to need to configure what it does when it is hooked by git commit commands, and to do this we will use a configuration file. + + +### Preparation of new repository + +For this task to really work, we need a new git repository to configure. This is because if we set this up on the repository provided, then it will start to overwrite files you are going to use in the workshop!! + +So lets start by initialising a new git repository. 
Firstly change to a directory that is safe to work in, let's say your home directory: + +```bash +cd ~ +``` + +Now let's make a memorable directory name: + +```bash +mkdir precommit-tutorial +``` + +Let's change our terminal into this directory: + +```bash +cd precommit-tutorial +``` + +and now let's initialise it as a git repository: + +```bash +git init +``` + +Now this is done you will have a completely empty git repo to work inside. + + +### Configuring pre-commit + +To set up pre-commit we need to create a yaml configuration file. In this file we will configure each of the applications that we wish to run. In this case we are setting up isort, Black and Pylint to run checks. + +A typical example configuration file would look like the following: + +Create a file called ".pre-commit-config.yaml" and place the following code inside: + +```yaml +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace + +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + +- repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + +- repo: local + hooks: + - id: pylint + language: system + types: [file, python] + name: pylint + description: "This hook runs the pylint static code analyzer" + exclude: &exclude_files > + (?x)^( + docs/.*| + )$ + entry: pylint +``` + +As you can see in this config file, we are initialising Black, isort and Pylint. + +Before we add any code or set things up further you can commit this change to your repository. It is a good idea to version control utilities like this in case something breaks later. 
+ +```bash +git add .pre-commit-config.yaml +``` + +and then commit the change: + +```bash +git commit -m "adding pre-commit configuration to this repository" +``` + +### Adding/Running the hooks + +Once you have configured what tools you wish to run in the yaml configuration file, you simply add them to the git hooks by running: + +```bash +pre-commit install +``` + +This will then place the relevant scripts in the git hooks interface and will run them each time you run the corresponding git commands that you have hooked. You now have a completely clean project that is version controlled and set up with Python code quality tools from the outset! To see what this does, we need to bring in some code. + +Let's grab some code from somewhere to see what this does: + +```bash +wget https://gist.githubusercontent.com/jimboid/291c92703a61f5014fdcbe744690e2f2/raw/b313157178fed899125a9487b09e73350d4ccf1b/badprog.py +``` + +Ok, so once downloaded let's first add it to our git repository: + +```bash +git add badprog.py +``` + +Let's see what that looks like with git status. As you can see, we have added a file to be tracked by git and there should be no unstaged changes at this point. + +```bash +git status +``` + +In a normal project you would not usually do this step, but let's have a look at the Pylint rating: + +```bash +pylint badprog.py +``` + +As you can see it is pretty bad. Now earlier we set up our pre-commit hooks to run the code quality tools on commit, so let's commit the code and see what happens. + +```bash +git commit -m "adding some bad code for the tutorial" +``` + +You will see some things passed but others failed, and this will have blocked the changes being committed. Now running git status again will show there are unstaged changes, because the hooks have edited the file with isort and Black. 
+ +```bash +git status +``` + +If you wish, you can see what the diff looks like to see what was changed: + +```bash +git diff badprog.py +``` + +As you can see lines have been changed, hopefully this is for the better. Before trying to commit the fixes again, we can run these tools without issuing the git command to check they pass. + +```bash +pre-commit run --all +``` + +You can now see all of the checks pass, so we have automatically corrected the code. We can go ahead and add them and commit them. + +```bash +git add badprog.py + +``` + +then: + +```bash +git commit -m "adding some better code for the tutorial" +``` + +So hopefully, now you can understand the power that these tools can bring to empower developers to write better code without having to spend lots of time learning standards first!! + diff --git a/code_quality/6_final_remarks.md b/code_quality/6_final_remarks.md new file mode 100644 index 0000000..c3719c0 --- /dev/null +++ b/code_quality/6_final_remarks.md @@ -0,0 +1,10 @@ +# Final Remarks + +All of the tools you have learned about today are used around the world by software development experts to simplify the mundane tasks associated with writing very high quality code. + +All of the tools in this workshop of course have much more advanced features than we have explored today, and you should be encouraged to step into the documentation for those tools to have a look. + +We have not covered the use of these tools with github which is an online software repository platform that is designed to share your software in a distributed fashion. All of the tools in this workshop are useable with github via github actions, and once you are on github you should not only be running these tools locally on your computer like you have today, but also using them to gate-keep the quality of code accepted onto your github repository via pull-requests. There is plenty of information on the github knowledge-base on how this is done. 
+ +Finally, you are encouraged to pick up and make use of these tools in your everyday projects involving code, not only do they improve your code for you, but you will also learn to write code that is compliant with these systems over time and as a result, vastly increase your skill. Perhaps as a final incentive, most modern careers in science, data science and scientific software engineering will require some base skills in these technologies. + diff --git a/code_quality/examples/1_random_code.py b/code_quality/examples/1_random_code.py new file mode 100644 index 0000000..29b0071 --- /dev/null +++ b/code_quality/examples/1_random_code.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +num = 407 + +if num == 0 or num == 1: + print(num, "is not a prime number") +elif num > 1: + # check for factors + for i in range(2,num): + if (num % i) == 0: + print(num,"is not a prime number") + print(i,"times",num//i,"is",num) + break + else: + print(num,"is a prime number") + +# if input number is less than +# or equal to 1, it is not prime +else: + print(num,"is not a prime number") diff --git a/code_quality/examples/2_random_code.py b/code_quality/examples/2_random_code.py new file mode 100644 index 0000000..29b0071 --- /dev/null +++ b/code_quality/examples/2_random_code.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +num = 407 + +if num == 0 or num == 1: + print(num, "is not a prime number") +elif num > 1: + # check for factors + for i in range(2,num): + if (num % i) == 0: + print(num,"is not a prime number") + print(i,"times",num//i,"is",num) + break + else: + print(num,"is a prime number") + +# if input number is less than +# or equal to 1, it is not prime +else: + print(num,"is not a prime number") diff --git a/code_quality/examples/3_random_import.py b/code_quality/examples/3_random_import.py new file mode 100644 index 0000000..3f26777 --- /dev/null +++ b/code_quality/examples/3_random_import.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +from my_lib import Object + 
+import os + +from my_lib import Object3 + +from my_lib import Object2 + +import sys + +from third_party import lib15, lib1, lib2, lib3, lib4, lib5, lib6, lib7, lib8, lib9, lib10, lib11, lib12, lib13, lib14 + +import sys + +from __future__ import absolute_import + +from third_party import lib3 + +print("Hey") +print("yo")