diff --git a/.github/workflows/black_n_pylint.yml b/.github/workflows/black_n_pylint.yml new file mode 100644 index 0000000..ccbbbde --- /dev/null +++ b/.github/workflows/black_n_pylint.yml @@ -0,0 +1,58 @@ +# This workflow runs pylint and updates badge + +name: linting + +on: + pull_request: + branches: + - "*" + +jobs: + black-n-pylint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: "3.x" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black lyncs_setuptools[pylint] + + - name: Applying black formatting + run: | + black --diff . + black . + + - name: Pushing changes if any + uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: Applying black formatting (from Github Action) + commit_user_name: sbacchio + commit_user_email: s.bacchio@gmail.com + commit_author: Simone Bacchio + + - name: Pylint output + run: | + badge=$(lyncs_pylint_badge --disable=import-error . | sed "s/\&/\\\&/g") + badge_line=$(awk '/!\[pylint\]/ {print FNR}' README.md) + sed -i "${badge_line}s#.*#${badge}#" README.md + + - name: Pushing changes if any + uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: Updating pylint score (from Github Action) + commit_user_name: sbacchio + commit_user_email: s.bacchio@gmail.com + commit_author: Simone Bacchio + + - name: Run lyncs_setuptools + run: | + lyncs_setuptools diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index f4113f7..75ed379 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -1,81 +1,130 @@ -# This workflow updates the packages on PyPI +# This workflow builds and tests PRs -name: build & test +name: PR build & test on: + pull_request: + branches: + - "main" + - "master" push: branches: - - 'master' - paths-ignore: - - 'docs/**' + - "main" + - "master" jobs: - build-n-publish: - runs-on: ubuntu-latest - + build-n-publish: + runs-on: ${{ matrix.os }} + strategy: + matrix: + py-version: + - 3.x + os: + - ubuntu-latest steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - - name: Install from source - run: | - pip install -e .[all] - - - name: Run tests - run: | - pytest -v --cov-report=xml - export CODECOV_TOKEN="${{ secrets.CODECOV_TOKEN }}" - bash <(curl -s https://codecov.io/bash) -f ./coverage.xml -n tuneit - - - name: Upload if not up to date - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.pypi_password }} - run: | - pip uninstall -y tuneit - pip install tuneit==$(lyncs_setuptools version) || ( + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.py-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.py-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install lyncs_setuptools + + - name: Check github ref + id: check-ref + env: + TEST_PASSWORD: ${{ secrets.test_pypi_password }} + PYPI_PASSWORD: ${{ secrets.pypi_password }} + run: | + if [[ $GITHUB_REF == 'refs/heads/main' || $GITHUB_REF == 'refs/heads/master' ]]; then + echo '::set-output name=main::true' + echo '::set-output name=extra::' + echo '::set-output name=url::' + echo "::set-output name=token::$PYPI_PASSWORD" + else + echo '::set-output name=main::false' + echo '::set-output 
name=extra::--extra-index-url https://test.pypi.org/simple/' + echo '::set-output name=url::--repository-url https://test.pypi.org/legacy/' + echo "::set-output name=token::$TEST_PASSWORD" + fi + + - name: Trying to install from pip + id: check-version + continue-on-error: true + env: + EXTRA: ${{ steps.check-ref.outputs.extra }} + run: | + pip install $EXTRA $(lyncs_setuptools name)[all]==$(lyncs_setuptools version) + + - name: Install from source + if: ${{ steps.check-version.outcome == 'failure' }} + run: | + pip install -e .[all] + + - name: Run tests + run: | + pytest -v + + - name: Upload if not up to date + if: ${{ steps.check-version.outcome == 'failure' }} + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ steps.check-ref.outputs.token }} + URL: ${{ steps.check-ref.outputs.url }} + run: | pip install twine python setup.py sdist - twine upload dist/* - count=0 - while ! pip install tuneit==$(lyncs_setuptools version) && [ $count -lt 20 ]; do - sleep 1 - count=$((count+1)) - done - ) + twine upload $URL dist/* clean-run: - needs: build-n-publish - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + py-version: + - 3.6 + - 3.7 + - 3.8 + os: + - ubuntu-latest steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - - name: Install via pip - run: | - pip install tuneit[all] - - - name: Run tests - run: | - pytest -v --import-mode=importlib + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.py-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.py-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install lyncs_setuptools + + - name: Check github ref + id: check-ref + run: | + if [[ $GITHUB_REF == 'refs/heads/main' || $GITHUB_REF == 'refs/heads/master' ]]; then + echo '::set-output name=extra::' + else + echo '::set-output name=extra::--extra-index-url https://test.pypi.org/simple/' + fi + + - name: Install via pip + env: + EXTRA: ${{ steps.check-ref.outputs.extra }} + run: | + # Keeps trying installing until succeeds (needs to wait for PyPi to update the index) + for i in $(seq 5); do + [ $i -gt 1 ] && sleep 20 + pip install $EXTRA $(lyncs_setuptools name)[all]==$(lyncs_setuptools version) && s=0 && break || s=$? + done + (exit $s) + + - name: Run tests + run: | + pytest -v --import-mode=importlib diff --git a/.github/workflows/ci_cd_test.yml b/.github/workflows/ci_cd_test.yml deleted file mode 100644 index 0826e37..0000000 --- a/.github/workflows/ci_cd_test.yml +++ /dev/null @@ -1,114 +0,0 @@ -# This workflow builds and tests PRs - -name: PR build & test - -on: - pull_request: - branches: - - 'master' - paths-ignore: - - 'docs/**' - -jobs: - build-n-publish: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - - name: Applying black formatting - run: | - pip install black - black --diff . - black . 
- - - name: Pushing changes if any - uses: stefanzweifel/git-auto-commit-action@v4 - with: - commit_message: Applying black formatting (from Github Action) - commit_user_name: sbacchio - commit_user_email: s.bacchio@gmail.com - commit_author: Simone Bacchio - - - name: Install from source - run: | - pip install -e .[all] - - - name: Pylint output - run: | - pip install lyncs_setuptools[pylint] - badge=$(lyncs_pylint_badge --fail-under 8 . | sed "s/\&/\\\&/g") - badge_line=$(awk '/!\[pylint\]/ {print FNR}' README.md) - sed -i "${badge_line}s#.*#${badge}#" README.md - - - name: Pushing changes if any - uses: stefanzweifel/git-auto-commit-action@v4 - with: - commit_message: Updating pylint score (from Github Action) - commit_user_name: sbacchio - commit_user_email: s.bacchio@gmail.com - commit_author: Simone Bacchio - - - name: Run tests - run: | - pytest -v - - - name: Run lyncs_setuptools - run: | - lyncs_setuptools - - - name: Upload if not up to date - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.test_pypi_password }} - run: | - pip uninstall -y tuneit - pip install --extra-index-url https://test.pypi.org/simple/ tuneit==$(lyncs_setuptools version) || ( - pip install twine - python setup.py sdist - twine upload --repository-url https://test.pypi.org/legacy/ dist/* - count=0 - while ! pip install --extra-index-url https://test.pypi.org/simple/ tuneit==$(lyncs_setuptools version) && [ $count -lt 20 ]; do - sleep 1 - count=$((count+1)) - done - ) - - clean-run: - - needs: build-n-publish - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.6, 3.7, 3.8] - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - - - name: Install via pip - run: | - pip install --extra-index-url https://test.pypi.org/simple/ tuneit[all] - - - name: Run tests - run: | - pytest -v --import-mode=importlib diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..9dfb7a4 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,43 @@ +# This workflow builds and runs test + +name: tests + +on: + pull_request: + branches: + - "*" + +jobs: + tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + py-version: + - 3.6 + - 3.7 + - 3.8 + os: + - ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + + - name: Install via pip + run: | + sudo pip install -e .[all] + + - name: Run tests + run: | + pytest -v diff --git a/README.md b/README.md index aefd615..7a7232e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![license](https://img.shields.io/github/license/Lyncs-API/tuneit?logo=github&logoColor=white)](https://github.com/Lyncs-API/tuneit/blob/master/LICENSE) [![build & test](https://img.shields.io/github/workflow/status/Lyncs-API/tuneit/build%20&%20test?logo=github&logoColor=white)](https://github.com/Lyncs-API/tuneit/actions) [![codecov](https://img.shields.io/codecov/c/github/Lyncs-API/tuneit?logo=codecov&logoColor=white)](https://codecov.io/gh/Lyncs-API/tuneit) 
-[![pylint](https://img.shields.io/badge/pylint%20score-8.4%2F10-yellowgreen?logo=python&logoColor=white)](http://pylint.pycqa.org/)
+[![pylint](https://img.shields.io/badge/pylint%20score-9.0%2F10-green?logo=python&logoColor=white)](http://pylint.pycqa.org/)
 [![black](https://img.shields.io/badge/code%20style-black-000000.svg?logo=codefactor&logoColor=white)](https://github.com/ambv/black)
 
 Tuneit is a general-purpose tool for optimizing and crosschecking calculations.
@@ -19,3 +19,89 @@ The package can be installed via `pip`:
 ```
 pip install [--user] tuneit
 ```
+
+## Documentation
+
+The Tuneit documentation is available at:
+[tuneit documentation](https://tuneit.readthedocs.io/en/latest/)
+
+The Tuneit package works with computational graphs, which have two main phases:
+
+- A construction phase, where the graph is built. Every operation that needs to be performed is added to the graph as a node, along with all the variables, the data inputs and the data output. Each type of node is visualised differently in the graph:
+
+  * **Variables:** represented using diamonds. The outline is red in case the variable does not have a value yet and green in case the variable has been assigned a fixed value.
+  * **Operations:** represented using oval shapes.
+  * **Data:** all data objects are represented using rectangles. Most of them represent data inputs, except for the last node in the graph, which represents the data output.
+
+- A finalization phase. After the graph is finalized, a number of operations can be performed on it (see the short sketch after this list):
+
+  * **Visualize:** using the `visualize()` function, the graph can be visualized as shown in the example below.
+  * **Compute:** by simply calling the finalized object of the graph, the final value of the graph is computed and returned.
+  * **Crosscheck:** the `crosscheck()` function iterates through all the different options for a variable and returns `True`
+  only for the ones that produce the correct result of the graph.
+  * **Benchmark:** using the `benchmark()` function, the computation times of all the different combinations of options for the
+  variables can be compared. In addition, with the `record` argument of the function, all those times can be recorded in a
+  dataframe. The `record` option thus allows for comparing the execution times that result not only from the various
+  alternatives for the variables, but also from different inputs.
+  * **Optimize:** using the `optimize()` function, the variables of the graph can be tuned. Each time it is called, it returns the values that were used for the variables in that trial and the resulting computation time, along with the best trial executed so far.
+  A trial consists of the timing of an execution of the graph using a different combination of values for the variables that are tuned.
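+
+A minimal sketch of the two phases, based on the snippet in the documentation (the names used to fix and feed the graph below assume the default labels deduced from the assignments):
+
+```python
+from tuneit import *
+
+a = data()               # a generic data input
+x = variable(range(10))  # variables to be tuned
+y = variable(range(5))
+axpy = a * x + y         # operations are recorded as nodes of the graph
+
+axpy = finalize(axpy)    # finalization closes the graph
+axpy.fix("x", 2)         # a variable can be fixed by name
+out = axpy(a=3)          # calling the finalized graph computes it
+```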
+
+## Example
+
+This section contains a small example that shows the construction of a graph for the multiplication of a matrix and a vector.
+
+More details about this example can be found in the [example page](https://tuneit.readthedocs.io/en/latest/example.html) of the tuneit documentation.
+
+```python
+@alternatives(
+    coo=lambda mat: sp.coo_matrix(mat),
+    csc=lambda mat: sp.csc_matrix(mat),
+    csr=lambda mat: sp.csr_matrix(mat),
+    bsr=lambda mat: sp.bsr_matrix(mat),
+)
+def matrix(mat):
+    return s.matrix(mat.todense())
+
+mat = data(info=["shape", "dtype"])
+vec = data(info=["shape", "dtype"])
+mat = matrix(mat)
+mul = mat * vec
+mul = finalize(mul)
+
+mul.visualize()
+```
+
+The result of the `visualize()` function:
+
+![visualised graph](docs/images/visualised_graph1.png)
+
+```python
+matrix_value = sp.random(100, 100, 0.1)
+vector_value = np.random.rand(100, 1)
+
+# the result of the graph:
+out = mul(mat=matrix_value, vec=vector_value)
+
+# comparing the options of the variable which_matrix:
+mul.benchmark(mat=matrix_value, vec=vector_value)
+```
+
+The result of the `benchmark()` function:
+
+| which_matrix |     Time     |
+| :----------: | :----------: |
+|     coo      | 475.300 usec |
+|     csc      |  1.076 msec  |
+|     csr      |  1.478 msec  |
+|     bsr      | 845.800 usec |
+|    matrix    | 803.200 usec |
+
+<br>
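+
+The same finalized graph can also be crosschecked and tuned. A minimal sketch reusing the objects above (`crosscheck()` reports which alternatives reproduce the correct result; each call of the tuner executes one more timed trial):
+
+```python
+# check that every alternative returns the correct result
+mul.crosscheck(mat=matrix_value, vec=vector_value)
+
+# tune which_matrix with optuna
+tuner = mul.optimize(sampler="optuna")
+out = tuner(mat=matrix_value, vec=vector_value)
+```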
+
+## Acknowledgments
+
+### Authors
+- Simone Bacchio (sbacchio)
+- Raphaella Demetriou (raphdem)
+
+### Funding
+- PRACE-6IP, Grant agreement ID: 823767, Project name: LyNcs.
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 6e2f2f4..f2c7dba 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -47,6 +47,9 @@
 # a list of builtin themes.
 #
 html_theme = "alabaster"
+html_sidebars = {
+    "**": ["globaltoc.html", "relations.html", "sourcelink.html", "searchbox.html"]
+}
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
diff --git a/docs/example.rst b/docs/example.rst
new file mode 100644
index 0000000..4a78dec
--- /dev/null
+++ b/docs/example.rst
@@ -0,0 +1,256 @@
+Example: A small tuning example using sparse matrices
+=====================================================
+
+:code:`tuneit` is imported as shown below, along with some other packages used in this example:
+
+.. code-block:: python
+
+    from tuneit import *
+    import scipy.sparse as sp
+    import scipy as s
+    import numpy as np
+
+Firstly, a simple graph is constructed, which computes the multiplication of a sparse matrix with a vector.
+The graph contains one variable to be tuned, which represents the different formats that can be used for the matrix.
+The following function creates a matrix; by using :code:`alternatives`, more options (available in the :code:`scipy.sparse` package) are added for the creation of the matrix and its format.
+
+.. code-block:: python
+
+    @alternatives(
+        coo=lambda mat: sp.coo_matrix(mat),
+        csc=lambda mat: sp.csc_matrix(mat),
+        csr=lambda mat: sp.csr_matrix(mat),
+        bsr=lambda mat: sp.bsr_matrix(mat),
+    )
+    def matrix(mat):
+        return s.matrix(mat.todense())
+
+In this way, we have created a function :code:`matrix` that expresses the given sparse matrix in an appropriate format, and a variable
+to be tuned. The range of the variable :code:`which_matrix` contains all the different options that can be used to express the matrix, as included in the function above (:code:`matrix, coo, csc, csr, bsr`).
+
+The graph takes as input the matrix and the vector to be multiplied. One option is to create a matrix and a vector at random:
+
+.. code-block:: python
+
+    mat = sp.random(100, 100, 0.1)
+    vec = np.random.rand(100, 1)
+
+Or just create two generic data objects that will take their actual values later on (this is the option used in this example):
+
+.. code-block:: python
+
+    mat = data(info=["shape", "dtype"])
+    vec = data(info=["shape", "dtype"])
+
+:code:`data()` creates a generic data object, as no specific value is passed to the function. Even though no value is passed, some information
+about the new data object can be given using :code:`info`. As shown above, some characteristics of the new objects are given by the
+attributes :code:`shape` and :code:`dtype`.
+
+In addition, the :code:`matrix` function constructed previously can now be used. The new :code:`mat` object, created when
+:code:`matrix` is called on the object :code:`mat` defined above, is a tunable object.
+
+.. code-block:: python
+
+    mat = matrix(mat)
+
+Furthermore, we define a random sparse matrix and a random vector that will be used later on, when actual values need to be passed for the :code:`mat` and :code:`vec` objects created above.
+
+.. code-block:: python
+
+    matrix_value = sp.random(100, 100, 0.1)
+    vector_value = np.random.rand(100, 1)
+
+The final graph :code:`mul`, which expresses the multiplication between the vector :code:`vec` and the sparse matrix :code:`mat`, is created
+as shown below:
+
+.. code-block:: python
+
+    mul = mat * vec
+
+Once the graph is completed, we can finalize it with the function :code:`finalize`.
+
+.. code-block:: python
+
+    mul = finalize(mul)
+
+This closes the graph and provides us with a high-level interface for processing it (e.g. we can simply compute it by calling it).
+
+.. code-block:: python
+
+    out = mul(mat=matrix_value, vec=vector_value)
+
+
+Visualize
+---------
+
+The graph can now be visualized using:
+
+.. code-block:: python
+
+    mul.visualize()
+
+The result is shown below:
+
+.. image:: images/visualised_graph1.png
+
+The data objects are shown in rectangles, the functions to be computed are presented in oval shapes, while the variables that have not taken a fixed value yet are shown in red diamonds.
+
+Note: Each node in the graph is represented by its name (such as :code:`matrix`) concatenated with a random sequence of characters, which
+is not shown in its visualization (for instance :code:`matrix-2b53519cefa68a68788760b169fee0b4`).
+The small indices included in the nodes of the visualized graph allow the user to distinguish between multiple operations of the same kind
+(e.g. multiplications) and to find out the whole unique name of a node in case it is needed in an operation.
+
+For instance, the following code returns the whole name of the node that carries the index 2 in the visualization of the graph :code:`mul`:
+
+.. code-block:: python
+
+    mul.graph[2]
+
+
+Crosscheck
+----------
+
+The function :code:`crosscheck` can be called on the finalized object :code:`mul` as shown below.
+
+.. code-block:: python
+
+    mul.crosscheck(mat=matrix_value, vec=vector_value)
+
+Since the inputs :code:`mat` and :code:`vec` of the graph were created as generic data objects, it is called with real values. The sampler object
+created will iterate through all the possible alternative options for the variable of the graph (:code:`which_matrix`) and return :code:`True` only for the ones
+that produce the correct result of the graph. The :code:`crosscheck` function is basically a way to check that all alternative options return the correct result.
+
+The result of the above operation is:
+
+.. table::
+
+    ============== ========
+     which_matrix   xcheck
+    ============== ========
+     coo            True
+     csc            True
+     csr            True
+     bsr            True
+     matrix         True
+    ============== ========
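+
+Having verified the alternatives, a variable can also be pinned to a single option before computing. A minimal sketch (the choice of :code:`csr` here is arbitrary):
+
+.. code-block:: python
+
+    mul_csr = mul.copy(reset=True)
+    mul_csr.fix("which_matrix", "csr")
+    out = mul_csr(mat=matrix_value, vec=vector_value)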
+
+
+Benchmark
+---------
+
+The function :code:`benchmark` can be called on the finalized object :code:`mul` as shown below.
+
+.. code-block:: python
+
+    mul.benchmark(mat=matrix_value, vec=vector_value)
+
+Called with real values, the sampler object
+will iterate through all the possible alternative options for the variable of the graph (:code:`which_matrix`) and time the execution of the graph using each
+option. The :code:`benchmark` function is basically a way to compare the execution times of all the alternative options of the variable.
+
+The result of the above operation is:
+
+.. table::
+
+    ============== ============
+     which_matrix   Time
+    ============== ============
+     coo            475.300 usec
+     csc            1.076 msec
+     csr            1.478 msec
+     bsr            845.800 usec
+     matrix         803.200 usec
+    ============== ============
+
+The :code:`benchmark` function also has an argument called :code:`record`, which, if set to :code:`True`, allows the execution times of the graph
+using the alternative options for the variable to be stored in a :code:`pandas` dataframe. This also gives the option of comparing
+the execution times that result not only from the various alternatives for the variable, but also from different inputs. For example, in the code below
+different input sizes are passed in each execution of the sampler object. As a result, the returned dataframe :code:`trials` (accessible through the sampler object) will contain the execution
+time of the graph for all combinations of alternative options of the variable and input sizes.
+
+.. code-block:: python
+
+    sampler = mul.benchmark(record=True)
+    for n in [2**exponent for exponent in range(15)]:
+        sampler().run(mat=sp.random(n, n, 0.1), vec=np.random.rand(n, 1))
+
+The dataframe can be accessed as shown below:
+
+.. code-block:: python
+
+    sampler.trials
+
+The produced dataframe looks like this:
+
+.. table::
+
+    ========== ============== ============== =========== =========== =========== =========
+     trial_id   which_matrix   mat_shape      mat_dtype   vec_shape   vec_dtype   time
+    ========== ============== ============== =========== =========== =========== =========
+     0          coo            (1, 1)         float64     (1, 1)      float64     0.0020286
+     1          csc            (1, 1)         float64     (1, 1)      float64     0.0042852
+     2          csr            (1, 1)         float64     (1, 1)      float64     0.0021259
+     3          bsr            (1, 1)         float64     (1, 1)      float64     0.0021831
+     4          matrix         (1, 1)         float64     (1, 1)      float64     0.0005839
+     ...        ...            ...            ...         ...         ...         ...
+     70         coo            (16384, 16384) float64     (16384, 1)  float64     0.333415
+     71         csc            (16384, 16384) float64     (16384, 1)  float64     6.21665
+     72         csr            (16384, 16384) float64     (16384, 1)  float64     6.42704
+     73         bsr            (16384, 16384) float64     (16384, 1)  float64     7.46502
+     74         matrix         (16384, 16384) float64     (16384, 1)  float64     6.29298
+    ========== ============== ============== =========== =========== =========== =========
+
+The dataframe can then be used to compare different input sizes across the different alternatives for the variable. One way to do this visually
+is to produce a plot like the one shown below:
+
+.. image:: images/plot.png
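+
+One possible way to produce such a plot from the recorded dataframe is sketched below with :code:`pandas` and :code:`matplotlib` (the column names follow the table above, and the shapes are assumed to be stored as tuples; the pivoting is our choice, not part of :code:`tuneit`):
+
+.. code-block:: python
+
+    import matplotlib.pyplot as plt
+
+    df = sampler.trials
+    # use the number of rows of the matrix as the input size
+    df["size"] = df["mat_shape"].apply(lambda shape: shape[0])
+    # one line per alternative, input size on the x axis
+    df.pivot_table(index="size", columns="which_matrix", values="time").plot(loglog=True)
+    plt.show()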
+
+
+Optimize
+--------
+
+For the purposes of this example, we would like to tune the variable :code:`which_matrix` based only on the computation time of the multiplication
+(i.e. excluding the time taken by the function :code:`matrix` to construct the matrix). In order to achieve this, a link has to be added
+between the multiplication and :code:`which_matrix`, as they are not currently directly connected (:code:`which_matrix` is added as a dependency to the last node
+of the graph):
+
+.. code-block:: python
+
+    mul.add_deps("which_matrix")
+
+The new link can be observed by running:
+
+.. code-block:: python
+
+    mul.visualize()
+
+.. image:: images/visualised_graph2.png
+
+In addition, the :code:`matrix` node in the graph needs to be marked as one to be precomputed, so that its computation time is not
+taken into account when the execution of the graph is timed during the tuning of the variable.
+Note: In the following operation we can use the name :code:`matrix` for the node only because it is unique in the graph. If there were
+multiple operations of the same kind (e.g. if the function :code:`matrix` were used twice in the graph), then the full name of the node would
+have to be used.
+
+.. code-block:: python
+
+    mul["matrix"].precompute = True
+
+The only thing left to do is to actually tune the variable by calling the following function:
+
+.. code-block:: python
+
+    mul.optimize(sampler="optuna")(mat=matrix_value, vec=vector_value)
+
+A tuner object is created by calling the :code:`optimize()` function on the graph to be tuned and passing it the sampler to be used.
+The :code:`optuna` package is one of the options offered by :code:`tuneit` as a sampler.
+
+The tuner object is then called, passing actual values for the sparse matrix and the vector. This is necessary because,
+during the tuning of the variable, the computation of the graph is carried out. Each time the tuner object is
+called, it executes one more trial and returns the value used for the variable in that trial and the resulting computation
+time, along with the best trial executed so far.
+Note: A trial is a single execution of the objective function (which in this case is the timing of an execution) using a different combination
+of values for the variables that are tuned.
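+
+A short sketch of a small tuning loop, reusing the objects defined above (five trials is an arbitrary choice; how many are worthwhile depends on the problem):
+
+.. code-block:: python
+
+    tuner = mul.optimize(sampler="optuna")
+    for _ in range(5):
+        out = tuner(mat=matrix_value, vec=vector_value)  # each call executes one more trial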
+
+..
+    do I need to include a picture of the result here? (what the tuner returns after it is called a few times)
diff --git a/docs/images/computational_graph1.png b/docs/images/computational_graph1.png
new file mode 100644
index 0000000..d92ffb2
Binary files /dev/null and b/docs/images/computational_graph1.png differ
diff --git a/docs/images/computational_graph2.png b/docs/images/computational_graph2.png
new file mode 100644
index 0000000..dd00112
Binary files /dev/null and b/docs/images/computational_graph2.png differ
diff --git a/docs/images/plot.png b/docs/images/plot.png
new file mode 100644
index 0000000..81c8de6
Binary files /dev/null and b/docs/images/plot.png differ
diff --git a/docs/images/visualised_graph1.png b/docs/images/visualised_graph1.png
new file mode 100644
index 0000000..2c127c0
Binary files /dev/null and b/docs/images/visualised_graph1.png differ
diff --git a/docs/images/visualised_graph2.png b/docs/images/visualised_graph2.png
new file mode 100644
index 0000000..3db2d90
Binary files /dev/null and b/docs/images/visualised_graph2.png differ
diff --git a/docs/index.rst b/docs/index.rst
index 1689cd5..bbee7c4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,12 +1,64 @@
 .. Tunable documentation master file.
 
-Welcome to Tuneit's documentation!
-===================================
+Tuneit: optimize, benchmark and crosscheck
+==========================================
+
+
+Basic Concepts
+--------------
+The Tuneit package works with computational graphs, which have two main phases:
+
+- A construction phase, where the graph is built. Every operation that needs to be performed is added to the graph as a node, along with all the variables, the data inputs and the data output. Each type of node is visualised differently in the graph, as shown below:
+  |pic1|
+
+  * **Variables:** represented using diamonds. The outline is red in case the variable does not have a value yet and green in case the variable has been assigned a fixed value.
+  * **Operations:** represented using oval shapes.
+  * **Data:** all data objects are represented using rectangles. Most of them represent data inputs, except for the last node in the graph, which represents the data output.
+
+  .. code-block:: python
+
+      from tuneit import *
+
+  .. code-block:: python
+
+      a = data()
+      x = variable(range(10))
+      y = variable(range(5))
+      axpy = a * x + y
+      visualize(axpy)
+
+  |pic2|
+
+.. |pic1| image:: images/computational_graph1.png
+    :width: 500
+
+.. |pic2| image:: images/computational_graph2.png
+
+- A finalization phase. After the graph is finalized, a number of operations (described in the next section) can be performed on it.
+
+
+Key Features and Functions
+--------------------------
+Once a computational graph has been built and finalized, it can be used in a number of operations.
+
+- **Visualize:** Using the :code:`visualize()` function, the graph can be visualized as shown above.
+- **Compute:** By simply calling the finalized object of the graph, the final value of the graph is computed and returned.
+- **Crosscheck:** The :code:`crosscheck()` function iterates through all the different options for a variable and returns :code:`True`
+  only for the ones that return the correct result of the graph.
+- **Benchmark:** By using the :code:`benchmark()` function, the computation times of all the different combinations of options for the
+  variables can be compared. In addition, by using the :code:`record` argument of the function, all those times can be recorded in a
+  dataframe. Furthermore, the :code:`record` option allows for comparing the execution times that result not only from the various
+  alternatives for the variables, but also from different inputs.
+- **Optimize:** By using the :code:`optimize()` function, the variables of the graph can be tuned. Each time it is called, it returns the
+  values that were used for the variables in that trial and the resulting computation time, along with the best trial executed so far.
+  A trial consists of the timing of an execution of the graph using a different combination of values for the variables that are tuned.
+
 
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
-
+
+   installation
+   example
 
 Indices and tables
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000..3ac9548
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,11 @@
+Installation
+============
+
+..
+    Tuneit supports Python 3.6 or newer.
+
+The package can be installed via `pip`:
+
+..
code-block:: bash + + $ pip install [--user] tuneit \ No newline at end of file diff --git a/setup.py b/setup.py index 346f98b..ccdce53 100644 --- a/setup.py +++ b/setup.py @@ -3,16 +3,19 @@ requirements = [ "dill", "dataclasses", - "python-varname", + "varname", "tabulate", "numpy", + "lyncs_utils", ] extras = { "graph": [ "graphviz", ], - "test": ["pytest", "pytest-cov"], + "test": ["pytest", "pytest-cov", "testfixtures"], + "optuna": ["optuna"], + "record": ["pandas"], } setup( diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/test_finalize.py b/test/test_finalize.py index b59b6f3..3e832b0 100644 --- a/test/test_finalize.py +++ b/test/test_finalize.py @@ -1,9 +1,16 @@ from tuneit.graph import visualize from tuneit.tunable import * from tuneit.variable import * +from tuneit.class_utils import * from tuneit.tunable import Tunable from tuneit.finalize import finalize from pytest import raises +from tuneit.variable import Variable +from tuneit.finalize import HighLevel +import testfixtures +import pickle +import scipy.sparse as sp +import scipy as s def test_finalize(): @@ -48,3 +55,93 @@ def test_finalize(): a = variable(range(10), uid=True) with raises(KeyError): finalize(a * b).fix("a") + + x = variable([1, 2, 3, 4]) + y = variable([15, 16, 17, 18]) + z = variable([10, 20, 30, 40]) + w = variable([0, 50, 100]) + a = x + y + z + w - z + a_final = finalize(a) + + b = w ** x - y + d = a + b + d_final = finalize(d) + nodes_a = list( + str(node) + for node in a_final.dependencies + if not isinstance(a_final[node], Variable) + ) + nodes_d = list( + str(node) + for node in d_final.dependencies + if not isinstance(d_final[node], Variable) + ) + + # one_output + assert a_final.one_output(nodes_a)[1] == True + assert d_final.one_output(nodes_d[1:])[1] == False + + # consecutive + assert a_final.consecutive(nodes_a) == True + assert d_final.consecutive(nodes_d) == True + assert d_final.consecutive(nodes_d[1:]) == False + + # mergeable + assert a_final.mergeable(nodes_a)[1] == True + assert d_final.mergeable(nodes_d[1:])[1] == False + + # merge + last_node = d_final.one_output(nodes_d[1:4])[0] + with raises(ValueError): + d_final.merge(list(str(dep) for dep in d_final.dependencies)) + with raises(ValueError): + d_final.merge(nodes_d[1:]) + merged_graph = d_final.merge(nodes_d[1:4]) + assert isinstance(merged_graph, HighLevel) + assert len(nodes_d) - 2 == len( + list( + str(node) + for node in merged_graph.dependencies + if not isinstance(merged_graph[node], Variable) + ) + ) # tests remove + nodes = list( + d_final.get_node(node) + for node in d_final.dependencies + if not isinstance(d_final[node], Variable) + )[1:4] + assert set( + [dep for node in nodes for dep in node.first_dependencies if dep not in nodes] + ) == set(list(merged_graph.get_node(last_node).first_dependencies)) + + +def test_pickle(): + # HighLevel + x = data() + y = data() + z = x * y + z = finalize(z) + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # HighLevel object with alternatives: alternatives is not pickle-able + + # @alternatives( + # coo = sp.coo_matrix, + # csc = sp.csc_matrix, + # csr = sp.csr_matrix, + # bsr = sp.bsr_matrix + # ) + # def matrix(mat): + # return s.matrix(mat.todense()) + + # mat=data(info=["shape","dtype"]) + # vec=data(info=["shape","dtype"]) + # mat=matrix(mat) + # mul=mat*vec + # mul = finalize(mul) + + # a = pickle.dumps(mul) + # b = pickle.loads(a) + # assert 
testfixtures.compare(mul,b, strict=True) is None diff --git a/test/test_graph.py b/test/test_graph.py index 0982c9d..ae74206 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -1,5 +1,7 @@ import pytest from tuneit.graph import Graph, Node, Key +import pickle +import testfixtures class String(str): @@ -65,3 +67,23 @@ def test_dict_methods(): b.update(a) assert a == b + + +def test_pickle(): + # Key + z = Key("z") + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # Node + z = Node("letter", "z") + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # Graph + z = Graph({"letter": "z", "number": 1}) + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None diff --git a/test/test_optuna.py b/test/test_optuna.py new file mode 100644 index 0000000..26866b4 --- /dev/null +++ b/test/test_optuna.py @@ -0,0 +1,131 @@ +import pytest +import sys +from tuneit import * +import numpy +from hashlib import md5 +from dill import dumps +from tuneit.tools.time import Time, default_timer + +try: + from tuneit.tools.optuna import OptunaSampler + import optuna + from optuna.trial import Trial + from optuna.trial import create_trial + from optuna.distributions import CategoricalDistribution + from optuna.trial import TrialState + from tuneit.finalize import HighLevel + from optuna.study import Study + + skip_optuna = False +except ImportError: + skip_optuna = True + +skip_optuna = pytest.mark.skipif(skip_optuna, reason="optuna is not installed") + + +@skip_optuna +def test_optuna_sampler(): + # simple example to use in tests + # building a graph with variables for sorting (preprocessing) and searching to be tuned: + + @alternatives( + mergesort=lambda a: numpy.sort(a, kind="mergesort"), + heapsort=lambda a: numpy.sort(a, kind="heapsort"), + timsort=lambda a: numpy.array(sorted(a)), + ) + def preprocessing(array): + res = numpy.sort(array) + return res + + @alternatives( + indices=lambda a, b: [i for i, x in enumerate(a.tolist()) if x == b][0], + array_search=lambda a, b: numpy.where(a == b)[0][0], + binary_search=lambda a, b: numpy.searchsorted(a, b), + ) + def searching(array, element): + l = array.tolist() + index = l.index(element) + return index + + element = 65 + result = searching( + preprocessing(numpy.random.randint(1000, size=(10000))), element + ) # input size: 10 000, type: integers + fz = finalize(result) + + callback_function = lambda fnc: Time(default_timer(fnc)) + obj_A = OptunaSampler( + fz, callback=callback_function, storage="sqlite:///example.db" + ) + assert isinstance(obj_A, OptunaSampler) + assert isinstance(obj_A.tunable, HighLevel) + assert not bool( + obj_A.compute_kwargs + ) # at the moment no kwargs are used for the compute function, so compute_kwargs must be empty + assert callable(obj_A.callback) + assert obj_A.n_trials > 0 + + # test get_study function + study = obj_A.get_study() + assert isinstance(study, Study) + name = obj_A.tunable.get_info() + name["callback"] = obj_A.callback + assert study.study_name == md5(dumps(name)).hexdigest() + assert [*study.user_attrs.keys()] == ["callback"] + obj_B = OptunaSampler( + fz, callback=callback_function, storage="sqlite:///example.db", n_trials=10 + ) + assert obj_A.compute() == finalize(result).compute() # test compute function + assert obj_B.compute() == finalize(result).compute() + name_A = obj_A.get_study().study_name + name_B = obj_B.get_study().study_name + assert name_A == 
name_B + assert len(study.trials) >= 11 + + # test _call_wrapper function + assert obj_A._call_wrapper(obj_A.tunable) == finalize(result).compute() + + # test objective function + tid = study._storage.create_new_trial(study._study_id) + trial = Trial(study, trial_id=tid) + assert isinstance(obj_A.objective(trial), Time) + + # test get_next_trial + temp = obj_A.get_next_trial(trial) + assert len(temp.fixed_variables) == 2 + var_A = temp.get_variable("which_preprocessing") + var_B = temp.get_variable("which_searching") + assert var_A.fixed and var_B.fixed + + # test get_suggetions + selected_options = obj_A.get_suggestions(trial) + assert len(selected_options) == len(fz.variables) + assert [*selected_options.values()][0] in preprocessing.keys() + assert [*selected_options.values()][1] in searching.keys() + + # test get_var_args function + assert OptunaSampler.get_var_args("categorical", ["a", "b", "c"]) == ( + tuple(["a", "b", "c"]), + ) + args = OptunaSampler.get_var_args("discrete_uniform", range(100)) + assert len(args) == 3 + assert args[0] == min(range(100)) + assert args[1] == max(range(100)) + assert args[2] == 1 + + # test deduce_type function + assert OptunaSampler.deduce_type(range(100)) == "discrete_uniform" + assert OptunaSampler.deduce_type([1, 2, 17, 25]) == "categorical" + assert OptunaSampler.deduce_type(preprocessing.values()) == "categorical" + + # test best_params + best = obj_A.best_params() + assert len(best.keys()) == len(fz.variables) + assert [*best.values()][0] in preprocessing.keys() + assert [*best.values()][1] in searching.keys() + + optuna.delete_study(name_A, storage="sqlite:///example.db") + + +# If I want to delete the study and start a new one the next time I run the tests +# optuna.delete_study(name_B, storage="sqlite:///example.db") diff --git a/test/test_sampler.py b/test/test_sampler.py new file mode 100644 index 0000000..32fecef --- /dev/null +++ b/test/test_sampler.py @@ -0,0 +1,74 @@ +from tuneit import * +import numpy +from tuneit.tools.time import Time, default_timer +from tuneit.tools.base import Sampler +from tuneit.finalize import HighLevel +import itertools + + +def test_sampler(): + # simple example to use in tests + # building a graph with variables for sorting (preprocessing) and searching to be tuned: + + @alternatives( + mergesort=lambda a: numpy.sort(a, kind="mergesort"), + heapsort=lambda a: numpy.sort(a, kind="heapsort"), + timsort=lambda a: numpy.array(sorted(a)), + ) + def preprocessing(array): + res = numpy.sort(array) + return res + + @alternatives( + indices=lambda a, b: [i for i, x in enumerate(a.tolist()) if x == b][0], + array_search=lambda a, b: numpy.where(a == b)[0][0], + binary_search=lambda a, b: numpy.searchsorted(a, b), + ) + def searching(array, element): + l = array.tolist() + index = l.index(element) + return index + + element = 65 + result = searching(preprocessing(numpy.random.randint(1000, size=(10000))), element) + fz = finalize(result) + + callback_function = lambda fnc: Time(default_timer(fnc)) + + obj = sample( + result, + ["which_preprocessing", "which_searching"], + callback=callback_function, + callback_calls=True, + ) + assert isinstance(obj, Sampler) + assert callable(obj.callback) + assert obj.callback == callback_function + assert len(obj.variables) == 2 + assert obj.variables[0] in fz.variables + assert obj.variables[1] in fz.variables + assert not obj.compute_kwargs + assert isinstance(obj.tunable, HighLevel) + + assert obj.max_samples == 16 + assert obj.n_samples == 16 + assert obj.samples == tuple( + 
itertools.product([*preprocessing.keys()], [*searching.keys()]), + ) + values = obj.sample_values() + for a in [x[1] for x in values]: + assert isinstance(a, Time) + assert [x[0] for x in values] == list(obj.samples) + assert obj.value == fz.compute() + + obj_B = benchmark(fz.copy(reset=True)) + assert isinstance(obj_B, Sampler) + assert obj_B.samples == obj.samples + assert obj_B.value == obj.value + + obj_C = crosscheck(fz.copy(reset=True)) + assert isinstance(obj_C, Sampler) + values2 = obj_C.sample_values() + for a in [x[1] for x in values2]: + assert isinstance(a, bool) + assert obj_C.samples == obj.samples diff --git a/test/test_tunable.py b/test/test_tunable.py index 033d23d..f3f6269 100644 --- a/test/test_tunable.py +++ b/test/test_tunable.py @@ -1,8 +1,9 @@ -from pickle import dumps +import pickle from pytest import raises from tuneit.graph import visualize, Node from tuneit.tunable import * -from tuneit.tunable import Tunable +from tuneit.tunable import Tunable, Data +import testfixtures class unpickable: @@ -27,7 +28,6 @@ def test_object(): assert a.key != Object(unpickable()).key one = Object(1, deps=zero) - assert zero.key in one.dependencies def test_function(): @@ -43,8 +43,6 @@ def test_function(): one = Object(1) fnc = Function(str, args=zero, kwargs={"one": one}) - assert zero.key in fnc.dependencies - assert one.key in fnc.dependencies lst = Object([1, 2]).tunable() lst = lst.append(3) @@ -87,4 +85,33 @@ def test_tunable(): with raises(TypeError): bool(tunable(1)) - assert dumps(a) + assert pickle.dumps(a) + + +def test_pickle(): + # Object + z = Object(2, label="z") + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # Data + z = Data("z") + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # Function + z = Function(str) + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None + + # Tunable + x = data() + y = data() + z = x * y + a = pickle.dumps(z) + b = pickle.loads(a) + assert type(z) == type(b) == Tunable + assert testfixtures.compare(Node(z), Node(b), strict=True) is None diff --git a/test/test_tuner.py b/test/test_tuner.py new file mode 100644 index 0000000..0169ba8 --- /dev/null +++ b/test/test_tuner.py @@ -0,0 +1,138 @@ +from tuneit import * +import numpy +from tuneit.tools.tuner import Tuner +from tuneit.tools.time import Time, default_timer +from tuneit.tools.optuna import OptunaSampler +from tuneit.tools.base import Sampler +import pytest +import scipy.sparse as sp +from tuneit.tunable import Tunable +from tuneit.finalize import HighLevel + + +def test_tuner(): + # simple example to use in tests + # building a graph with variables for sorting (preprocessing) and searching to be tuned: + + @alternatives( + mergesort=lambda a: numpy.sort(a, kind="mergesort"), + heapsort=lambda a: numpy.sort(a, kind="heapsort"), + timsort=lambda a: numpy.array(sorted(a)), + ) + def preprocessing(array): + res = numpy.sort(array) + return res + + @alternatives( + indices=lambda a, b: [i for i, x in enumerate(a.tolist()) if x == b][0], + array_search=lambda a, b: numpy.where(a == b)[0][0], + binary_search=lambda a, b: numpy.searchsorted(a, b), + ) + def searching(array, element): + l = array.tolist() + index = l.index(element) + return index + + element = 65 + result = searching( + preprocessing(numpy.random.randint(1000, size=(10000))), element + ) # input size: 10 000, type: integers + + # test optimise function + obj_A = 
optimise(result, sampler="optuna") + assert isinstance(obj_A, Tuner) + + # test tune function + obj_B = tune(result, callback=lambda fnc: Time(default_timer(fnc))) + assert isinstance(obj_B, Tuner) + + # test Tuner class + obj_C = Tuner( + result, sampler="optuna", callback=lambda fnc: Time(default_timer(fnc)) + ) + assert isinstance(obj_C, Tuner) + assert bool(obj_C.tuner_kwargs) + assert callable(obj_B.tuner_kwargs.get("callback", None)) + # test compute function in Tuner + assert obj_C.compute() == finalize(result).compute() + # test get_best_trial function in Tuner + res = obj_C.get_best_trial() + assert isinstance(res, dict) + assert "which_preprocessing" in {k.split("-")[0]: v for k, v in res.items()} + assert ( + next(v for k, v in res.items() if k.startswith("which_preprocessing")) + in preprocessing.keys() + ) + assert "which_searching" in {k.split("-")[0]: v for k, v in res.items()} + assert ( + next(v for k, v in res.items() if k.startswith("which_searching")) + in searching.keys() + ) + # test get_sampler function in Tuner + assert obj_C.get_sampler() == OptunaSampler + assert ( + Tuner( + result, sampler=None, callback=lambda fnc: Time(default_timer(fnc)) + ).get_sampler() + == Sampler + ) + with pytest.raises(ValueError): + Tuner( + result, sampler="hello", callback=lambda fnc: Time(default_timer(fnc)) + ).get_sampler() + # test get_sampler_kwargs function in Tuner + kwargs = obj_C.get_sampler_kwargs() + assert kwargs["storage"] == "sqlite:///example.db" + assert kwargs["callback"] == obj_C.tuner_kwargs.get("callback", None) + kwargs = Tuner( + result, sampler=None, callback=lambda fnc: Time(default_timer(fnc)) + ).get_sampler_kwargs() + assert not bool(kwargs) + + # Example from the README file: + + @alternatives( + coo=lambda matrix: sp.coo_matrix(matrix), + csc=lambda matrix: sp.csc_matrix(matrix), + csr=lambda matrix: sp.csr_matrix(matrix), + ) + def create_matrix(matrix): + res = sp.bsr_matrix(matrix) + return res + + create_matrix.var_name = "foo" + mat = data(info=["shape", "dtype"]) + vec = data(info=["shape", "dtype"]) + assert isinstance(mat, Tunable) + assert isinstance(vec, Tunable) + mat = create_matrix(mat) + assert isinstance(mat, Tunable) + mul = finalize(mat * vec) + assert isinstance(mul, HighLevel) + assert len(list(dep for dep in mul.dependencies if str(dep).startswith("foo"))) == 1 + assert ( + len( + list( + dep + for dep in mul.dependencies + if str(dep).startswith("which_create_matrix") + ) + ) + == 0 + ) + + mul.add_deps("foo") + assert ( + len(list(dep for dep in mul.first_dependencies if str(dep).startswith("foo"))) + == 1 + ) + mul["create_matrix"].precompute = True + assert mul["create_matrix"].precompute == True + obj = optimise(mul, sampler="optuna") + assert isinstance(obj, Tuner) + matrix = sp.random(100, 100, 0.1) + vector = numpy.random.rand(100) + res = obj(mat=matrix, vec=vector) + assert isinstance(res, numpy.ndarray) + assert res.shape == (100,) + numpy.testing.assert_array_almost_equal(res, matrix * vector, decimal=8) diff --git a/test/test_variable.py b/test/test_variable.py index 685722a..d597508 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -4,6 +4,8 @@ from tuneit.tunable import Tunable from tuneit.finalize import finalize from pytest import raises +import pickle +import testfixtures def test_variable(): @@ -45,9 +47,6 @@ def test_variable(): with raises(RuntimeError): d.value = 3 - d = Variable(range(10)) - d.value = Variable(range(10)) - d = Permutation((1, 2, 3)) d.value = (3, 2, 1) assert d.size == 6 @@ -74,3 
+73,11 @@ def test_copy(): assert not var2.fixed assert var.fixed assert var.uid != var2.uid + + +def test_pickle(): + # Variable + z = Variable([1, 2]) + a = pickle.dumps(z) + b = pickle.loads(a) + assert testfixtures.compare(z, b, strict=True) is None diff --git a/tuneit/__init__.py b/tuneit/__init__.py index df85f5c..5202e47 100644 --- a/tuneit/__init__.py +++ b/tuneit/__init__.py @@ -1,6 +1,6 @@ -"A Python tool for optimizing and crosschecking calculations" +"A Python tool for optimizing, benchmarking and crosschecking calculations" -__version__ = "0.0.4" +__version__ = "0.1.0" from .graph import * from .tunable import * diff --git a/tuneit/class_utils.py b/tuneit/class_utils.py index d3abf46..43c9e0d 100644 --- a/tuneit/class_utils.py +++ b/tuneit/class_utils.py @@ -222,9 +222,15 @@ def args_to_kwargs(cls, *args): return kwargs - def __init__(self, *args, **kwargs): + def __init__(self, *args, name=None, var_name=None, **kwargs): super().__init__(**self.args_to_kwargs(*args), **kwargs) + self._name_given = False self.default = next(iter(self)) + if name: + self.name = name + self._name_given = True + self._var_name = var_name + self._closed = False @wraps(dict.update) def update(self, *args, **kwargs): @@ -240,8 +246,8 @@ def default(self, key): if key not in self: raise KeyError(f"{key} unknown alternative") self._default = key - wraps(self[key])(self) - self.__name__ = key + if not self._name_given: + self.name = key def add(self, fnc): "Adds a value to the alternatives" @@ -249,9 +255,28 @@ def add(self, fnc): self.update(kwargs) return next(iter(kwargs)) + @property + def var_name(self): + if self._var_name is None: + return "which_" + self.__name__ + return self._var_name + + @var_name.setter + def var_name(self, key): + self._var_name = key + + @property + def name(self): + return self.__name__ + + @name.setter + def name(self, key): + self.__name__ = key + def __call__(self, *args, _key=None, **kwargs): - if len(args) == 1 and callable(args[0]): + if len(args) == 1 and callable(args[0]) and not self._closed: self.default = self.add(args[0]) + self._closed = True return self if _key: @@ -260,6 +285,8 @@ def __call__(self, *args, _key=None, **kwargs): return function( self, *args, - _key=variable(self.keys(), default=self.default, label=self.__name__), + _key=variable( + tuple(self.keys()), default=self.default, label=self.var_name + ), **kwargs, ) diff --git a/tuneit/finalize.py b/tuneit/finalize.py index c4f794e..d12aaba 100644 --- a/tuneit/finalize.py +++ b/tuneit/finalize.py @@ -6,9 +6,9 @@ "finalize", ] -from .graph import Node, Key +from .graph import Node, Key, Graph, visualize from .variable import Variable -from .tunable import Object, Function, compute +from .tunable import Object, Function, compute, Data, data def finalize(tunable): @@ -21,6 +21,13 @@ def finalize(tunable): class HighLevel(Node): "HighLevel view of a Node" + @property + def datas(self): + "List of dependencies that are a variable" + return tuple( + str(dep) for dep in self.dependencies if isinstance(self[dep], Data) + ) + @property def variables(self): "List of dependencies that are a variable" @@ -28,6 +35,15 @@ def variables(self): str(dep) for dep in self.dependencies if isinstance(self[dep], Variable) ) + @property + def direct_variables(self): + "List of first dependencies that are a variable" + return tuple( + str(dep) + for dep in self.direct_dependencies + if isinstance(self[dep], Variable) + ) + @property def functions(self): "List of dependencies that are a functions" @@ -53,17 +69,74 @@ def 
depends_on(self, value): return self.depends_on(value.key) return False + def get_node(self, key): + "Returns a node of the graph as a finalized graph" + key = self.get_key(key) + return HighLevel(self.graph[key]) + def __getitem__(self, key): - if isinstance(key, Object): - key = key.key + + key = self.get_key(key) return self.graph[key].value def __setitem__(self, key, value): + key = self.get_key(key) self.graph[key] = value + def get_key(self, key, ntype=None): + + if isinstance(key, Object): + key = key.key + if isinstance(key, Key): + key = Key(key).key + if key in self.graph: + return key + if not isinstance(key, str): + raise TypeError("key should be string") + + keys = self.dependencies + if ntype: + keys = (k for k in keys if isinstance(self[k], ntype)) + + # Smart search + matches = list(filter(lambda k: str(k).startswith(key + "-"), keys)) + if len(matches) > 1: + raise KeyError("More than one key matched to %s: %s" % (key, matches)) + if len(matches) == 0: + raise KeyError("%s is not a key of %s" % (key, self)) + + return matches[0] + + def get_info(self, short=False): + all_info = {} + for data in self.datas: + info = self[data].get_info() + for key in list(info): + name = data[: data.rfind("-")] if short else data + all_info[f"{name}_{key}"] = info[key] + return all_info + def __copy__(self): return HighLevel(super().__copy__()) + def __call__(self, *args, compute_kwargs=None, **kwargs): + "kwargs are data input of the graph" + if args: + raise ValueError("args not supported, please pass them as kwargs") + + tmp = self.copy() + + for key, val in kwargs.items(): + try: + tmp.get_data(key).set(val) + continue + except KeyError: + pass + tmp.fix(key, val) + + compute_kwargs = compute_kwargs or {} + return tmp.compute(**compute_kwargs) + def copy(self, reset=False, reset_tunable=True): "Copy the content of the graph unrelating the tunable variables" res = self.__copy__() @@ -77,27 +150,34 @@ def copy(self, reset=False, reset_tunable=True): return res - def get_variable(self, variable): + def get_variable(self, key): + "Returns the variable corresponding to var" + key = self.get_key(key, ntype=Variable) + return self[key] + + def get_data(self, key): "Returns the varible corresponding to var" - if isinstance(variable, Variable): - variable = variable.key - if isinstance(variable, Key): - variable = Key(variable).key - - if not variable in self.variables: - # Smart search - matches = list( - filter(lambda var: var.startswith(variable + "-"), self.variables) - ) - if len(matches) > 1: - raise KeyError( - "More than one variable matched to %s: %s" % (variable, matches) - ) - if len(matches) == 0: - raise KeyError("%s is not a variable of %s" % (variable, self)) - variable = matches[0] + key = self.get_key(key, ntype=Data) + return self[key] + + def add_deps( + self, start_node, end_node=None + ): # start_node, end_node must be a strings + if not end_node: + end_node = self + else: + end_node = self.get_node(end_node) - return self[variable] + start_node = self[start_node] + + end_node.value.deps = end_node.value.deps + (Key(start_node.key),) + + def precompute(self, **kwargs): + for key in self.dependencies: + if self[key].precompute: + self[key] = Data( + self.get_node(key).compute(**kwargs), label=self[key].label + ) def fix(self, variable, value=None): "Fixes the value of the variable" @@ -107,3 +187,120 @@ def compute(self, **kwargs): "Computes the result of the Node" kwargs.setdefault("graph", self.graph) return compute(self.value, **kwargs) + + def remove(self, *nodes): + 
"Removes the list of nodes from the graph" + for node in nodes: + del self.graph[node] + + def merge(self, nodes=None): + "Returns a new graph with the list of nodes merged into a single node" + # if not nodes: + # mergers = detect_mergers + # for nodes in mergers... all the rest + for node in nodes: + if not isinstance(self[node], Function): + raise ValueError("The node does not represent a function") + last_node, merge = self.mergeable(nodes) + if not merge: + raise ValueError("Group of nodes not mergeable") + deps = tuple( + set( + [ + str(dep) + for node in nodes + for dep in self.get_node(node).first_dependencies + if dep not in nodes + ] + ) + ) + sub = {node: self[node] for node in nodes} + sub.update( + {dep: Data(None, label=f"dep{i}", info=dep) for (i, dep) in enumerate(deps)} + ) + sub = finalize(Graph(sub)[last_node]) + new_node = Function( + sub, + label="merged", + kwargs={f"dep{i}": Key(dep) for (i, dep) in enumerate(deps)}, + ) + new_graph = self.copy(reset=True) + nodes.remove(last_node) + new_graph.remove(*nodes) + new_graph[last_node] = new_node + return new_graph + + def consecutive(self, nodes): + "implements a DFS to check if the undirected graph is connected (if all nodes are consecutive)" + stack = [nodes[0]] + unvisited = nodes[1:] + while stack: + u = stack[-1] + appended = False + for w in [str(dep) for dep in self.get_node(u).first_dependencies]: + if w in unvisited: + unvisited.remove(w) + stack.append(w) + appended = True + break + if not appended: + for w in [ + str(node) + for node in nodes + if u in self.get_node(node).first_dependencies + ]: + if w in unvisited: + unvisited.remove(w) + stack.append(w) + appended = True + break + if not appended: + stack.pop() + if not unvisited: + return True + return False + + def one_output(self, nodes): + "gets a list of nodes and checks that they only produce one output" + common = [] + common_outside = [] + all_nodes = [ + str(n) for n in self.dependencies if isinstance(self[str(n)], Function) + ] + + # If the whole graph is given or if the subgraph contains the last node of the whole graph: + all_deps = [ + str(dep) + for node in all_nodes + for dep in self.get_node(str(node)).first_dependencies + ] + for node in all_nodes: + if node not in all_deps and node in nodes: + common_outside = common_outside + [node] + if len(common_outside) == 1: + return common_outside[0], True + elif len(common_outside) > 1: + return common_outside, False + + # If a subgraph is given: + for node in all_nodes: + deps = list( + set( + [str(dep) for dep in self.get_node(str(node)).first_dependencies] + ).intersection(nodes) + ) + if node in nodes: + common = common + deps + else: + common_outside = common_outside + deps + if len(set(common_outside)) == 1 and set(common + common_outside) == set(nodes): + return common_outside[0], True + return common_outside, False + + def mergeable(self, nodes): + last_node, res = self.one_output(nodes) + if self.consecutive(nodes) and res: + return last_node, True + return last_node, False + + visualize = visualize diff --git a/tuneit/graph.py b/tuneit/graph.py index 51cacac..7fc8ae8 100644 --- a/tuneit/graph.py +++ b/tuneit/graph.py @@ -22,12 +22,19 @@ def __init__(self, graph=None): self.backend = {} if graph is None else dict(graph) def __getitem__(self, key): + if isinstance(key, int): + return self[list(self.keys())[key]] if isinstance(key, Key): key = Key(key).key node = Node(self) Key(node).key = key return node + def index(self, key): + if isinstance(key, Key): + key = Key(key).key + return 
list(self.keys()).index(key) + def __setitem__(self, key, value): if isinstance(key, Key): key = Key(key).key @@ -77,6 +84,9 @@ def copy(self): "Shallow copy of a Graph" return Graph(self.backend.copy()) + def __reduce__(self): + return type(self), (self.backend,) + class Key(metaclass=CastableType, attrs=["key"]): "Namespace for the keys of tunable objects" @@ -103,6 +113,9 @@ def __getattr__(self, key): def __hash__(self): return hash(self.key) + def __reduce__(self): + return type(self), (self.key,) + class Node(Graph, Key, bind=False): """ @@ -110,12 +123,15 @@ class Node(Graph, Key, bind=False): """ def __init__(self, key, value=None): - Graph.__init__(self.graph, join_graphs(value)) + if isinstance(key, Graph): + Graph.__init__(self.graph, join_graphs(key)) + else: + Graph.__init__(self.graph, join_graphs(value)) Key.__init__(self.key, key) if key not in self.graph: self.graph[key] = value - elif not isinstance(value, Graph) or self == value: + elif value and (not isinstance(value, Graph) or self == value): raise KeyError("%s already in graph and the value will not be changed") for part in self: @@ -124,6 +140,12 @@ def __init__(self, key, value=None): except TypeError: pass + def __reduce__(self): + return type(self), ( + self.key, + self.graph, + ) + @property def key(self): "Returns the key of the node" @@ -193,22 +215,30 @@ def copy(self): def first_dependencies(self): "Iterates over the dependencies" for val in self: - if isinstance(val, Key): + if type(val) == Key: yield Key(val) + direct_dependencies = first_dependencies + @property def dependencies(self): "Iterates over the dependencies" deps = [self.key] yield deps[0] for val in self: - if isinstance(val, Key): + if not type(val) == Key: + continue + if str(val) in deps: + continue + if val in self.graph: val = self.graph[val] - if isinstance(val, Node): - for dep in Node(val).dependencies: - if dep not in deps: - deps.append(dep) - yield dep + else: + continue + + for dep in Node(val).dependencies: + if dep not in deps: + deps.append(dep) + yield dep def visualize(self, **kwargs): """ @@ -239,7 +269,7 @@ def join_graphs(graphs): return Graph() -def visualize(graph, start=None, end=None, **kwargs): +def visualize(graph, start=None, end=None, groups=None, **kwargs): "Visualizes the graph returning a dot graph" assert isinstance(graph, Graph), "graph must be of type Graph" @@ -265,18 +295,39 @@ def visualize(graph, start=None, end=None, **kwargs): dot = default_graph(**kwargs) for key in keys: + i = graph.index(key) node = graph[key] if start and start not in node.dependencies: continue - dot.node(str(key), node.label, **node.dot_attrs) + dot.node( + str(key), + label=f"""<{node.label}
{i}>""", + **node.dot_attrs, + ) for dep in node.first_dependencies: if start and start not in graph[dep].dependencies: continue dot.edge(str(dep), str(key)) + if key == end: + dot.node( + "output", + label="output", + shape="rectangle", + ) + dot.edge(str(key), "output") + + if groups is not None: + for (i, group) in enumerate(groups): + with dot.subgraph(name=f"cluster_{i}") as c: + c.attr(style="dashed", color="blue") + for n in group: + node = graph[n] + c.node(n, node.label, **node.dot_attrs) + return dot diff --git a/tuneit/meta.py b/tuneit/meta.py index fb945fe..1133ba3 100644 --- a/tuneit/meta.py +++ b/tuneit/meta.py @@ -128,8 +128,10 @@ def __call__(cls, *args, **kwargs): values = dict() getattr(cls, "__values__").__set__(obj, values) - if len(args) == 1 and ( - isinstance(args[0], cls) or issubclass(cls, type(args[0])) + if ( + len(args) == 1 + and not kwargs + and (isinstance(args[0], cls) or issubclass(cls, type(args[0]))) ): for attr in obj.__attrs__: values[attr] = ( @@ -143,7 +145,7 @@ def __call__(cls, *args, **kwargs): try: obj.__init__(*args, **kwargs) except AttributeError: - pass + raise return obj diff --git a/tuneit/tools/__init__.py b/tuneit/tools/__init__.py index 5632543..85f490f 100644 --- a/tuneit/tools/__init__.py +++ b/tuneit/tools/__init__.py @@ -1,5 +1,6 @@ "Highlevel tools for analyzing the tunable graphs" from .base import * +from .tuner import * from .check import * from .time import * diff --git a/tuneit/tools/base.py b/tuneit/tools/base.py index e54f787..3c7ac6a 100644 --- a/tuneit/tools/base.py +++ b/tuneit/tools/base.py @@ -13,6 +13,7 @@ from itertools import product from tabulate import tabulate from ..finalize import finalize +from lyncs_utils import isiterable class Sampler: @@ -26,6 +27,8 @@ def __init__( callback=None, callback_calls=False, label=None, + store_value=False, + record=False, **kwargs, ): "Initializes the tunable object and the variables" @@ -41,8 +44,12 @@ def __init__( self.label = label if variables: + if isinstance(variables, str): + variables = [variables] + elif not isiterable(variables): + variables = [variables] self.variables = tuple( - str(tunable.get_variable(var).key) for var in variables + str(self.tunable.get_variable(var).key) for var in variables ) set_vars = set(self.variables) @@ -62,6 +69,39 @@ def __init__( if n_samples: self.n_samples = n_samples + self.store_value = store_value + + self._trials = None + self.record = record + + @property + def record(self): + return self.trials is not None + + @record.setter + def record(self, value): + try: + import pandas as pd + except: + raise ImportError("pandas is a requirement for record") + if value == self.record: + return + if value is False: + self._trials = None + elif value is True: + self._trials = pd.DataFrame( + columns=["trial_id"] + + list(self.headers)[:-1] + + list(self.tunable.get_info(short=True).keys()) + + ["time"] + ) + else: + raise TypeError("Value is neither true or false") + + @property + def trials(self): + return self._trials + @property def max_samples(self): "Size of the parameter space (product of variables' size)" @@ -120,13 +160,52 @@ def __iter__(self): tmp.fix(var, val) try: if self.callback_calls: - result = self.callback(lambda: tmp.compute(**self.compute_kwargs)) + result = self.callback(lambda: self._perform_call(tmp)) else: - result = self.callback(tmp.compute(**self.compute_kwargs)) + result = self.callback(self._perform_call(tmp)) except Exception as err: result = err + + if self.record: + index = len(self.trials) + 
self.trials.loc[index] = [ + index, + *list(params), + *list(self.tunable.get_info().values()), + result, + ] yield params, result + def run(self, *args, **kwargs): + return tuple(self(*args, **kwargs)) + + def _perform_call(self, graph): + value = graph.compute(**self.compute_kwargs) + if self.store_value: + self._value = value + return value + + def __call__(self, *args, **kwargs): + "kwargs are data input of the graph" + if args: + raise ValueError("args not supported, please pass them as kwargs") + + for key, val in kwargs.items(): + try: + self.tunable.get_data(key).set(val) + continue + except KeyError: + pass + self.tunable.fix(key, val) + + return self + + @property + def value(self): + if not hasattr(self, "_value") or not self.store_value: + return self._perform_call(self.tunable.copy()) + return self._value + @property def label(self): "Label used for the result" @@ -150,7 +229,7 @@ def _repr_html_(self): return self.tabulate(tablefmt="html") -def sample(tunable, *variables, samples=100, **kwargs): +def sample(tunable, variables=None, samples=100, **kwargs): """ Samples the value of the tunable object diff --git a/tuneit/tools/check.py b/tuneit/tools/check.py index 5fe7997..14ace02 100644 --- a/tuneit/tools/check.py +++ b/tuneit/tools/check.py @@ -1,12 +1,11 @@ """ Function for checking the results """ -# pylint: disable=C0303,C0330 import operator from numpy import allclose from .base import sample -from ..finalize import finalize +from ..finalize import finalize, HighLevel __all__ = [ "crosscheck", @@ -15,7 +14,7 @@ def crosscheck( tunable, - *variables, + variables=None, comparison=allclose, samples=None, reference=None, @@ -40,13 +39,23 @@ def crosscheck( Variables passed to the compute function. See help(tunable.compute) """ if reference is None: - reference = finalize(tunable).copy().compute(**kwargs) + reference = [] + else: + reference = [reference] + + def get_ref(res): + if len(reference) == 0: + reference.append(res) + return reference[0] return sample( tunable, - *variables, - callback=lambda res: comparison(reference, res), + variables=variables, + callback=lambda res: comparison(get_ref(res), res), samples=samples, label=label, **kwargs ) + + +HighLevel.crosscheck = crosscheck diff --git a/tuneit/tools/optuna.py b/tuneit/tools/optuna.py new file mode 100644 index 0000000..da2fe28 --- /dev/null +++ b/tuneit/tools/optuna.py @@ -0,0 +1,144 @@ +import optuna +from optuna.trial import Trial +from optuna import exceptions +from hashlib import md5 +from dill import dumps +from tuneit.finalize import HighLevel + + +class OptunaSampler: + "Creates a sampler using the optuna package" + + optuna_types = { + "discrete_uniform": Trial.suggest_discrete_uniform, + "float": Trial.suggest_float, + "int": Trial.suggest_int, + "loguniform": Trial.suggest_loguniform, + "uniform": Trial.suggest_uniform, + "categorical": Trial.suggest_categorical, + } + + def __init__(self, tunable, callback=None, storage=None, n_trials=None, **kwargs): + """ + Initialises the parameters of the class: + + Parameters + ---------- + tunable: HighLevel object + A finalised tunable object whose parameters will be tuned. + kwargs: Any + Variables that will be used for the computation of the graph. + n_trials: int + The number of trials optuna will execute for the tunable object. + storage: sqlite file name + Local file where the trials in this study will be saved. + Example: "sqlite:///example.db" + callback: callable + The objective function to be used for the tuning of parameters. 
+ """ + + self.tunable = HighLevel(tunable).copy() + self.compute_kwargs = kwargs + + self.callback = callback + + self.n_trials = n_trials or 1 + + self.storage = storage + + def get_study(self): + "Creates a new study or loads a pre-existing one if the name already exists" + attrs = self.tunable.get_info() + attrs["callback"] = self.callback + name = md5(dumps(attrs)).hexdigest() + try: + study = optuna.create_study(study_name=name, storage=self.storage) + for key, val in attrs.items(): + study.set_user_attr( + key, repr(val) + ) # because the value should be JSON serializable + except exceptions.DuplicatedStudyError: + study = optuna.load_study(study_name=name, storage=self.storage) + return study + + @property + def catches(self): + return (Exception,) + + def compute(self, count=0, **kwargs): + "Returns the value of the graph after completing the set number of trials for the tuning of the parameters" + try: + self.get_study().optimize( + lambda trial: self.objective( + trial, **{**self.compute_kwargs, **kwargs} + ), + self.n_trials, + catch=self.catches, + ) + value = self._value + del self._value + return value + except: + if count < 10: + return self.compute(count=count + 1, **kwargs) + else: + raise RuntimeError("Trial failed too many times") + + def _call_wrapper(self, graph, **kwargs): + "Computes and returns the value of the graph" + self._value = graph(compute_kwargs=kwargs) + return self._value + + def objective(self, trial, **kwargs): + "Computes and returns the objective function (callback) value for the next trial" + tmp = self.get_next_trial(trial) + tmp.precompute(**kwargs) + return self.callback(lambda: self._call_wrapper(tmp, **kwargs)) + + def get_next_trial(self, trial): + "Returns the next trial: a tunable object whose variables have been fixed with a combination of options" + tmp = self.tunable.copy(reset=True) + vars = self.tunable.variables + values = self.get_suggestions(trial) + for v in vars: + tmp.fix(v, values[v]) + return tmp + + def get_suggestions(self, trial): + "Returns a suggested option for each variable that will be tuned" + vars = self.tunable.variables + values = {} + for v in vars: + var = self.tunable.get_variable(v) + var_values = var.values + var_type = self.deduce_type(var_values) + var_args = self.get_var_args(var_type, var_values) + values[v] = OptunaSampler.optuna_types[var_type](trial, v, *var_args) + return values + + @staticmethod + def get_var_args(var_type, var_values): + "Returns the arguments needed for each optuna type of variable" + if var_type == "categorical": + return (tuple(var_values),) + elif var_type == "discrete_uniform": + step = 1 # default + return min(var_values), max(var_values), step + + @staticmethod + def deduce_type(variable): + "Returns a type compatible with optuna: discrete_uniform, float, int, loguniform, uniform, categorical" + # only categorical and discrete uniform are supported for the time being + if isinstance(variable, range): + return "discrete_uniform" + return "categorical" + + def best_params(self, **kwargs): + "Returns the best options for the variables of this graph" + study = self.get_study() + study.optimize( + lambda trial: self.objective(trial, **kwargs), + self.n_trials, + catch=self.catches, + ) + return study.best_params diff --git a/tuneit/tools/time.py b/tuneit/tools/time.py index 2facafe..62efdbd 100644 --- a/tuneit/tools/time.py +++ b/tuneit/tools/time.py @@ -3,10 +3,14 @@ __all__ = [ "benchmark", + "optimize", + "optimise", ] from timeit import timeit from .base import sample +from 
.tuner import tune +from ..finalize import HighLevel class Time(float): @@ -25,21 +29,22 @@ def __str__(self): __repr__ = __str__ -def default_timer(fnc, number=100): - return timeit(fnc, number=100) / number +def default_timer(fnc, number=1): + return timeit(fnc, number=number) / number def benchmark( tunable, - *variables, + variables=None, timer=default_timer, timer_kwargs=None, samples=None, label="Time", + record=False, **kwargs, ): """ - Crosscheck the result of tunable against the reference. + Samples the execution time. Parameters ---------- @@ -55,16 +60,54 @@ def benchmark( timer_kwargs: dict Arguments passed to the timer. For default timer: - number: (int) number of iterations + kwargs: dict Variables passed to the compute function. See help(tunable.compute) """ return sample( tunable, - *variables, + variables=variables, callback=lambda fnc: Time(timer(fnc, **(timer_kwargs or {}))), callback_calls=True, samples=samples, label=label, + record=record, + **kwargs, + ) + + +def optimise(tunable, timer=default_timer, timer_kwargs=None, **kwargs): + """ + Optimizes the execution time changing the tunable parameters. + + Parameters + ---------- + comparison: callable (default = numpy.allclose) + The function to use for comparison. It is called as fnc(reference, value) + and should return a value from 0 (False) to 1 (True). + reference: Any + The reference value. If None, than the default values are used to produce the result. + variables: list of str + Set of variables to sample. + n_trials: int + The number of trials per call. + timer_kwargs: dict + Arguments passed to the timer. For default timer: + - number: (int) number of iterations + kwargs: dict + Variables passed to the compute function. See help(tunable.compute) + """ + + return tune( + tunable, + callback=lambda fnc: Time(timer(fnc, **(timer_kwargs or {}))), + callback_calls=True, **kwargs, ) + + +optimize = optimise +HighLevel.benchmark = benchmark +HighLevel.optimize = optimise +HighLevel.optimise = optimise diff --git a/tuneit/tools/tuner.py b/tuneit/tools/tuner.py new file mode 100644 index 0000000..5b0f322 --- /dev/null +++ b/tuneit/tools/tuner.py @@ -0,0 +1,77 @@ +__all__ = [ + "tune", +] + +from .base import Sampler +from ..finalize import HighLevel + +try: + from .optuna import OptunaSampler +except ImportError: + OptunaSampler = None + + +class Tuner(HighLevel, attrs=["tuner_kwargs"]): + def __init__(self, tunable, **kwargs): + "Initializes the tunable object and the variables" + super().__init__(tunable) + self.tuner_kwargs = kwargs + + def copy(self, **kwargs): + tmp = super().copy(**kwargs) + return Tuner(tmp, **self.tuner_kwargs) + + def compute(self, **kwargs): + "Calls a sampler to tune the whole graph" + # graph_manager = self.divide_graph() + # for subgraph in graph_manager: + # value = self.get_sampler()(subgraph,**self.get_sampler_kwargs()).compute(**kwargs) + # graph_manager.store(subgraph, value) + value = self.get_sampler()(self, **self.get_sampler_kwargs()).compute(**kwargs) + return value + + def get_best_trial(self): + "Returns the best options for the variables of the graph after it has been tuned" + return self.get_sampler()(self, **self.get_sampler_kwargs()).best_params() + + def get_sampler(self): + "Returns the name of the appropriate sampler class to be called based on the sampler argument in kwargs" + sampler = self.tuner_kwargs.get("sampler", None) + if not sampler: + return Sampler + if sampler in [ + "Optuna", + "optuna", + ]: + if OptunaSampler is None: + raise ImportError("Optuna not 
installed") + return OptunaSampler + raise ValueError(f"Unknown sampler {sampler}") + + def get_sampler_kwargs(self): + "Returns the appropriate arguments for the selected sampler" + sampler_kwargs = {} + for d in self.datas: + y = d.split("-")[0] + if y in list(self.tuner_kwargs.keys()): + sampler_kwargs[y] = self.tuner_kwargs.get(y, None) + if self.get_sampler() is OptunaSampler: + sampler_kwargs["callback"] = self.tuner_kwargs.get("callback", None) + sampler_kwargs["storage"] = "sqlite:///example.db" + return sampler_kwargs + + +def tune(tunable, **kwargs): + """ + Tunes the value of the tunable object + + Parameters + ---------- + variables: list of str + Set of variables to sample. + tuner: str or class + The tune to use. Options = None, Optuna, ... + kwargs: dict + Variables passed to the tuner function + """ + return Tuner(tunable, **kwargs) diff --git a/tuneit/tunable.py b/tuneit/tunable.py index 274b4a4..1f008e6 100644 --- a/tuneit/tunable.py +++ b/tuneit/tunable.py @@ -6,6 +6,7 @@ "Tunable", "tunable", "Object", + "data", "function", "Function", ] @@ -22,6 +23,11 @@ from typing import Any from varname import varname as _varname, VarnameRetrievingError from .graph import Graph, Node, Key +from lyncs_utils import isiterable +import multiprocessing +from time import time +from contextlib import contextmanager +import logging def varname(caller=1, default=None): @@ -32,8 +38,23 @@ def varname(caller=1, default=None): return default -def compute(obj, **kwargs): +def compute(obj, timeout=None, **kwargs): "Compute the value of a tunable object" + if timeout: + raise NotImplementedError + kwargs["queue"] = multiprocessing.Queue() + p = multiprocessing.Process(target=compute, args=(obj,), kwargs=kwargs) + p.start() + p.join(timeout) + if p.exitcode is None: + p.terminate() + raise RuntimeError("Timeout") + if p.exitcode < 0: + raise RuntimeError(f"The process was terminated by signal {abs(exitcode)}.") + if not kwargs["queue"].empty(): + obj = kwargs["queue"].get() + return obj + kwargs.setdefault("maxiter", 3) if kwargs["maxiter"] <= 0: return obj @@ -41,12 +62,10 @@ def compute(obj, **kwargs): kwargs.setdefault("graph", Node(obj).graph) if isinstance(obj, Key) and not isinstance(obj, Node): obj = kwargs["graph"][obj].value - try: + if hasattr(obj, "__compute__"): obj = obj.__compute__(**kwargs) kwargs["maxiter"] -= 1 obj = compute(obj, **kwargs) - except AttributeError: - pass return obj @@ -74,6 +93,7 @@ class Object: deps: Any = None label: str = None uid: Any = None + precompute: bool = False @classmethod def extract_deps(cls, deps): @@ -124,13 +144,13 @@ def tunable(self): "Returns a tunable object" return Tunable(self) - @property - def dependencies(self): - "Returns the list of dependencies for the Object" - return Node(self.tunable()).dependencies - def copy(self, **kwargs): "Returns a copy of self" + # TODO: improve copy.. 
it should copy automatically all the data + # tmp = copy(self) + # for key, val in kwargs.items(): + # setattr(tmp, key, val) + # return tmp kwargs.setdefault("deps", self.deps) kwargs.setdefault("label", self.label) kwargs.setdefault("uid", self.uid) @@ -171,13 +191,59 @@ def __label__(self): @property def __dot_attrs__(self): - return dict(shape="rect") + attrs = dict(shape="rect") + if self.precompute: + attrs["style"] = "filled" + attrs["color"] = "lightblue2" + return attrs Object.__eq2__ = Object.__eq__ Object.__eq__ = lambda self, value: self.obj == value or self.__eq2__(value) +def data(value=None, label=None, **kwargs): + """ + A tunable function call. + + Parameters + ---------- + label: str + + """ + label = label or varname() + return Data(value, label=label, **kwargs).tunable() + + +@dataclass +class Data(Object): + "A data input of the graph" + + check: callable = None + info: callable = None + + def copy(self, **kwargs): + "Returns a copy of self" + kwargs.setdefault("check", self.check) + kwargs.setdefault("info", self.info) + return super().copy(**kwargs) + + def set(self, val): + if self.check: + if not self.check(val): + raise ValueError("Check did not return True") + self.obj = val + + def get_info(self): + if self.info is None: + return None + if isiterable(self.info): + return {key: getattr(self.obj, key, None) for key in self.info} + if callable(self.info): + return self.info(self.obj) + raise TypeError("Unsupported type for info") + + def function(fnc, *args, **kwargs): """ A tunable function call. @@ -251,8 +317,17 @@ def __compute__(self, **kwargs): cmpt = lambda obj: compute(obj, **kwargs) fnc = cmpt(super().__compute__(**kwargs)) args = tuple(map(cmpt, self.args)) + context = kwargs.get("context", default_context) + log = kwargs.get("log", False) kwargs = dict(zip(self.kwargs.keys(), map(cmpt, self.kwargs.values()))) - res = fnc(*args, **kwargs) + if log: + logging.basicConfig(level=logging.INFO) + else: + logging.basicConfig(level=logging.WARNING) + if context is None: + context = empty_context + with context(self.key): + res = fnc(*args, **kwargs) if res is None: if ismethod(fnc) or fnc is setattr: return args[0] @@ -295,7 +370,9 @@ def __label__(self): @property def __dot_attrs__(self): - return dict(shape="oval") + attrs = super().__dot_attrs__ + attrs["shape"] = "oval" + return attrs def callattr(self, key, *args, **kwargs): @@ -303,6 +380,18 @@ def callattr(self, key, *args, **kwargs): return getattr(self, key)(*args, **kwargs) +@contextmanager +def default_context(key): + start = time() + yield + logging.info(f"{key}:{time()-start}") + + +@contextmanager +def empty_context(key): + yield + + class Tunable(Node, bind=False): "A class that turns any operation into a graph node" @@ -325,11 +414,15 @@ def __call__(self, *args, **kwargs): and len(tmp.args) == 2 and not tmp.kwargs ): - value = Function(callattr, args=tmp.args + args, kwargs=kwargs) graph = Graph(self).copy() del graph[Key(self)] - graph[value.key] = value - return Tunable(graph[value.key]) + node = graph[tmp.args[0]] + return function(callattr, node, tmp.args[1], *args, **kwargs) + # value = Function(callattr, args=tmp.args + args, kwargs=kwargs) + # graph = Graph(self).copy() + # del graph[Key(self)] + # graph[value.key] = value + # return Tunable(graph[value.key]) return function(Tunable(Node(self).copy()), *args, **kwargs) def __repr__(self): @@ -343,10 +436,16 @@ def __bool__(self): def __getstate__(self): return Node(self).key + def __reduce__(self): + return type(self), (Node(self),) + def 
__compute__(self, **kwargs): # pylint: disable=W0613 return Node(self).value + def __iter__(self): + raise TypeError("A tunable object is not iterable") + def default_operator(fnc): "Default operator wrapper" diff --git a/tuneit/variable.py b/tuneit/variable.py index f89ee11..fda9d1e 100644 --- a/tuneit/variable.py +++ b/tuneit/variable.py @@ -146,7 +146,13 @@ def __compute__(self, **kwargs): @property def __dot_attrs__(self): - return dict(shape="diamond", color="green" if self.fixed else "red") + attrs = super().__dot_attrs__ + attrs["shape"] = "diamond" + if self.fixed: + attrs["color"] = "green" + else: + attrs["color"] = "red" + return attrs def copy(self, reset=False, reset_value=False, **kwargs): "Returns a copy of self"