Skip to content

Commit

Permalink
Release 0.8 (#421)
Browse files Browse the repository at this point in the history
  • Loading branch information
cTatu authored Nov 11, 2022
1 parent 7579891 commit 8b4795e
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 71 deletions.
42 changes: 42 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,48 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.8.0] - 2022-11-11
### Added
- `save` and `load` methods for all models
- Adding Multiclass CSVM
- Adding TS-QR (Tall Skinny QR)
- New in-place operations for ds-arrays:
`add` `iadd` `isub`
- Matrix-Subtraction and Matrix-Addition
- Concatenating two ds-arrays by columns
- Save ds-array to `npy` file
- Load ds-array from several `npy` files
- Create ds-arrays from blocks
- GridSearch for simulations & improvements
- Inverse transformation in Scalers
- Train-Test-Split functionality
- Add KNN Classifier
- Better SVD columns pairing
- GPU Support using CUDA/CuPy for algorithms: Kmeans, KNN, SVD, PCA, Matmul, Addition, Subtraction, QR, Kronecker

### Changed
- New documentation for GPU, RandomForest, Scalers

### Fixed
- Fix bug in Scalers & tests

## [0.7.0] - 2021-11-10
### Added
- New decomposition algorithm QR
- New preprocessing algorithm MinMaxScaler
- Jenkinsfile for CI automated tests
- ds-array matrix multiplication (matmul)
- New function for ds-array creation
- Add `@constraint(computing_units="${ComputingUnits}")` to all tasks
- More I/O functions for reading and writing ds-arrays
- More tests

### Changed
- Move RandomForest from 'classification' to 'trees'

### Fixed
- Some bugs in the ds-array

## [0.6.0] - 2020-10-09
### Added
- User guide and glossary
Expand Down
2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2019-2020 Barcelona Supercomputing Center (BSC)
Copyright 2019-2022 Barcelona Supercomputing Center (BSC)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7.0
0.8.0
62 changes: 56 additions & 6 deletions dislib/math/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import itertools
import math

import numpy as np
import dislib
Expand Down Expand Up @@ -173,6 +174,7 @@ def svd(a, compute_uv=True, sort=True, copy=True, eps=1e-9):
v = identity(x.shape[1], (x._reg_shape[1], x._reg_shape[1]))

checks = [True]
n_cols = x._n_blocks[1]

if dislib.__gpu_available__:
_compute_rotation_func = _compute_rotation_and_rotate_gpu
Expand All @@ -182,11 +184,7 @@ def svd(a, compute_uv=True, sort=True, copy=True, eps=1e-9):
while not _check_convergence_svd(checks):
checks = []

pairings = itertools.combinations(
range(x._n_blocks[1]), 2
)

for i, j in pairings:
for i, j in svd_col_combs(n_cols):
coli_x = x._get_col_block(i)
colj_x = x._get_col_block(j)

Expand Down Expand Up @@ -393,7 +391,7 @@ def _compute_u_block_sorted_gpu(a_block, index, bsize, sorting, u_block):
@task(block={Type: COLLECTION_IN, Depth: 1},
out_blocks={Type: COLLECTION_OUT, Depth: 1})
def _merge_svd_block(block, index, hbsize, vbsize, sorting, out_blocks):
block = list(filter(lambda a: a != [], block)) # remove empty lists
block = list(filter(lambda a: np.any(a), block)) # remove empty lists
col = np.vstack(block).T
local_sorting = []

Expand Down Expand Up @@ -577,3 +575,55 @@ def _kron_gpu(block1, block2, out_blocks):
for i in range(block1_gpu.shape[0]):
for j in range(block1_gpu.shape[1]):
out_blocks[i][j] = cp.asnumpy(block1_gpu[i, j] * block2_gpu)


def _combinations(a, b):
    """Build a round-based schedule pairing every element of ``a + b``.

    Each returned "coverage" (round) is a list of pairs in which no element
    appears twice, provided the elements of ``a`` and ``b`` are distinct.

    The schedule is built in two steps:

    1. ``len(a)`` rounds of cross pairings between ``a`` and ``b`` (a cyclic
       shift of ``b`` per round).
    2. Rounds pairing ``a`` with itself and ``b`` with itself, obtained by
       halving each list and recursing; the i-th round for ``a`` is merged
       with the i-th round for ``b``.

    Lengths are not validated here. The halving step only produces a full
    schedule when ``a`` and ``b`` have the same length and that length is a
    power of two, which is what ``svd_col_combs`` passes in.

    Parameters
    ----------
    a : list
        First half of the elements to pair.
    b : list
        Second half of the elements to pair.

    Returns
    -------
    coverages : list of list of tuple
        One list of ``(x, y)`` pairs per round. Empty if ``a`` is empty.
    """
    n = len(a)

    if n == 0:
        # Nothing to pair. Without this guard the halving step below would
        # call itself on empty lists forever.
        return []

    # Cross pairings: in round `shift`, a[k] meets b[(k + shift) % n], so
    # after n rounds every (a, b) pair has been produced exactly once.
    coverages = [
        [(a[k], b[(k + shift) % n]) for k in range(n)]
        for shift in range(n)
    ]

    if n == 1:
        return coverages

    if n == 2:
        # Base case for the within-list pairings.
        coverages.append([(a[0], a[1]), (b[0], b[1])])
        return coverages

    # Pair `a` with itself and `b` with itself by splitting each in two.
    m = n // 2
    rounds_a = _combinations(a[:m], a[m:])
    rounds_b = _combinations(b[:m], b[m:])

    # `a` and `b` hold different elements, so their i-th rounds can be merged
    # into a single round.
    coverages.extend(
        round_a + round_b for round_a, round_b in zip(rounds_a, rounds_b)
    )

    return coverages


def svd_col_combs(n_cols: int):
    """Return every pair of column indices, ordered round by round.

    The indices ``0 .. n_cols - 1`` are padded up to the next power of two,
    a schedule over the padded range is built with ``_combinations``, and
    pairs touching a padding index are dropped. The result holds the same
    pairs as ``itertools.combinations(range(n_cols), 2)``, each exactly once,
    but grouped so that pairs from the same round are adjacent.

    Parameters
    ----------
    n_cols : int
        Number of columns (column blocks) to pair.

    Returns
    -------
    pairings : list of tuple of int
        Pairs ``(i, j)`` with ``0 <= i < j < n_cols``. Empty when
        ``n_cols <= 1``.
    """
    if n_cols <= 1:
        return []

    # Normalise integer-like inputs (e.g. numpy integers) to a Python int so
    # that ``bit_length`` is available.
    n = int(n_cols)

    # Smallest power of two >= n, computed with integer arithmetic. A
    # floating-point logarithm is not guaranteed to be exact at powers of
    # two, which could double the padded range for no benefit.
    padded = 1 << (n - 1).bit_length()
    half = padded // 2

    cols = list(range(padded))
    rounds = _combinations(cols[:half], cols[half:])

    # Keep only pairs made of real columns. For integer pairs this test is
    # equivalent to membership in itertools.combinations(range(n), 2), but
    # avoids scanning a list of all combinations for every candidate.
    return [
        (i, j)
        for single_round in rounds
        for (i, j) in single_round
        if 0 <= i < j < n
    ]
12 changes: 10 additions & 2 deletions dislib/trees/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,11 @@ def save_model(self, filepath, overwrite=True, save_format="json"):
>>> assert np.allclose(model_pred.collect(),
>>> loaded_model_pred.collect())
"""
super().save_model(filepath, overwrite=overwrite, save_format=save_format)
super().save_model(
filepath,
overwrite=overwrite,
save_format=save_format
)


class RandomForestRegressor(BaseRandomForest):
Expand Down Expand Up @@ -688,7 +692,11 @@ def save_model(self, filepath, overwrite=True, save_format="json"):
>>> assert np.allclose(model_pred.collect(),
>>> loaded_model_pred.collect())
"""
super().save_model(filepath, overwrite=overwrite, save_format=save_format)
super().save_model(
filepath,
overwrite=overwrite,
save_format=save_format
)


def _base_soft_vote(classes, *predictions):
Expand Down
61 changes: 0 additions & 61 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,67 +1318,6 @@ def test_kron_regular(self, a_shape, a_bsize, b_shape, b_bsize):
self.assertTrue(_validate_array(computed))
self.assertTrue(_equal_arrays(computed.collect(), expected))

@parameterized.expand([(ds.array([[1, 0, 0, 0],
                                  [0, 0, 0, 2],
                                  [0, 3, 0, 0],
                                  [2, 0, 0, 0]], (2, 2)),),
                       (ds.random_array((17, 5), (1, 1)),),
                       (ds.random_array((9, 7), (9, 6)),),
                       (ds.random_array((10, 10), (2, 2))[1:, 1:],)])
def test_svd(self, x):
    """ Checks ds.svd with default, unsorted, values-only and in-place
    options against the collected input """
    reference = x.collect()

    def verify(u_mat, s_mat, v_mat):
        # The factors must rebuild the input and have unit-norm columns.
        self.assertTrue(np.allclose(reference, u_mat @ s_mat @ v_mat.T))
        self.assertTrue(
            np.allclose(np.linalg.norm(u_mat, axis=0),
                        np.ones(u_mat.shape[1])))
        self.assertTrue(
            np.allclose(np.linalg.norm(v_mat, axis=0),
                        np.ones(v_mat.shape[1])))

    # Default options.
    u, s, v = ds.svd(x)
    u, s, v = u.collect(), np.diag(s.collect()), v.collect()
    verify(u, s, v)

    # Without sorting.
    u, s, v = ds.svd(x, sort=False)
    u, s, v = u.collect(), np.diag(s.collect()), v.collect()
    verify(u, s, v)

    # Singular values only; U and V are reused from the unsorted
    # decomposition just above.
    s = np.diag(ds.svd(x, compute_uv=False, sort=False).collect())
    verify(u, s, v)

    # copy=False goes last; the reference was collected beforehand.
    u, s, v = ds.svd(x, copy=False)
    u, s, v = u.collect(), np.diag(s.collect()), v.collect()
    verify(u, s, v)

def test_svd_errors(self):
    """ Checks that ds.svd raises ValueError for rejected inputs """
    rejected = (
        ((3, 9), (2, 2)),
        ((3, 3), (3, 3)),
    )

    for shape, block_size in rejected:
        # The array is built inside the context manager, as before, so a
        # ValueError from either call satisfies the check.
        with self.assertRaises(ValueError):
            ds.svd(ds.random_array(shape, block_size))


def main():
    """Hand control to the ``unittest`` command-line runner."""
    unittest.main()
Expand Down
86 changes: 86 additions & 0 deletions tests/test_svd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import unittest

import numpy as np
import itertools

import dislib as ds
from parameterized import parameterized
from dislib.math.base import svd_col_combs


class SVDTest(unittest.TestCase):
    """Tests for ``ds.svd`` and the column-pairing helper it relies on."""

    def _assert_decomposition(self, x_np, u, s, v):
        """Check a collected decomposition against the original matrix.

        Asserts that ``u @ s @ v.T`` is close to ``x_np`` and that every
        column of ``u`` and of ``v`` has unit Euclidean norm.

        Parameters
        ----------
        x_np : ndarray
            The collected input matrix.
        u, v : ndarray
            Collected factors returned by ``ds.svd``.
        s : ndarray
            Result of ``np.diag`` applied to the collected singular values.
        """
        self.assertTrue(np.allclose(x_np, u @ s @ v.T))
        self.assertTrue(
            np.allclose(np.linalg.norm(u, axis=0), np.ones(u.shape[1])))
        self.assertTrue(
            np.allclose(np.linalg.norm(v, axis=0), np.ones(v.shape[1])))

    def test_pairing(self):
        """svd_col_combs yields every column pair exactly once."""
        for n_cols in range(10):
            # subTest reports which n_cols failed instead of stopping at
            # the first failure.
            with self.subTest(n_cols=n_cols):
                all_combs = list(itertools.combinations(range(n_cols), 2))
                cols_combs = svd_col_combs(n_cols)

                # unittest assertions are not stripped under ``python -O``
                # and show both operands on failure.
                self.assertEqual(set(all_combs), set(cols_combs))
                # The set comparison alone would not catch duplicates.
                self.assertEqual(len(all_combs), len(cols_combs))

    @parameterized.expand([(ds.array([[1, 0, 0, 0],
                                      [0, 0, 0, 2],
                                      [0, 3, 0, 0],
                                      [2, 0, 0, 0]], (2, 2)),),
                           (ds.random_array((17, 5), (1, 1)),),
                           (ds.random_array((9, 7), (9, 6)),),
                           (ds.random_array((10, 10), (2, 2))[1:, 1:],)])
    def test_svd(self, x):
        """ds.svd rebuilds the input under each supported option.

        Exercised in order: default options, ``sort=False``,
        ``compute_uv=False`` and ``copy=False``.
        """
        # Collected up front, before the copy=False call at the end.
        x_np = x.collect()

        u, s, v = ds.svd(x)
        u, s, v = u.collect(), np.diag(s.collect()), v.collect()
        self._assert_decomposition(x_np, u, s, v)

        u, s, v = ds.svd(x, sort=False)
        u, s, v = u.collect(), np.diag(s.collect()), v.collect()
        self._assert_decomposition(x_np, u, s, v)

        s = ds.svd(x, compute_uv=False, sort=False)
        s = np.diag(s.collect())

        # Only singular values are returned here, so U and V from the
        # previous (unsorted) decomposition are reused.
        self._assert_decomposition(x_np, u, s, v)

        u, s, v = ds.svd(x, copy=False)
        u, s, v = u.collect(), np.diag(s.collect()), v.collect()
        self._assert_decomposition(x_np, u, s, v)

    def test_svd_errors(self):
        """ds.svd raises ValueError for the two rejected inputs below.

        NOTE(review): the exact rule each input violates is not visible
        from this file; confirm against ``ds.svd``.
        """
        with self.assertRaises(ValueError):
            ds.svd(ds.random_array((3, 9), (2, 2)))

        with self.assertRaises(ValueError):
            ds.svd(ds.random_array((3, 3), (3, 3)))

def main():
    """Hand control to the ``unittest`` command-line runner."""
    unittest.main()


# Run the suite only when executed as a script, not on import.
if __name__ == '__main__':
    main()

0 comments on commit 8b4795e

Please sign in to comment.