Skip to content

Commit

Permalink
Merge pull request #631 from macs3-project/feat/macs3/hmmlearn0.3.2
Browse files Browse the repository at this point in the history
Feat/macs3/hmmlearn0.3.2
  • Loading branch information
taoliu authored Mar 5, 2024
2 parents cf0a344 + 9f91e6c commit b4fdb5f
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 70 deletions.
37 changes: 5 additions & 32 deletions MACS3/Signal/HMMR_HMM.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cython: language_level=3
# cython: profile=True
# Time-stamp: <2024-02-18 16:21:00 Tao Liu>
# Time-stamp: <2024-03-01 23:34:51 Tao Liu>

"""Module description:
Expand All @@ -20,7 +20,8 @@ from math import sqrt
import numpy as np
cimport numpy as np
from cpython cimport bool
from hmmlearn import hmm, _utils
import hmmlearn
from hmmlearn.hmm import GaussianHMM
from sklearn import cluster
import json
# from hmmlearn cimport hmm
Expand Down Expand Up @@ -51,34 +52,6 @@ cdef inline float get_weighted_density( int x, float m, float v, w ):
# Classes
# ------------------------------------

class GaussianHMM_modified( hmm.GaussianHMM ):
def _init(self, X, lengths=None):
super()._init(X, lengths)
# we will overwrite initial means_ and covars_
kmeans = cluster.KMeans(n_clusters=self.n_components,
random_state=self.random_state,
n_init=10) # https://github.com/hmmlearn/hmmlearn/pull/545
# the idea is to do the random seeds
# for 10 times orginally, hmmlearn 0.3
# will do this only once. However,
# due to the change in scikit-learn
# 1.3, the random seeding in KMeans
# will generate different results with
# previous scikit-learn. It will make
# the results irreproducible between
# sklearn <1.3 and sklearn
# >=1.3. Hopefully, if we choose to do
# the process 10 times, the results
# will be more similar.
kmeans.fit(X)
self.means_ = kmeans.cluster_centers_

cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
if not cv.shape:
cv.shape = (1, 1)
self.covars_ = \
_utils.distribute_covar_matrix_to_match_covariance_type( cv, self.covariance_type, self.n_components ).copy()

# ------------------------------------
# public functions
# ------------------------------------
Expand All @@ -90,7 +63,7 @@ cpdef hmm_training( list training_data, list training_data_lengths, int n_states
# according to base documentation, if init_prob not stated, it is set to be equally likely for any state (1/ # of components)
# if we have other known parameters, we should set these (ie: means_weights, covariance_type etc.)
rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(random_seed)))
hmm_model = GaussianHMM_modified( n_components= n_states, covariance_type = covar, random_state = rs, verbose = False )
hmm_model = GaussianHMM( n_components= n_states, covariance_type = covar, random_state = rs, verbose = False )
hmm_model = hmm_model.fit( training_data, training_data_lengths )
assert hmm_model.n_features == 4
return hmm_model
Expand Down Expand Up @@ -121,7 +94,7 @@ cpdef void hmm_model_save( str model_file, object hmm_model, int hmm_binsize, in
cpdef list hmm_model_init( str model_file ):
with open( model_file ) as f:
m = json.load( f )
hmm_model = GaussianHMM_modified( n_components=3, covariance_type=m["covariance_type"] )
hmm_model = GaussianHMM( n_components=3, covariance_type=m["covariance_type"] )
hmm_model.startprob_ = np.array(m["startprob"])
hmm_model.transmat_ = np.array(m["transmat"])
hmm_model.means_ = np.array(m["means"])
Expand Down
6 changes: 3 additions & 3 deletions conda/macs3/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,22 @@ requirements:
- Cython ~=3.0
- cykhash >=2.0,<3.0
- setuptools >=68.0
- hmmlearn >=0.3
- hmmlearn >=0.3.2
- scikit-learn >=1.3
host:
- python >=3.11
- zlib
- numpy >=1.25
- scipy >=1.12
- Cython ~=3.0
- hmmlearn >=0.3
- hmmlearn >=0.3.2
- scikit-learn >=1.3
- cykhash >=2.0,<3.0
run:
- python >=3.11
- numpy >=1.25
- scipy >=1.12
- hmmlearn >=0.3
- hmmlearn >=0.3.2
- scikit-learn >=1.3
- cykhash >=2.0,<3.0

Expand Down
8 changes: 4 additions & 4 deletions docs/INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ reproducing your results, we also add them into the requirement list
with specific version numbers. So here is the list of the required
python libraries that will impact the numerical calculation in MACS3:

- numpy>=1.24
- hmmlearn>=0.3
- scikit-learn>=1.2,<1.4
- scipy>=1.10
- numpy>=1.25
- hmmlearn>=0.3.2
- scikit-learn>=1.3
- scipy>=1.12

### Cython

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[build-system]
requires=['setuptools>=60.0', 'numpy>=1.24.2', 'scipy>=1.11.4', 'cykhash>=2.0,<3.0', 'Cython~=3.0', 'scikit-learn>=1.2.1', 'hmmlearn==0.3.0']
requires=['setuptools>=68.0', 'numpy>=1.25', 'scipy>=1.12', 'cykhash>=2.0,<3.0', 'Cython~=3.0', 'scikit-learn>=1.3', 'hmmlearn>=0.3.2']

10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Cython~=3.0
numpy>=1.24.2
scipy>=1.11.4
scikit-learn>=1.2.1
hmmlearn==0.3.0
numpy>=1.25
scipy>=1.12
scikit-learn>=1.3
hmmlearn>=0.3.2
cykhash>=2.0,<3.0
pytest>=7.0
setuptools>=60.0
setuptools>=68.0
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
'Programming Language :: Python :: 3.12',
'Programming Language :: Cython', ]

install_requires = [ "numpy>=1.24.2",
"scipy>=1.11.4",
"hmmlearn==0.3.0",
"scikit-learn>=1.2.1",
install_requires = [ "numpy>=1.25",
"scipy>=1.12",
"hmmlearn>=0.3.2",
"scikit-learn>=1.3",
"cykhash>=2.0,<3.0"]


Expand Down
43 changes: 22 additions & 21 deletions test/test_HMMR_HMM.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,34 @@ def setUp( self ):
self.not_expected_covars = None
self.not_expected_means = None
self.not_expected_transmat = None
self.startprob = [0.09411589, 0.82689766, 0.07898644]
self.means = [[2.02697935e-01, 1.52785266e+00, 1.73790142e+00, 1.00019411e-04],
[1.87823916e-01, 1.48213364e+00, 1.69577044e+00, 1.00017125e-04],
[2.07360047e+00, 8.63029738e+00, 7.24406955e+00, 1.00852188e-04]]
self.covars = [[[ 1.18061824e-01, 5.32522674e-02, 4.04981722e-02, 1.43240236e-07],
[ 5.32522674e-02, 1.88909221e+00, 7.44040883e-01, 1.64463390e-07],
[ 4.04981722e-02, 7.44040883e-01, 2.35914194e+00, 1.69079937e-07],
[ 1.43240236e-07, 1.64463390e-07, 1.69079937e-07, 1.38857074e-07]],

[[ 1.08338994e-01, 4.38027284e-02, 3.40898529e-02, 1.34873591e-07],
[ 4.38027284e-02, 1.78899081e+00, 6.92059837e-01, 1.54578989e-07],
[ 3.40898529e-02, 6.92059837e-01, 2.26836145e+00, 1.58248579e-07],
[ 1.34873591e-07, 1.54578989e-07, 1.58248579e-07, 1.31639696e-07]],

[[ 5.96438746e+00, 5.22590773e+00, -5.59954962e-01, -1.48829290e-06],
[ 5.22590773e+00, 2.63829229e+01, 3.49433872e+00, -6.09680431e-06],
[-5.59954962e-01, 3.49433872e+00, 1.50531402e+01, 1.43841972e-05],
[-1.48829290e-06, -6.09680431e-06, 1.43841972e-05, 1.04838987e-07]]]
self.transmat = [[3.55718812e-03, 9.71544738e-01, 2.48980738e-02],
[9.22578828e-01, 7.32630014e-02, 4.15817043e-03],
[2.11090463e-02, 6.34703169e-04, 9.78256251e-01]]
self.startprob = [0.01807016, 0.90153727, 0.08039257]
self.means = [[2.05560411e-01, 1.52959594e+00, 1.73568556e+00, 1.00019720e-04],
[1.84467806e-01, 1.46784946e+00, 1.67895745e+00, 1.00016654e-04],
[2.06402305e+00, 8.60140461e+00, 7.22907032e+00, 1.00847661e-04]]
self.covars = [[[ 1.19859257e-01, 5.33746506e-02, 3.99871507e-02, 1.49805047e-07],
[ 5.33746506e-02, 1.88774896e+00, 7.38204761e-01, 1.70902908e-07],
[ 3.99871507e-02, 7.38204761e-01, 2.34175176e+00, 1.75654357e-07],
[ 1.49805047e-07, 1.70902908e-07, 1.75654357e-07, 1.45312288e-07]],
[[ 1.06135330e-01, 4.16846792e-02, 3.24447289e-02, 1.30393434e-07],
[ 4.16846792e-02, 1.75537103e+00, 6.70848135e-01, 1.49425940e-07],
[ 3.24447289e-02, 6.70848135e-01, 2.22285392e+00, 1.52914017e-07],
[ 1.30393434e-07, 1.49425940e-07, 1.52914017e-07, 1.27205162e-07]],
[[ 5.94746590e+00, 5.24388615e+00, -5.33166471e-01, -1.47228883e-06],
[ 5.24388615e+00, 2.63945986e+01, 3.54212739e+00, -6.03892201e-06],
[-5.33166471e-01, 3.54212739e+00, 1.50231166e+01, 1.43141422e-05],
[-1.47228883e-06, -6.03892201e-06, 1.43141422e-05, 1.04240673e-07]]]
self.transmat =[[1.91958645e-03, 9.68166646e-01, 2.99137676e-02],
[8.52453717e-01, 1.46924953e-01, 6.21329356e-04],
[2.15432113e-02, 6.80080650e-05, 9.78388781e-01]]
self.n_features = 4

# for prediction
self.prediction_data = np.loadtxt("test/small_prediction_data.txt", delimiter="\t", dtype="float", usecols=(2,3,4,5)).tolist()
self.prediction_data_lengths = np.loadtxt('test/small_prediction_lengths.txt', dtype="int").tolist()
self.predictions = np.loadtxt('test/small_prediction_results.txt', delimiter="\t", dtype="float").tolist()

@pytest.mark.skip( reason="it may fail with different sklearn+hmmlearn" )
def test_training( self ):
# test hmm_training:
model = hmm_training(training_data = self.training_data, training_data_lengths = self.training_data_lengths, n_states = 3, random_seed = 12345, covar = 'full')
Expand All @@ -65,9 +65,10 @@ def test_training( self ):
npt.assert_allclose(model.transmat_, self.transmat)
npt.assert_allclose(model.n_features, self.n_features)

@pytest.mark.skip( reason="it may fail with different sklearn+hmmlearn" )
def test_predict( self ):
# test hmm_predict
hmm_model = hmm.GaussianHMM( n_components=3, covariance_type='full' )
hmm_model = GaussianHMM( n_components=3, covariance_type='full' )
hmm_model.startprob_ = np.array(self.startprob)
hmm_model.transmat_ = np.array(self.transmat)
hmm_model.means_ = np.array(self.means)
Expand Down

0 comments on commit b4fdb5f

Please sign in to comment.