Open
Description
Description of your problem
I have a model that compiles correctly when the priors are represented as numpy arrays, but that gets a shape error when those numpy arrays are wrapped in pm.Data
, which seems wrong (isn't it?).
Example code
num_words = df.shape[0]
with pm.Model() as model:
#alpha = pm.Data("α", np.ones((1, K)))
alpha = np.ones((1, K))
# beta_prior = pm.Data("β", np.ones((1, V)))
beta_prior = np.ones((1, V))
doc_num = pm.Data('i', df['Document'])
theta = pm.Dirichlet("θ", a=alpha, shape=(D, K)) # topic probabilities
# z = pm.Categorical("z_i", p=theta[doc_num], shape=df.shape[0]) # topic for word
beta = pm.Dirichlet("beta", a=beta_prior, shape=(K, V)) # word probabilities
#ws = pm.Deterministic("z * w", t.dot(theta[doc_num], beta))
w = pm.Categorical("w",
p=t.dot(theta[doc_num], beta),
# p=ws,
shape=df.shape[0],
observed=df['Word']
)
(full code will be supplied as attachment -- but this may have an obvious answer.)
When the above piece of code is executed as written, it completes successfully.
However, if I replace the definitions of alpha
and beta_prior
with the commented out lines using pm.Data
, I get shape errors. Traceback follows:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-5542319f899e> in <module>
7 #beta_prior = np.ones((1, V))
8 doc_num = pm.Data('i', df['Document'])
----> 9 theta = pm.Dirichlet("θ", a=alpha, shape=(D, K)) # topic probabilities
10
11 # z = pm.Categorical("z_i", p=theta[doc_num], shape=df.shape[0]) # topic for word
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
120 else:
121 dist = cls.dist(*args, **kwargs)
--> 122 return model.Var(name, dist, data, total_size, dims=dims)
123
124 def __getnewargs__(self):
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size, dims)
1140 else:
1141 with self:
-> 1142 var = TransformedRV(
1143 name=name,
1144 distribution=dist,
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, model, transform, total_size)
2011
2012 self.transformed = model.Var(
-> 2013 transformed_name, transform.apply(distribution), total_size=total_size
2014 )
2015
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/transforms.py in apply(self, dist)
124 def apply(self, dist):
125 # avoid circular import
--> 126 return TransformedDistribution.dist(dist, self)
127
128 def __str__(self):
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
128 def dist(cls, *args, **kwargs):
129 dist = object.__new__(cls)
--> 130 dist.__init__(*args, **kwargs)
131 return dist
132
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/transforms.py in __init__(self, dist, transform, *args, **kwargs)
152 self.dist = dist
153 self.transform_used = transform
--> 154 v = forward(FreeRV(name="v", distribution=dist))
155 self.type = v.type
156
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, total_size, model)
1669 np.ones(distribution.shape, distribution.dtype) * distribution.default()
1670 )
-> 1671 self.logp_elemwiset = distribution.logp(self)
1672 # The logp might need scaling in minibatches.
1673 # This is done in `Factor`.
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/multivariate.py in logp(self, value)
520 # only defined for sum(value) == 1
521 return bound(
--> 522 tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)),
523 tt.all(value >= 0),
524 tt.all(value <= 1),
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/dist_math.py in logpow(x, m)
106 """
107 # return m * log(x)
--> 108 return tt.switch(tt.eq(x, 0), tt.switch(tt.eq(m, 0), 0.0, -np.inf), m * tt.log(x))
109
110
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/tensor/var.py in __mul__(self, other)
126 # and the return value in that case
127 try:
--> 128 return theano.tensor.mul(self, other)
129 except (NotImplementedError, TypeError):
130 return NotImplemented
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in __call__(self, *inputs, **kwargs)
251
252 if config.compute_test_value != "off":
--> 253 compute_test_value(node)
254
255 if self.default_output is not None:
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in compute_test_value(node)
128 thunk.outputs = [storage_map[v] for v in node.outputs]
129
--> 130 required = thunk()
131 assert not required # We provided all inputs
132
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in rval()
604
605 def rval():
--> 606 thunk()
607 for o in node.outputs:
608 compute_map[o][0] = True
~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/link/c/basic.py in __call__(self)
1769 print(self.error_storage, file=sys.stderr)
1770 raise
-> 1771 raise exc_value.with_traceback(exc_trace)
1772
1773
ValueError: Input dimension mis-match. (input[0].shape[0] = 1, input[1].shape[0] = 2000)
Versions and main components
- PyMC3 Version: 3.11.2
- Aesara/Theano Version: 1.1.2
- Python Version: 3.9
- Operating system: MacOS 10.15.7
- How did you install PyMC3: conda from conda-forge