0 Installation
https://www.pymc.io/projects/docs/en/latest/installation.html
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
import pytensor.tensor as pt

RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
az.style.use("arviz-darkgrid")
1 Model creation
Models are created inside a with-statement context:
with pm.Model() as model:
    # Model definition
    pass
Related methods:
model.compile_logp()(parameters)  # compile the model's log-probability function and evaluate it at the given parameter values
Inspecting the random variables defined inside the model:
model.basic_RVs  # all random variables defined in the model
model.free_RVs  # unobserved random variables, i.e. the parameters to be inferred
model.observed_RVs  # random variables with observed data attached
Example:
with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    obs = pm.Normal("obs", mu=mu, sigma=1, observed=rng.standard_normal(100))

model.compile_logp()({"mu": 0})  # evaluate the model's log-probability at mu=0
Out[25]: array(-152.43813297)

model.basic_RVs
Out[26]: [mu ~ N(0, 1), obs ~ N(mu, 1)]

model.free_RVs
Out[27]: [mu ~ N(0, 1)]

model.observed_RVs
Out[28]: [obs ~ N(mu, 1)]
2 Probability Distributions
Every probabilistic model contains both observed and unobserved random variables. Observed variables are defined through likelihood distributions, while unobserved variables are given prior distributions. The concrete distributions available are listed in the API reference: api_distributions
Defining unobserved random variables:
with pm.Model():
    x = pm.Normal("x", mu=0, sigma=1)  # pick a distribution and fill in its parameters
The log of a random variable's density function can also be inspected directly:
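A minimal sketch (the original note gives no listing here): pm.logp builds a symbolic expression for a variable's log-density, and .eval() turns it into a number.

with pm.Model():
    x = pm.Normal("x", mu=0, sigma=1)

pm.logp(x, 0).eval()  # log of the standard normal density at 0: log(1/sqrt(2*pi)) ≈ -0.9189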
Defining observed random variables:
with pm.Model():
    obs = pm.Normal("x", mu=0, sigma=1, observed=rng.standard_normal(100))  # differs from an unobserved variable only by the extra observed argument
Defining deterministic quantities:
with pm.Model():
    x = pm.Normal("x", mu=0, sigma=1)
    y = pm.Gamma("y", alpha=1, beta=1)
    summed = x + y
    squared = x**2
    sined = pm.math.sin(x)  # plain transformations are converted automatically, but the model does not store this variable
    plus_2 = pm.Deterministic("x plus 2", x + 2)  # pm.Deterministic stores the variable, making it easy to track in the trace
Defining a collection of random variables, or a higher-dimensional random variable:
with pm.Model():
    # bad: this works, but it is very slow
    x = [pm.Normal(f"x_{i}", mu=0, sigma=1) for i in range(10)]
A better alternative:
coords = {"cities": ["Santiago", "Mumbai", "Tokyo"]}
with pm.Model(coords=coords) as model:
    # good:
    x = pm.Normal("x", mu=0, sigma=1, dims="cities")  # three random variables, each with its own named coordinate; details below
Initializing random variables
In some situations it is useful to give a random variable an explicit initial value.
with pm.Model(coords={"idx": np.arange(5)}) as model:
    x = pm.Normal("x", mu=0, sigma=1, dims="idx")

model.initial_point()
{'x': array([0., 0., 0., 0., 0.])}  # with no initval, the default initial point is 0
Setting initial values:
with pm.Model(coords={"idx": np.arange(5)}) as model:
    x = pm.Normal("x", mu=0, sigma=1, dims="idx", initval=[1, 2, 3, 4, 5])

model.initial_point()
{'x': array([1., 2., 3., 4., 5.])}
3 Inference
The previous sections covered creating a model: defining random variables, the relationships between them, and the observed data. The next step is to perform inference on the model. In Bayesian statistics the answer is the posterior distribution, and PyMC obtains it mainly through sampling (MCMC) and variational inference.
Sampling
PyMC draws MCMC samples with the pm.sample() function, automatically choosing a suitable sampler. pm.sample() returns an arviz.InferenceData object; for details on this data structure, see the ArviZ Quickstart.
with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    obs = pm.Normal("obs", mu=mu, sigma=1, observed=rng.standard_normal(100))
    idata = pm.sample(tune=0, draws=500, cores=4, chains=6)  # set the number of CPU cores and the number of chains
Inspecting the posterior draws:
idata.posterior.dims  # dimensions of the posterior dataset
idata.posterior["mu"].shape  # posterior draws of the random variable mu
idata.posterior["mu"].sel(chain=2)  # posterior draws of mu from chain 2 (chains are indexed from 0)
Analyzing the sampling results
The most common way to analyze sampling results is the trace plot, drawn after the sampling code below.
with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    sd = pm.HalfNormal("sd", sigma=1)
    obs = pm.Normal("obs", mu=mu, sigma=sd, observed=rng.standard_normal(100))
    idata = pm.sample()
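The original note shows the resulting figure but not the plotting call; a minimal sketch with ArviZ:

az.plot_trace(idata);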
The four lines in each panel correspond to the four different Markov chains.
What does R-hat mean? R-hat, or the potential scale reduction factor, is a diagnostic that attempts to measure whether an MCMC algorithm has converged and to flag situations where it has failed to converge. The basic idea is to check two things:
- Is the distribution of the first part of a chain (after warm-up) the same as the distribution of the second half of the chain?
- If the algorithm is started at two different places and the chains are allowed to warm up, do both chains have the same distribution?
(ref)
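R-hat values can also be computed directly, not just displayed in plots; a minimal sketch using ArviZ's az.rhat on the idata from the example above (values close to 1 indicate convergence):

az.rhat(idata)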
az.plot_forest(idata, r_hat=True)
az.plot_posterior(idata)  # a plot of the posterior inspired by [Kruschke, 2014]
When the model is high-dimensional, plotting every variable quickly becomes inconvenient. If the NUTS sampler was used, an energy plot can be used to check convergence:
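The original note shows only the resulting figure; a minimal sketch of the plotting call in ArviZ, using the idata from the example above:

az.plot_energy(idata)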
(The figure here does not look particularly good: in theory the two distributions should coincide. It may be an issue with the data.)
Variational inference
PyMC also supports a variety of variational inference methods. MCMC is more accurate but slow; variational inference instead solves an approximate problem, which is much faster at some cost in accuracy. The entry point is pymc.fit().
with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1)
    sd = pm.HalfNormal("sd", sigma=1)
    obs = pm.Normal("obs", mu=mu, sigma=sd, observed=rng.standard_normal(100))
    approx = pm.fit()
The Approximation object returned by pm.fit has many useful attributes and methods, for example drawing samples from the approximate posterior:
idata = approx.sample(1000)
az.summary(idata)
The variational submodule provides interfaces to many methods and is quite flexible to use:
mu = pm.floatX([0.0, 0.0])
cov = pm.floatX([[1, 0.5], [0.5, 1.0]])

# full-rank ADVI. The first form:
with pm.Model(coords={"idx": np.arange(2)}) as model:
    pm.MvNormal("x", mu=mu, cov=cov, dims="idx")
    approx = pm.fit(method="fullrank_advi")

# The second form:
with pm.Model(coords={"idx": np.arange(2)}) as model:
    pm.MvNormal("x", mu=mu, cov=cov, dims="idx")
    approx = pm.FullRankADVI().fit()

plt.figure()
idata = approx.sample(10000)
az.plot_pair(idata, var_names="x", coords={"idx": [0, 1]});
Stein Variational Gradient Descent (SVGD) uses particles to estimate the posterior:
w = pm.floatX([0.2, 0.8])
mu = pm.floatX([-0.3, 0.5])
sd = pm.floatX([0.1, 0.1])

with pm.Model() as model:
    pm.NormalMixture("x", w=w, mu=mu, sigma=sd)
    approx = pm.fit(method=pm.SVGD(n_particles=200, jitter=1.0))  # the two arguments set the number of particles and the standard deviation of their initial jitter

plt.figure()
idata = approx.sample(10000)
az.plot_dist(idata.posterior["x"]);
4 Posterior Predictive Sampling
The sample_posterior_predictive() function is used both for predicting on new data and for posterior predictive checks.
pm.MutableData turns ordinary values into shared quantities that the model can operate on and that can be swapped out later (computation in PyMC is based on symbolic expressions, so plain data cannot take part in it without this conversion):
x = rng.standard_normal(100)
y = x > 0
coords = {"idx": np.arange(100)}

with pm.Model() as model:
    # create shared variables that can be changed later on
    x_obs = pm.MutableData("x_obs", x, dims="idx")
    y_obs = pm.MutableData("y_obs", y, dims="idx")

    coeff = pm.Normal("x", mu=0, sigma=1)
    logistic = pm.math.sigmoid(coeff * x_obs)
    pm.Bernoulli("obs", p=logistic, observed=y_obs, dims="idx")

    idata = pm.sample()
After sampling, pm.set_data swaps in new values for the shared variables so predictions can be made:

with model:
    # change the value and shape of the data
    pm.set_data(
        {
            "x_obs": [-1, 0, 1.0],
            # use dummy values with the same shape:
            "y_obs": [0, 0, 0],
        },
        coords={"idx": [1001, 1002, 1003]},
    )
    idata.extend(pm.sample_posterior_predictive(idata))
idata.posterior_predictive["obs"].mean(dim=["draw", "chain"])  # since obs is Bernoulli, this mean is the predicted probability that y = 1 for each new x