Scegliere le distribuzioni a priori#
La definizione delle distribuzioni a priori si riferisce al processo di trasformazione della conoscenza di un particolare dominio in distribuzioni di probabilità. Specificare dei distribuzioni a priori utili è un aspetto centrale della statistica bayesiana. PreliZ è un pacchetto Python progettato per assistere i ricercatori nella scelta delle distribuzioni a priori, fornendo una serie di strumenti per le varie operazioni necessarie per la definizione delle distribuzioini a priori. C
Nell’ambiente virtuale in cui è installato PyMC, possiamo installare PreliZ nel modo seguente:
pip install "preliz[full,lab]"
Preparazione del Notebook#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import pymc.sampling_jax
import scipy.stats as stats
import preliz as pz
import arviz as az
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=Warning)
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
KeyboardInterrupt:
The above exception was the direct cause of the following exception:
ImportError Traceback (most recent call last)
Cell In[1], line 6
4 import seaborn as sns
5 import pymc as pm
----> 6 import pymc.sampling_jax
7 import scipy.stats as stats
8 import preliz as pz
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling_jax.py:22
19 import warnings
21 warnings.warn("This module is deprecated, use pymc.sampling.jax", DeprecationWarning)
---> 22 from pymc.sampling.jax import *
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/jax.py:23
20 from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Union
22 import arviz as az
---> 23 import jax
24 import jax.numpy as jnp
25 import numpy as np
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/jax/__init__.py:39
34 del _cloud_tpu_init
36 # Confusingly there are two things named "config": the module and the class.
37 # We want the exported object to be the class, so we first import the module
38 # to make sure a later import doesn't overwrite the class.
---> 39 from jax import config as _config_module
40 del _config_module
42 # Force early import, allowing use of `jax.core` after importing `jax`.
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/jax/config.py:15
1 # Copyright 2018 The JAX Authors.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 15 from jax._src.config import config as _deprecated_config # noqa: F401
17 # Deprecations
19 _deprecations = {
20 # Added October 27, 2023
21 "config": (
22 "Accessing jax.config via the jax.config submodule is deprecated.",
23 _deprecated_config),
24 }
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/jax/_src/config.py:28
25 from typing import Any, Callable, Generic, NamedTuple, NoReturn, TypeVar
26 import warnings
---> 28 from jax._src import lib
29 from jax._src.lib import jax_jit
30 from jax._src.lib import transfer_guard_lib
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/jax/_src/lib/__init__.py:86
83 cpu_feature_guard.check_cpu_features()
85 import jaxlib.utils as utils
---> 86 import jaxlib.xla_client as xla_client
87 import jaxlib.lapack as lapack
89 import jaxlib.ducc_fft as ducc_fft
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/jaxlib/xla_client.py:32
29 import ml_dtypes
30 import numpy as np
---> 32 from . import xla_extension as _xla
34 # Note this module does *not* depend on any Python protocol buffers. The XLA
35 # Python bindings are currently packaged both as part of jaxlib and as part
36 # of TensorFlow. If we use protocol buffers here, then importing both jaxlib
(...)
43 # Pylint has false positives for type annotations.
44 # pylint: disable=invalid-sequence-index
46 ops = _xla.ops
ImportError: initialization failed
%config InlineBackend.figure_format = 'retina'
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)
az.style.use("arviz-viridish")
Applicazioni#
Con PreliZ possiamo generare un grafico della funzione di densità di una data distribuzione. Per esempio
pz.Beta(2, 5).plot_pdf(pointinterval=True)
<Axes: >
![../_images/7fc4175693542a4aded74c8d82b4d1df9ef5d853f048e76a32b16a35983e279f.png](../_images/7fc4175693542a4aded74c8d82b4d1df9ef5d853f048e76a32b16a35983e279f.png)
Abbiamo ottenuto la PDF in blu, e poiché abbiamo passato l’argomento pointinterval=True
abbiamo ottenuto anche un elemento simile a un box-plot nella parte inferiore, che potrebbe aiutarci a interpretare cosa significa effettivamente un Beta(2, 5). Il punto bianco rappresenta la mediana, la linea più spessa rappresenta l’intervallo interquartile, ossia l’intervallo definito dai quantili 0.25 e 0.75 (o in altre parole il 50% centrale della distribuzione) e la linea più sottile rappresenta l’intervallo definito dai quantili 0.05 e 0.95.
Generiamo qui sotto la PDF della Normale(100, 15):
pz.Normal(100, 15).plot_pdf(pointinterval=True)
<Axes: >
![../_images/8ffdbbe40c8bd046179a7dc1d69267aab4fa0946243f4e7726e1cf420a49cfa9.png](../_images/8ffdbbe40c8bd046179a7dc1d69267aab4fa0946243f4e7726e1cf420a49cfa9.png)
Esaminiamo delle distribuzioni discrete.
pz.Binomial(10, 0.2).plot_pdf();
![../_images/3d6eb7ff586a4cd1fbf1753885c6ef058380ec2472e620878ab07100afa4e7f5.png](../_images/3d6eb7ff586a4cd1fbf1753885c6ef058380ec2472e620878ab07100afa4e7f5.png)
pz.Poisson(3.5).plot_pdf();
![../_images/1c6673c4958928e143863ae2068f34f2089eaac651b00aedd32f03bfdc971cd3.png](../_images/1c6673c4958928e143863ae2068f34f2089eaac651b00aedd32f03bfdc971cd3.png)
Possiamo includere più di una distribuzione nello stesso grafico. Questo può essere utile per confrontare l’impatto dei parametri su una data distribuzione o anche su distribuzioni diverse.
pz.Beta(2, 5).plot_pdf()
pz.Beta(5, 2).plot_pdf(color="C3");
![../_images/9de12bcb3f0d25b97de5a943ec947df02368850873826c8af6d177217e077921.png](../_images/9de12bcb3f0d25b97de5a943ec947df02368850873826c8af6d177217e077921.png)
Possiamo creare la CDF:
pz.Gamma(2, 0.5).plot_cdf()
pz.Gamma(2, 1).plot_cdf(color="C3");
![../_images/72ed23bb34c04915f54d10bc09f93b2eeccd6179bf56b84e45060984a2b63238.png](../_images/72ed23bb34c04915f54d10bc09f93b2eeccd6179bf56b84e45060984a2b63238.png)
È possibile creare un grafico interattivo.
pz.Beta().plot_interactive()
Le distribuzioni di PreliZ sono dei wrappers delle distribuzioni SciPy. Pertanto, possiamo “congelare” una funzione SciPy e poi calolare varie proprietà della distribuzione.
dist = pz.Beta(2, 5)
dist.plot_pdf()
<Axes: >
![../_images/1270c843ea86e7eaab218e5334548d25c9a545a95e8280e765af308d0c20034a.png](../_images/1270c843ea86e7eaab218e5334548d25c9a545a95e8280e765af308d0c20034a.png)
Ad esempio, calcoliamo la media, la mediana, la deviazione standard, l’intervallo che contiene il 95% dell’area sottesa alla funzione.
dist.summary()
Beta(mean=0.29, median=0.26, std=0.16, lower=0.05, upper=0.63)
Calcoliamo l’intervallo a code ugualie o a più alta densità.
dist.eti(), dist.hdi()
((0.05, 0.63), (0.02, 0.57))
Generiamo dei valori casuali dalla distribuzione.
dist.rvs(10)
array([0.26945963, 0.11192815, 0.73949522, 0.36171116, 0.70088142,
0.18099464, 0.12745493, 0.2458155 , 0.17703744, 0.08130133])
Troviamo i quantili della distribuzione.
dist.ppf([0.1, 0.5, 0.9])
array([0.09259526, 0.26444998, 0.51031631])
Possiamo trovare, ad esempio, la distribuzione Gamma con media 4 e tale per cui il 90% della massa della distribuzione sia contenuta tra 1 e 10:
pz.maxent(pz.Gamma(mu=4), 1, 10, 0.9);
![../_images/3234a951504dce80a060852b36117682396e9637160771e018cebbe5d04aa0cd.png](../_images/3234a951504dce80a060852b36117682396e9637160771e018cebbe5d04aa0cd.png)
Per trovare i parametri della distribuzione, poi, usiamo dist
.
# update della distribuzione
dist = pz.Gamma(mu=4)
pz.maxent(dist, 1, 10, 0.9);
# ottengo i parametri della distribuzione
dist.alpha, dist.beta
(2.341680163267631, 0.5854200408169078)
![../_images/3234a951504dce80a060852b36117682396e9637160771e018cebbe5d04aa0cd.png](../_images/3234a951504dce80a060852b36117682396e9637160771e018cebbe5d04aa0cd.png)
pz.Gamma(dist.alpha, dist.beta).plot_pdf()
<Axes: >
![../_images/2c2953d3715adc5f0fb41aa62a2f5d3acac825ec20f24e75833b7e72dea4c0a4.png](../_images/2c2953d3715adc5f0fb41aa62a2f5d3acac825ec20f24e75833b7e72dea4c0a4.png)
Watermark#
%run wp.py
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/IPython/core/magics/execution.py:716, in ExecutionMagics.run(self, parameter_s, runner, file_finder)
715 fpath = arg_lst[0]
--> 716 filename = file_finder(fpath)
717 except IndexError as e:
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/IPython/utils/path.py:90, in get_py_filename(name)
89 return py_name
---> 90 raise IOError("File `%r` not found." % name)
OSError: File `'wp.py'` not found.
The above exception was the direct cause of the following exception:
Exception Traceback (most recent call last)
Cell In[1], line 1
----> 1 get_ipython().run_line_magic('run', 'wp.py')
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/IPython/core/interactiveshell.py:2456, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2454 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2455 with self.builtin_trap:
-> 2456 result = fn(*args, **kwargs)
2458 # The code below prevents the output from being displayed
2459 # when using magics with decorator @output_can_be_silenced
2460 # when the last Python token in the expression is a ';'.
2461 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/IPython/core/magics/execution.py:727, in ExecutionMagics.run(self, parameter_s, runner, file_finder)
725 if os.name == 'nt' and re.match(r"^'.*'$",fpath):
726 warn('For Windows, use double quotes to wrap a filename: %run "mypath\\myfile.py"')
--> 727 raise Exception(msg) from e
728 except TypeError:
729 if fpath in sys.meta_path:
Exception: File `'wp.py'` not found.