{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Причиность"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Библиотеки"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import networkx as nx\n",
"import matplotlib.pylab as plt\n",
"\n",
"from statsmodels.regression.linear_model import OLS\n",
"from causalgraphicalmodels import CausalGraphicalModel\n",
"\n",
"import statsmodels.api as sm\n",
"\n",
"from causality.inference.search import IC\n",
"from causality.inference.independence_tests import MutualInformationTest\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from causality.estimation.adjustments import AdjustForDirectCauses\n",
"from causality.estimation.nonparametric import CausalEffect"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Warmup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Связь в линейной регрессии"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Fork\n",
"\n",
"Смоделируем зависимости,соответствующие типу Fork.\n",
"\n",
"Заметим, что здесь есть только одна переменная, которая является экзогенной (Z), то есть она не зависит ни от какой другой переменной в системе (не зависит от X, Y, эндогенных переменных).\n",
"\n",
"$$Z->X, Z->Y, X->Y$$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"rs = np.random.RandomState(42)\n",
"N = 10000\n",
"Z = rs.randn(10000)\n",
"X = 0.5 * Z + rs.randn(10000) \n",
"Y = 0.3 * Z + 0.4 * X + rs.randn(10000)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Построим разные регрессии регрессии Y ~ X, Y ~ Z + X, Y ~ Z. Что можно сказать о 95% доверительном интервале (confint) на коэффициент регрессии перед X в двух этих моделях?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.263 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.263 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 3564. | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:09:56 | Log-Likelihood: | -14486. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.897e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9999 | BIC: | 2.898e+04 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.5511 | 0.009 | 59.699 | 0.000 | 0.533 | 0.569 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.062 | Durbin-Watson: | 1.995 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.969 | Jarque-Bera (JB): | 0.051 | \n",
"
\n",
"\n",
" Skew: | -0.004 | Prob(JB): | 0.975 | \n",
"
\n",
"\n",
" Kurtosis: | 3.007 | Cond. No. | 1.00 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.263\n",
"Model: OLS Adj. R-squared (uncentered): 0.263\n",
"Method: Least Squares F-statistic: 3564.\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:09:56 Log-Likelihood: -14486.\n",
"No. Observations: 10000 AIC: 2.897e+04\n",
"Df Residuals: 9999 BIC: 2.898e+04\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.5511 0.009 59.699 0.000 0.533 0.569\n",
"==============================================================================\n",
"Omnibus: 0.062 Durbin-Watson: 1.995\n",
"Prob(Omnibus): 0.969 Jarque-Bera (JB): 0.051\n",
"Skew: -0.004 Prob(JB): 0.975\n",
"Kurtosis: 3.007 Cond. No. 1.00\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([X]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.318 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.318 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 2331. | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:10:48 | Log-Likelihood: | -14096. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.820e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9998 | BIC: | 2.821e+04 | \n",
"
\n",
"\n",
" Df Model: | 2 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.4266 | 0.010 | 43.099 | 0.000 | 0.407 | 0.446 | \n",
"
\n",
"\n",
" x2 | 0.3134 | 0.011 | 28.467 | 0.000 | 0.292 | 0.335 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 3.320 | Durbin-Watson: | 1.995 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.190 | Jarque-Bera (JB): | 3.163 | \n",
"
\n",
"\n",
" Skew: | 0.007 | Prob(JB): | 0.206 | \n",
"
\n",
"\n",
" Kurtosis: | 2.914 | Cond. No. | 1.63 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.318\n",
"Model: OLS Adj. R-squared (uncentered): 0.318\n",
"Method: Least Squares F-statistic: 2331.\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:10:48 Log-Likelihood: -14096.\n",
"No. Observations: 10000 AIC: 2.820e+04\n",
"Df Residuals: 9998 BIC: 2.821e+04\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.4266 0.010 43.099 0.000 0.407 0.446\n",
"x2 0.3134 0.011 28.467 0.000 0.292 0.335\n",
"==============================================================================\n",
"Omnibus: 3.320 Durbin-Watson: 1.995\n",
"Prob(Omnibus): 0.190 Jarque-Bera (JB): 3.163\n",
"Skew: 0.007 Prob(JB): 0.206\n",
"Kurtosis: 2.914 Cond. No. 1.63\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([X, Z]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.191 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.191 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 2366. | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:11:09 | Log-Likelihood: | -14948. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.990e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9999 | BIC: | 2.991e+04 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.5230 | 0.011 | 48.641 | 0.000 | 0.502 | 0.544 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 12.818 | Durbin-Watson: | 1.999 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.002 | Jarque-Bera (JB): | 11.305 | \n",
"
\n",
"\n",
" Skew: | 0.029 | Prob(JB): | 0.00351 | \n",
"
\n",
"\n",
" Kurtosis: | 2.846 | Cond. No. | 1.00 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.191\n",
"Model: OLS Adj. R-squared (uncentered): 0.191\n",
"Method: Least Squares F-statistic: 2366.\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:11:09 Log-Likelihood: -14948.\n",
"No. Observations: 10000 AIC: 2.990e+04\n",
"Df Residuals: 9999 BIC: 2.991e+04\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.5230 0.011 48.641 0.000 0.502 0.544\n",
"==============================================================================\n",
"Omnibus: 12.818 Durbin-Watson: 1.999\n",
"Prob(Omnibus): 0.002 Jarque-Bera (JB): 11.305\n",
"Skew: 0.029 Prob(JB): 0.00351\n",
"Kurtosis: 2.846 Cond. No. 1.00\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([Z]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Collider\n",
"\n",
"Рассмотрим collider:\n",
"```\n",
"X -> Z\n",
"X -> Y\n",
"Y -> Z\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"X = rs.randn(N)\n",
"Y = 0.7 * X + rs.randn(N)\n",
"Z = 1.2 * X + 0.6 * Y + rs.randn(N)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.336 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.336 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 5053. | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:13:54 | Log-Likelihood: | -14193. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.839e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9999 | BIC: | 2.839e+04 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.7079 | 0.010 | 71.083 | 0.000 | 0.688 | 0.727 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 3.522 | Durbin-Watson: | 1.985 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.172 | Jarque-Bera (JB): | 3.548 | \n",
"
\n",
"\n",
" Skew: | -0.045 | Prob(JB): | 0.170 | \n",
"
\n",
"\n",
" Kurtosis: | 2.978 | Cond. No. | 1.00 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.336\n",
"Model: OLS Adj. R-squared (uncentered): 0.336\n",
"Method: Least Squares F-statistic: 5053.\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:13:54 Log-Likelihood: -14193.\n",
"No. Observations: 10000 AIC: 2.839e+04\n",
"Df Residuals: 9999 BIC: 2.839e+04\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.7079 0.010 71.083 0.000 0.688 0.727\n",
"==============================================================================\n",
"Omnibus: 3.522 Durbin-Watson: 1.985\n",
"Prob(Omnibus): 0.172 Jarque-Bera (JB): 3.548\n",
"Skew: -0.045 Prob(JB): 0.170\n",
"Kurtosis: 2.978 Cond. No. 1.00\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([X]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.504 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.504 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 5084. | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:16:08 | Log-Likelihood: | -12730. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.546e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9998 | BIC: | 2.548e+04 | \n",
"
\n",
"\n",
" Df Model: | 2 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.0107 | 0.015 | 0.725 | 0.468 | -0.018 | 0.040 | \n",
"
\n",
"\n",
" x2 | 0.4311 | 0.007 | 58.298 | 0.000 | 0.417 | 0.446 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.285 | Durbin-Watson: | 2.008 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.867 | Jarque-Bera (JB): | 0.269 | \n",
"
\n",
"\n",
" Skew: | -0.012 | Prob(JB): | 0.874 | \n",
"
\n",
"\n",
" Kurtosis: | 3.010 | Cond. No. | 4.02 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.504\n",
"Model: OLS Adj. R-squared (uncentered): 0.504\n",
"Method: Least Squares F-statistic: 5084.\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:16:08 Log-Likelihood: -12730.\n",
"No. Observations: 10000 AIC: 2.546e+04\n",
"Df Residuals: 9998 BIC: 2.548e+04\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.0107 0.015 0.725 0.468 -0.018 0.040\n",
"x2 0.4311 0.007 58.298 0.000 0.417 0.446\n",
"==============================================================================\n",
"Omnibus: 0.285 Durbin-Watson: 2.008\n",
"Prob(Omnibus): 0.867 Jarque-Bera (JB): 0.269\n",
"Skew: -0.012 Prob(JB): 0.874\n",
"Kurtosis: 3.010 Cond. No. 4.02\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([X, Z]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | y | R-squared (uncentered): | 0.504 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared (uncentered): | 0.504 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 1.017e+04 | \n",
"
\n",
"\n",
" Date: | Thu, 22 Apr 2021 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 19:16:43 | Log-Likelihood: | -12730. | \n",
"
\n",
"\n",
" No. Observations: | 10000 | AIC: | 2.546e+04 | \n",
"
\n",
"\n",
" Df Residuals: | 9999 | BIC: | 2.547e+04 | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" x1 | 0.4355 | 0.004 | 100.838 | 0.000 | 0.427 | 0.444 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.272 | Durbin-Watson: | 2.008 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.873 | Jarque-Bera (JB): | 0.256 | \n",
"
\n",
"\n",
" Skew: | -0.011 | Prob(JB): | 0.880 | \n",
"
\n",
"\n",
" Kurtosis: | 3.010 | Cond. No. | 1.00 | \n",
"
\n",
"
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: y R-squared (uncentered): 0.504\n",
"Model: OLS Adj. R-squared (uncentered): 0.504\n",
"Method: Least Squares F-statistic: 1.017e+04\n",
"Date: Thu, 22 Apr 2021 Prob (F-statistic): 0.00\n",
"Time: 19:16:43 Log-Likelihood: -12730.\n",
"No. Observations: 10000 AIC: 2.546e+04\n",
"Df Residuals: 9999 BIC: 2.547e+04\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"x1 0.4355 0.004 100.838 0.000 0.427 0.444\n",
"==============================================================================\n",
"Omnibus: 0.272 Durbin-Watson: 2.008\n",
"Prob(Omnibus): 0.873 Jarque-Bera (JB): 0.256\n",
"Skew: -0.011 Prob(JB): 0.880\n",
"Kurtosis: 3.010 Cond. No. 1.00\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l1 = OLS(Y, np.vstack([Z]).T).fit()\n",
"l1.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DAGs (Directed Acyclic Graphs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Рассмотрим простой граф"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G = CausalGraphicalModel(nodes=['U', 'Z', 'X', 'Y'], edges=[\n",
" ('U', 'Y'),\n",
" ('U', 'X'),\n",
" ('X', 'Z'),\n",
" ('Z', 'Y')\n",
"])\n",
"G.draw()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Найдем все пути в графе (без циклов)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['X', 'Z', 'Y']]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(nx.all_simple_paths(G.dag, 'X', 'Y'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Все пути, если считать граф не ориентированным"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['X', 'U', 'Y'], ['X', 'Z', 'Y']]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(nx.all_simple_paths(G.dag.to_undirected(), 'X', 'Y'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Рассмотрим все зависимости"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('Z', 'U', {'X'}), ('Y', 'X', {'U', 'Z'})]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.get_all_independence_relationships()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Сгенерим распределение"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'P(U)P(X|U)P(Z|X)P(Y|U,Z)'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.get_distribution()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обусловимся на переменную X"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.do('X').draw()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.draw()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"frozenset({frozenset({'U', 'Z'})})"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.get_all_backdoor_adjustment_sets('Y','X')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"frozenset({frozenset()})"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.get_all_frontdoor_adjustment_sets('Y','X')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"frozenset({frozenset({'Z'})})"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G.get_all_frontdoor_adjustment_sets('X','Y')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inductive search"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Допустим, у нас есть несколько событий:\n",
"\n",
"1. Продажа мороженного (ICE)\n",
"2. Количество преступлений (CRIMES)\n",
"3. Количество полицейских на тысячу человек (POLICE)\n",
"4. Средняя температура (TEMP)\n",
"5. Количество скачиваний браузера IE (IE)\n",
"6. Количество зараженных компьютеров (COMP)\n",
"7. Уровень загрязнения воздуха (AIR)\n",
"\n",
"P.S. Полная синтетика!"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G = CausalGraphicalModel(nodes=['ICE','CRIMES','POLICE', 'TEMP','IE','COMP', 'AIR'], \n",
"edges=[('TEMP','ICE'), ('TEMP','CRIMES'), ('IE','COMP'), ('POLICE', 'CRIMES')])\n",
"G.draw()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"rs = np.random.RandomState(42)\n",
"police = rs.uniform(low=1, high=100, size=1000)\n",
"temp = rs.uniform(low=-40, high=40, size=1000)\n",
"air = rs.uniform(size=1000)\n",
"crimes = (temp + 40) / police\n",
"ice = temp + 40\n",
"ie = rs.uniform(size=1000)\n",
"comp = ie**2\n",
"dataframe = pd.DataFrame({'police':police, 'temp':temp, 'air':air, 'crimes':crimes, 'ice':ice, 'ie':ie, 'comp':comp})"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" police | \n",
" temp | \n",
" air | \n",
" crimes | \n",
" ice | \n",
" ie | \n",
" comp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 38.079472 | \n",
" -25.189366 | \n",
" 0.261706 | \n",
" 0.388940 | \n",
" 14.810634 | \n",
" 0.672703 | \n",
" 0.452529 | \n",
"
\n",
" \n",
" 1 | \n",
" 95.120716 | \n",
" 3.352076 | \n",
" 0.246979 | \n",
" 0.455759 | \n",
" 43.352076 | \n",
" 0.796681 | \n",
" 0.634701 | \n",
"
\n",
" \n",
" 2 | \n",
" 73.467400 | \n",
" 29.835667 | \n",
" 0.906255 | \n",
" 0.950567 | \n",
" 69.835667 | \n",
" 0.250468 | \n",
" 0.062734 | \n",
"
\n",
" \n",
" 3 | \n",
" 60.267190 | \n",
" 18.577991 | \n",
" 0.249546 | \n",
" 0.971971 | \n",
" 58.577991 | \n",
" 0.624874 | \n",
" 0.390468 | \n",
"
\n",
" \n",
" 4 | \n",
" 16.445845 | \n",
" 24.524892 | \n",
" 0.271950 | \n",
" 3.923477 | \n",
" 64.524892 | \n",
" 0.571746 | \n",
" 0.326893 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 995 | \n",
" 10.066625 | \n",
" 12.556413 | \n",
" 0.865296 | \n",
" 5.220857 | \n",
" 52.556413 | \n",
" 0.442107 | \n",
" 0.195459 | \n",
"
\n",
" \n",
" 996 | \n",
" 91.814044 | \n",
" 36.529170 | \n",
" 0.157273 | \n",
" 0.833524 | \n",
" 76.529170 | \n",
" 0.334401 | \n",
" 0.111824 | \n",
"
\n",
" \n",
" 997 | \n",
" 14.545044 | \n",
" -34.483359 | \n",
" 0.309788 | \n",
" 0.379280 | \n",
" 5.516641 | \n",
" 0.394572 | \n",
" 0.155687 | \n",
"
\n",
" \n",
" 998 | \n",
" 95.073498 | \n",
" -35.435622 | \n",
" 0.290046 | \n",
" 0.048009 | \n",
" 4.564378 | \n",
" 0.529941 | \n",
" 0.280837 | \n",
"
\n",
" \n",
" 999 | \n",
" 45.154572 | \n",
" -17.425034 | \n",
" 0.871414 | \n",
" 0.499949 | \n",
" 22.574966 | \n",
" 0.161367 | \n",
" 0.026039 | \n",
"
\n",
" \n",
"
\n",
"
1000 rows × 7 columns
\n",
"
"
],
"text/plain": [
" police temp air crimes ice ie comp\n",
"0 38.079472 -25.189366 0.261706 0.388940 14.810634 0.672703 0.452529\n",
"1 95.120716 3.352076 0.246979 0.455759 43.352076 0.796681 0.634701\n",
"2 73.467400 29.835667 0.906255 0.950567 69.835667 0.250468 0.062734\n",
"3 60.267190 18.577991 0.249546 0.971971 58.577991 0.624874 0.390468\n",
"4 16.445845 24.524892 0.271950 3.923477 64.524892 0.571746 0.326893\n",
".. ... ... ... ... ... ... ...\n",
"995 10.066625 12.556413 0.865296 5.220857 52.556413 0.442107 0.195459\n",
"996 91.814044 36.529170 0.157273 0.833524 76.529170 0.334401 0.111824\n",
"997 14.545044 -34.483359 0.309788 0.379280 5.516641 0.394572 0.155687\n",
"998 95.073498 -35.435622 0.290046 0.048009 4.564378 0.529941 0.280837\n",
"999 45.154572 -17.425034 0.871414 0.499949 22.574966 0.161367 0.026039\n",
"\n",
"[1000 rows x 7 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataframe"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"class OLS_test():\n",
" def __init__(self, y, x, z, data, alpha):\n",
" self.regression = sm.OLS.from_formula('{0}~{1}'.format(y[0], '+'.join(x + z)), data)\n",
" self.result = self.regression.fit()\n",
" self.x = x\n",
" self.y = y\n",
" self.z = z\n",
" self.alpha = alpha\n",
" print(y, x, z)\n",
"\n",
" def independent(self):\n",
" to_fisher = '(' + ' ,'.join([x_ + '=0' for x_ in self.x]) + ')'\n",
" return self.result.f_test(to_fisher).pvalue > self.alpha"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['temp'] ['police'] []\n",
"['air'] ['police'] []\n",
"['ice'] ['police'] []\n",
"['crimes'] ['police'] []\n",
"['ie'] ['police'] []\n",
"['comp'] ['police'] []\n",
"['air'] ['temp'] []\n",
"['ice'] ['temp'] []\n",
"['crimes'] ['temp'] []\n",
"['ie'] ['temp'] []\n",
"['comp'] ['temp'] []\n",
"['ice'] ['air'] []\n",
"['crimes'] ['air'] []\n",
"['ie'] ['air'] []\n",
"['comp'] ['air'] []\n",
"['crimes'] ['ice'] []\n",
"['ie'] ['ice'] []\n",
"['comp'] ['ice'] []\n",
"['ie'] ['crimes'] []\n",
"['comp'] ['crimes'] []\n",
"['comp'] ['ie'] []\n",
"['crimes'] ['police'] ['ice']\n",
"['crimes'] ['police'] ['temp']\n",
"['ice'] ['temp'] ['crimes']\n",
"['crimes'] ['temp'] ['ice']\n",
"['crimes'] ['ice'] ['temp']\n",
"['crimes'] ['ice'] ['police']\n",
"['crimes'] ['ice'] ['temp', 'police']\n"
]
}
],
"source": [
"variable_types = {'police' : 'd', \n",
" 'temp' : 'd',\n",
" 'air' : 'c',\n",
" 'ice' : 'd',\n",
" 'crimes' : 'd',\n",
" 'ie':'c',\n",
" 'comp':'c'}\n",
"\n",
"ic_algorithm = IC(OLS_test, alpha=0.05)\n",
"graph = ic_algorithm.search(dataframe, variable_types)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAADuCAYAAAAOR30qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHPpJREFUeJzt3Xt8VOW97/HPysyQSUiGQBJCQm6V7CZCGyxEyvFGdKu1kd7YUC6Gaqvb9kRs5VVb8aA9ejQvrZ4qaqH0SC0VOEqlrR5qdgWVQGUXNdDCbkmIwSaZEJJMgBACmclc1vkjEsAEcmEmmZDv+6+5PGvNb/GK39fjs57nWYZpmoiIyNCLGOoCRESkkwJZRCRMKJBFRMKEAllEJEwokEVEwoQCWUQkTCiQRUTChAJZRCRMKJBFRMKEtT+NExISzMzMzBCVIiJy6UlISOCtt956yzTNW3pr269AzszMpKysbOCViYiMQIZhJPSlnYYsRETChAJZRCRMKJBFRMKEAllEJEwokEVEwoQCWUQkTPRr2ptIfzS3edi0u46KhlZa3T4cdis5ExzMm55KfEzkUJcnEnYUyBJ0e50trCytYnulCwCPL9D1nd3awLNvV5KfnUjRrCympsUNVZkiYUeBLEG1flc1xSUVuH1+enpco/uTcN6yv5Edlc0sL8ihcGbm4BYpEqYUyBI0nWFcTrs30Gtb04R2r5/iknIAhbIIuqkn/ZSZmcnbb7/d7fO9zhaKSyr6FMZna/cGKC6pYF9dS7BKFBm2FMgSFCtLq3D7/AM61u3zs6q0KsgViQw/CmTps8WLF1NbW8tXvvIVYmJieOqpp9i1axczvvjf+NW/53NozRLcNfu62jdsWMaxHetoWHc/tT+bS9Nrj+Jvb8X1/56m9pl5HF67FF9LI6YJ2w64MAyD559/nssuu4yEhAR+9KMfEQj0r8ctMpwpkKXP1q1bR3p6Ops3b6atrY3bbruNW2+9lRlz7mLS/RsZe8OduP7wBP5Tx7uOObV/B/Gzf0jqPb/B19JAw8v3E5N7E2n3vYotPpWWna8AYHzS/g9/+ANlZWXs2bOHN954g5deemkIrlRkaCiQZcDWr19PQUEB1sxpdPgh6jNfYFRyFu0Hz2zRGpN7I7axyUTYR2O/bDrWsclEZV6BEWEhOucaOhoPAmdmXzzwwAOMGzeO9PR07rvvPl555ZUhuTaRoaBZFjJgNTU1vPbaa5i/ex1v4JM5bgEf9vTcrjYRo8/MMzZskViiz31vdrjPOWdaWlrX64yMDOrr60NUvUj4USBLvxiG0fU6LS2NxYsXM/rGIl7/W3CC0+l0MmXKFABqa2tJSUkJynlFhgMNWUi/JCUl8fHHHwNQWFjI5s2bCdT+jVERJqavA3fNPnytzf0+r93a+af49NNPc+zYMZxOJ8899xzz588Pav0i4UyBLP3y4IMP8vjjjxMXF8fGjRt54403KHvjJQ4+s5C6lXfQ+sHvwez/zIjTi/q+9rWvMX36dK644gpuvfVW7rzzzuBegEgYM8ye1reeR15enqln6klP7l5Xxtbyxh6XS/fGMOBLk5P45eIr+eijj8jKygp+gSJDyDCM3aZp5vXWTj1kCYp78rOwWy0DOtZutVCUrxAWUSBLUExNi2N5QQ5Rtv79SUXZIlhekENuqnZ9E9EsCwma0xsEXWi3t9MMo7NnfPZub/0ZPhO5FCmQJagKZ2aSmxrHqtKqzuXQnFn0AZ2zKUzg+uxEivKz1DMWOYsCWYIuNzWO1YV5HGnzsGlPHU/84mVmXns9yfFx5CTHMneanhgi0hPNspCQczgcOJ1OxowZM9SliAwJzbKQsOB2u/F4PDgcjqEuRSTsKZAlpFwuFwkJCecsuRaRnimQJaRcLhfjx48f6jJEhgUFsoSUy+UiMTFxqMsQGRYUyBJSTU1NCmSRPlIgS0iphyzSdwpkCSmNIYv0nQJZQko9ZJG+UyBLSGkMWaTvFMgSUuohi/SdAllCSoEs0ncKZAkp3dQT6TsFsoSMx+Ohvb1dmwqJ9JG235Sga27zsGl3HXs+biDpm4+w9Ld/I2eCg3nTte2myIVo+00Jmr3OFlaWVrG90gWAp4eN6fOzEymalcXUNG1MLyNHX7ffVA9ZgmL9ruoLPrrp9FNDtuxvZEdl8zmPbhKRTgpkuShTpkzh6/c8zO8bxtDuDfTa3jSh3eunuKQcQKEschbd1JOL8n//tJPfN8T1KYzP1u4NUFxSwb66lhBVJjL8KJDloqwsrcLt8w/oWLfPz6rSqiBXJDJ8achCBqy5zcOv772VcV/+Pvb0z9O663e07duC/9RxbGNTSPy3h7A6EvEecXJ06y/paKgiInoMcdcWMvryazFN2HbAxZE2j2ZfiKBAlouwaXdd1+vWD1/nZPl2xs97BOu4iXib/olhiyTQ4abx1YeJu/Y2xn/zUbxN1TRufBhbYgajEtIxgE176vjudZOG7kJEwoSGLGTAKhpau2ZUtO3dQty1i7HFp2IYBqOSLsMS5aD94AdYx4wnJvcmjAgLoyZMIjr7Kk5VvAd0zr6oOHxiCK9CJHyohywD1ur2db32tzZjHTuhWxvf8SY89ZXUPjv/zIcBP6M/d/1Z5/GGtE6R4UKBLAPmsJ/587E4EvAda2BUYuY5bayOROzpnyNpweMXOI8tVCWKDCsaspABy5ngwDA6X8dMvZmWP6/He/QQpmnS0fRP/O2tRE26Eu/RQ7T9/V1Mvw/T78NzuBJvsxPoXMGXkxw7hFchEj7UQ5YBmzs9lXs/ee248uuYPi+NG39CoL0V27hUEucsx+JwkDT/MY69u4Zj76wB08Q2/jOM+9e7ADCBudNSh+waRMKJ9rKQi3L3ujK2ljf2uFy6N4YBX5qcxOrCXpf4iwxrfd3LQkMWclHuyc/CbrUM6Fi71UJRflaQKxIZvhTIclGmpsWxvCCHKFv//pSibBEsL8ghN1W7vomcpjFkuWinNwi60G5vpxlGZ89Yu72JdKdAlqAonJlJbmocq0qr2HbAhc/nxc+ZoYzT+yFfn51IUX6WesYiPVAgS9DkpsaxujCPI20ebnv4BYyxqaRkTMJht5GTHMvcaXpiiMiFKJAl6OJjIhlT/yHf+GIqCxZcOdTliAwbuqknIeFyuUhMTBzqMkSGFQWyhITL5SIhIWGoyxAZVhTIEhLqIYv0nwJZgs40TY4cOaIeskg/KZAl6FpaWoiOjmbUqFFDXYrIsKJAlqDTcIXIwCiQJegUyCIDo0CWoGtubtb4scgAKJAl6NRDFhkYBbIEnQJZZGAUyBJ0CmSRgVEgS9BpDFlkYBTIEnTqIYsMjAJZgk6BLDIwCmQJOg1ZiAyMAlmCTj1kkYHRBvUSFM1tHjbtruPvdceIvfXHPFxSRc4EB/Om6ykhIn1lmBd6IuWn5OXlmWVlZSEsR4abvc4WVpZWsb3SBYDHF+j67vRz9PKzEymalcXUND1HT0YmwzB2m6aZ11s79ZBlwNbvqr7gk6bdn4Tzlv2N7Khs1pOmRXqhMWTplw0bNnDzzTd/EsbltHt7DuOzmSa0e/0Ul5Szflf1oNQpMhxpyEL6ba+zhQUv7qLd6+/3sVE2CxvvnkluqoYvZOTo65CFesjSZz6fD4CVpVW4ff0PYwC3z8+q0qpgliVyyVAgCwBOp5M5c+aQmJhIfHw8S5YsYe3atVx99dUsXbqU+Ph4HnnkEV5Y/SLr/sftXcMUNU/O5sSeNzn0y3+n9pl5tOxYh/fYYRrW3U/tM/Nwvf4kpt/b9TsnP/qAtffPZ0xcHFdddRX79u3r+u6nP/0pEydOJDY2luzsbN55553B/mcQGVK6qSf4/X5mz57NDTfcwLp167BYLJSVlVFVVcX777/PggULaGxsxOv18r1HX+h2fPvHe0i+4zl8rS4Or/0BnkPlxH/lfiz2WBrW3c/J/TuI+fy/0tFwkCMlz5E6/3+ybPGtRNf+ha9+9ascOHCA6upqfv7zn/Phhx+SkpJCdXU1fv/AeuEiw5V6yMIHH3xAfX09Tz/9NKNHj8Zut3PNNdcAkJKSwr333ovVaiUqKorDx9v59F0Hx8x/IyIymlGJGYxKyMD+mWnY4iYQYR+N/bLpdDQeBODE3j8Re8UtGEmfpbLpFLfffjuRkZHs2rULi8WCx+Nh//79eL1eMjMzmTRp0iD/S4gMLQWy4HQ6ycjIwGrt/j9MaWlp57xv9wa6tbGMPnODzrBFYok+973Z4QbAd7yJ1g9ep/bZ+az69rXExcXhdDqpr68nKyuLFStW8MgjjzB+/HgWLFhAfX19sC5RZFhQIAtpaWnU1tZ23bQ7m2EY57yPsg38T8bqSGTMVd8kfelGin79Z1paWjh16hQLFy4EYNGiRbz33nvU1NRgGAYPPPDAgH9LZDhSIAszZswgOTmZZcuWcfLkSdxuNzt37uyxbfKYKIwev+ldzNQvceKv/4HZWEn2hBhOnjzJm2++yYkTJzhw4ADvvvsuHo8Hu91OVFQUERH685SRRX/xgsViYfPmzVRVVZGenk5qaiobN27ssW1e5tgB/05k8r8Q/+V7afzTL3jga3lkZWWxdu1aADweD8uWLSMhIYEJEybQ1NTEE088MeDfEhmOtDBE+u3udWVsLW/sdYVeTwwDvjQ5idWFvc6RF7lkaGGIhMw9+VnYrZYBHWu3WijKzwpyRSKXBgWy9NvUtDiWF+T0+wZflC2C5QU5WjYtch5aGCIDcnrXtgvt9naaYXT2jLXbm8iFKZBlwApnZpKbGseq0iq2HXDh8XjAYuv6/vR+yNdnJ1KUn6WesUgvFMhyUXJT41hdmMeRNg/T5hZx87xvE7BG4rDbyEmOZe40PTFEpK8UyBIUjsgIDr+7jlWbV2Oz2Xo/QES60U09CYpDhw6RlJSkMBa5CApkCYqamhoyMjKGugyRYU2BLEGhQBa5eApkCQoFssjFUyBLUCiQRS7eoM6yaG7zsGl3HRUNrbS6fTjsVnImOJg3XVOjhrvq6mrmzp071GWIDGuDEsh7nS2sLK1ie6ULAI/vzCbndmsDz75dSX52IkWzspiapsUDw1FNTQ2ZmZlDXYbIsBbyQF6/q/qCy2vdn4Tzlv2N7Khs1vLaYSgQCOB0OklPTx/qUkSGtZCOIXeGcTnt3u5h3PaPbTS++nDXe9OEdq+f4pJy1u+qDmVZEmSNjY04HA6io6OHuhSRYS1kPeS9zhaKSyp6fAYbQMyU64mZcn23z9u9AYpLKshNjdPeB8OEbuiJBEfIesgrS6tw+wb2GPf2jg5WlVYFuSIJFQWySHCEpIfc3OZhe6UL04Tjf3mNtr1v4T91HGtsAnHXLSY6+yra9r1N274tTCh8CoCaJ2cz7qbv0Vr2BmYgwLZ7f82RNo9mXwwDCmSR4AhJIG/aXXfmB8Ymk3TbT7HEjOVUxXs0//FnpEzM6fG4Ux/tYsK3nsGwjsIANu2p47vXTQpFiXKRzp7C+J9NCYxzpLN6+0FNYRS5CCEJ5IqG1q6pbaNzrun6fPTl13H8L6/RUV/Z43GOmfOwRMUCnbMvKg6fCEV5chF6nMJoS6LJDSvertQURpGLEJJAbnX7ul63/dc7tH74Or7jTQCYHe3421sxjO7D11ZHwqfO4w1FeTJAmsIoElohCWSHvfO0vuNNHPnTCyQtKCZyYg5GhIX6l+4FzvO8H8P41Hm0lWO4ODOFsedZM2c7ewojoFAW6aOQzLLImeAg0hpBwOsGDCzRYwBo27cVr6umT+ewWyPISY4NRXnSTz1NYax5cjbeY/UAHPnTz2nZ+Uq3405PYdxX1zJotYoMZyEJ5LnTUwEYlZCOY8Y3aFh3P3UvLKbDVU1k6uQ+ncME5k5LDUV50k+9TWGMv2UJcVcv7PE7t8+vKYwifRSSIYuEmEhmfTaRreWNjJ31LcbO+laP7WJyb+x6nbHsj12vDaPzwZi6Wz/0zp7COBCmCdsOuDSFUaQPQrYw5J78LOxWy4COtVstFOVnBbki6UlmZiZPPPEEkydPZuzYsXz729/G7XYD8OKLLzI5+7NU/e9v0rTpf+E7caTHczT/8VmO7VjX9f5U5S7qX7qX2mfmcWj1XZw6WMamPXUcP36cO++8k+TkZCZOnMhDDz2E3z+wxUMil6KQBfLUtDiWF+QQZevfT0TZIlhekKNl04Now4YNvPXWWxw8eJDKykoef/xx3n33XR588EFuvPcJUpesw+oYT/MbT/V6Lk/9AZrffIax13+HtKUbSbrtSQIxiVQcPsEdd9yB1WqlqqqKv/71r2zZsoU1a9YMwhWKDA8h3VyocGYmywsuJ8pm+fQEim4MA6JsFpYXXK678oNsyZIlpKWlMW7cOJYvX84rr7zChg0b+M53voM9+V8wrDbi8m/HU1+Br6Xxgudq27eVmM/fRNRnvoBhRGCNTcAWn0ZjYwMlJSWsWLGC0aNHM378eJYuXcqrr746SFcpEv5Cvv1m4cxMclPjWFVaxbYDLgzOzFeFztkUJp1jxkX5WeoZD4G0tLSu1xkZGdTX11NfX8+0adPo+GQKY8SoKCKiYvG1HcEal3Tec/laXURNyuv2uXmiGa/XS3JyctdngUDgnN8WGekGZYP63NQ4VhfmcaTNw6Y9dVQcPkGr24vDbiMnOZa507Tcdig5nc6u17W1taSkpJCSkkJNTQ05n7uFSGsD7adOEWg/gTUm/oLnsjoS8R1rOOczuzWCL0yexOuRkTQ3N2O1DuqDakSGjUH9LyM+JlJ7U4ShlStXMnv2bKKjoykuLmb+/PnccMMNLFy4kN9+fS4Bn5eW7b8hMiX7gr1jgJjcm2jc+BOisq7EnpGLv+0oHr+HO2+6gZ0338wPf/hDHnvsMWJiYvjnP/9JXV0ds2bNGqQrFQlvesipsGjRIm6++WYuu+wyJk2axEMPPcSNN97IY489xl2LF+J8YTG+lgYSvvrjXs8VmZJNQsEPOPbOGpzPzqdxw4NMju2c8vbyyy/T0dHRNaNj7ty5HD58eBCuUGR4MMx+TDDNy8szy8rKQliODLbMzEzWrFnDjTfeeN42e50tLHhxF+3e/k9Ri7JZ2Hj3TN0bkBHNMIzdpml2v7nyKeohS680hVFkcOjuivTJ6amIF9rt7TTD6Fzco93eRPpHgTzCVVdX97mtpjCKhJYCWfpFUxhFQkeBLAOiKYwiwaebeiIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiYUyCIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiYUyCIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiYUyCIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiYUyCIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiYUyCIiYUKBLCISJhTIIiJhQoEsIhImFMgiImFCgSwiEiasQ12AiMhQaG7zsGl3HRUNrbS6fTjsVnImOJg3PZX4mMghqUmBLCIjyl5nCytLq9he6QLA4wt0fWe3NvDs25XkZydSNCuLqWlxg1qbAllERoz1u6opLqnA7fNjmt2/d38Szlv2N7KjspnlBTkUzswctPoUyCIyInSGcTnt3kCvbU0T2r1+ikvKAQYtlHVTT0QueXudLRSXVPQpjM/W7g1QXFLBvrqWEFV2LgWyiFzyVpZW4fb5B3Ss2+dnVWlVkCvqmQJZRC4JTqeTOXPmkJiYSHx8PEuWLCEQCPDgTx7h10sKqH3uNpo3/4yA+yQAvpZGap6cTdu+rdStvAPns/M58dcSPIcrqf/VEmqfnc/RLb/ANGHbARc/X/0iV199NUuWLGHMmDHk5OTwzjvvBPUaFMgiMuz5/X5mz55NRkYG1dXVHDp0iAULFrB27Vp+9atfk7r4SSZ+bw0Br5ujW1efc6yn/gATv/t/SPj6Axx9+0WO/+dvSVrwOCl3reRkxXu4a/8LA/iw5hjvv/8+kyZNorm5mUcffZQ5c+Zw9OjRoF2HAllEhr0PPviA+vp6nn76aUaPHo3dbueaa65hw4YNTLllEWZsEhGjohg763ZOlu/ADJwZvhhz9QIM6yiiPjONCJud0ZOvwzI6DmtsAvbUKXQ0fozbF+Bwi5vx48dz3333YbPZmD9/PtnZ2bz55ptBuw4FsogMe06nk4yMDKzWcyeO1dfXY3WM73pvHTMeAn78J491fWYZPbbrtWEbhSU67pz3gY52ANxePxMnTsQwjK7vMzIyqK+vD9p1KJBFZNhLS0ujtrYWn893zucpKSn4Wpu63vtaXRBhOSeE+8pus3Do0CHMsyYw19bWkpKSMvDCP0WBLCLD3owZM0hOTmbZsmWcPHkSt9vNzp07WbhwIf946xUiTjQR6GinZftvGH35tRgRln6d326NIDnOTlNTE88//zxer5fXXnuN8vJyCgoKgnYdWhgiIsOexWJh8+bNfP/73yc9PR3DMFi0aBErVqygqrqWnz3/YwK+DqIum8bYm77X7/ObwJUZY6n64hf56KOPSEhIICkpiU2bNhEfHx+06zDMntYPnkdeXp5ZVlYWtB8XERkMd68rY2t5Y4/LpXtjGPClyUnM9P2dNWvW8N577w3gHMZu0zTzemunIQsRueTdk5+F3dq/YYrT7FYLRflZQa6oZwpkEbnkTU2LY3lBDlG2/kVelC2C5QU55KYOzq5vGrIQkRGjt93eTjOMzp5xsHZ76+uQhW7qiciIUTgzk9zUOFaVVrHtgAuDM1tuQudsChO4PjuRovysQesZn6ZAFpERJTc1jtWFeRxp87BpTx0Vh09wvL2Dktd/x9L//i0Kr87SE0NERAZTfEwk371uUtf7K1fewxfHfGPIwhh0U09EBIDJkyfzj3/8Y0hrUCCLiABTpkxh//79Q1qDAllEBPWQRUTChnrIIiJhIiMjg6NHj9La2jpkNSiQRUSAiIgIcnJyKC8vH7IaNO1NRARobvMQO2MOj7/jJLEiAofdSs4EB/Ompw7aVDgFsoiMaHudLawsrWJ7pQvfmM/jPxEBFZ2b2tutDTz7diX52YkUzcpialpoV+4pkEVkxOq+t8W5o7inl1Vv2d/IjsrmoO1tcT4KZBEZkTrDuJx2b6DXtqYJ7V4/xSWd48uhCmXd1BOREWevs4XikoquMK5fU4S7Zl+vx7V7AxSXVLCvriUkdSmQRWTEWVlahdvn73qfctcq7Bm5fTrW7fOzqrQqJHUpkEVkRGlu87C90jWgxzlB5/DFtgMujrR5glsYCmQRGWE27a7r9lndqu/QXv03TDPA8b+8xqHVd+FcsRDX60/ibz/Rrb0BbNrT/TwXS4EsIiNKRUMrHl/PN/JOlG3m1Ee7SFr0JKlLXibCHsPRLb/o1s7tC1BxuHtQXywFsoiMKK1u33m/O/G3/yDuusVYHQkYVhtjrlnEqQM7MQP+bm1b3d6g16ZpbyIyojjs5489/3EXrt8Xg3FWX9WIwH/yGNbYhE+dxxb02hTIIjKi5ExwEGlt6HHYwuJIIL7gB9hTJ1/wHHZrBDnJsUGvTUMWIjKizJ2eet7vYq/4Mi3bX8Z3vHPptP/UcU5V7urWzgTmTjv/eQZKPWQRGVESYiKZ9dlEtpY3dpv6FnvlVwGTxo0P4287iiV6DNGXX0v0Z2d2tTGMzqdSh2LDIQWyiIw49+Rn8eePmmn3dt6sSy16qes7x4xv4JjxjfMea7daKMrPCkldGrIQkRFnalocywtyiLL1LwKjbBEsL8ghNzU0u76phywiI9LpDYLO3e2tZ4bR2TPWbm8iIiFSODOT3NQ4VpVWse2AC4MzW25C52wKk84x46L8rJD1jE9TIIvIiJabGsfqwjyOtHnYtKeOisMnaHV7cdht5CTHMneanhgiIjKo4mMi+e51k4a0Bt3UExEJEwpkEZEwoUAWEQkTCmQRkTChQBYRCRMKZBGRMGGY/XiwlGEYLqAmdOWIiFxymgFM07ylt4b9CmQREQkdDVmIiIQJBbKISJhQIIuIhAkFsohImFAgi4iECQWyiEiYUCCLiIQJBbKISJhQIIuIhIn/D3v5FPt9d0QDAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nx.draw_networkx(graph)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"G = CausalGraphicalModel(nodes=['ICE','CRIMES','POLICE', 'TEMP','IE','COMP', 'AIR'], \n",
"edges=[('TEMP','ICE'), ('TEMP','CRIMES'), ('IE','COMP'), ('POLICE', 'CRIMES')])\n",
"G.draw()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EdgeView([('police', 'crimes'), ('temp', 'ice'), ('ice', 'crimes'), ('ie', 'comp')])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"graph.edges"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Задача про курение"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" smoke | \n",
" tar | \n",
" cancer | \n",
"
\n",
" \n",
" \n",
" \n",
" 654 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 350 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 670 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 721 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 278 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" smoke tar cancer\n",
"654 0 0 1\n",
"350 1 1 1\n",
"670 0 0 1\n",
"721 0 0 1\n",
"278 1 1 0"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"smoke = []\n",
"cancer = []\n",
"tar = []\n",
"\n",
"smoke += [1] * 323\n",
"cancer += [0]*323\n",
"tar+=[1]*323\n",
"\n",
"smoke+=[0]\n",
"cancer+=[0]\n",
"tar+=[1]\n",
"\n",
"smoke+=[1]*57\n",
"cancer+=[1]*57\n",
"tar+=[1]*57\n",
"\n",
"smoke+=[0]*19\n",
"cancer+=[1]*19\n",
"tar+=[1]*19\n",
"\n",
"\n",
"smoke+=[1]*18\n",
"cancer+=[0]*18\n",
"tar+=[0]*18\n",
"\n",
"smoke+=[0]*38\n",
"cancer+=[0]*38\n",
"tar+=[0]*38\n",
"\n",
"smoke+=[1]*2\n",
"cancer+=[1]*2\n",
"tar+=[0]*2\n",
"\n",
"smoke+=[0]*342\n",
"cancer+=[1]*342\n",
"tar+=[0]*342\n",
"\n",
"dataframe = pd.DataFrame({'smoke':smoke, 'tar':tar, 'cancer':cancer})\n",
"dataframe.sample(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Строим граф"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"g = nx.DiGraph()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"g.add_nodes_from(['smoke','cancer','tar'])\n",
"g.add_edges_from([('tar','cancer'),('smoke','tar')])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAADuCAYAAAAOR30qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHdJJREFUeJzt3XlcVPX+P/DXwLApIAqobEmCSeCwBCrmgktqmdcWl4tmhdIduJCp13bvvfpN7Wrdq5ZaPtK8VpZp5nXFJBVxBdlXUTEXFlmVTRhgZs7vD4tf5orOcM7MvJ6Ph48HzJxz5n0eDx6vx8vPnDkjEwQBREQkPjOxByAiohsYyEREEsFAJiKSCAYyEZFEMJCJiCSCgUxEJBEMZCIiiWAgExFJBAOZiEgi5O3Z2MnJSfD09NTTKERExiktLa1KEATne23XrkD29PREamrqg09FRGSCZDLZpfvZjksWREQSwUAmIpIIBjIRkUQwkImIJIKBTEQkEQxkIiKJYCATEUkEA5mISCLa9cEQIiJDU9XQjG1pxSgoq0OdSg17azl8etpjcrA7HG2txB7vJgxkIjJKWUU1WHO4EIlnKwEAzWpt23PW8jKsOHAWw/s6IybMGwEeDmKNeRMGMhEZnU1JF7EkrgAqtQaCcOvzql/DOT6/HEfOVmH+OB9MD/Xs2CFvg4FMREblRhifRlOr9p7bCgLQ1KrBkrjTACB6KPNNPSIyGllFNVgSV3BfYfx7Ta1aLIkrQHZxjZ4muz8MZCIyGmsOF0Kl1jzQviq1Bp8dLmzXPmq1+oFe604YyEQkeUVFRXjxxRfh7OwMR0dHvP766zh//jxGjhwJR0dHODk5YdKfw3Eo+0LbmnHxZzNRm7wdpV++jssrpqByxzII6pa2YzaeTULphlm4vHwySta+hsbzaUg4U4kLJRWIjIyEi4sL3Nzc8Pe//x0azY2Q37hxIwYPHoy5c+fC0dERCxcu1Ol5MpCJSNI0Gg3Gjx+PXr164eLFiygpKUF4eDgEQcB7772H0tJSnD59GlkFv6D6yLc37dtYcBTdp/wf3KK/REvlBTTkHAAANJeeQdXe5eg6YiY85m5Bj5eWQt6lO2QAXgifDrlcjsLCQmRkZCA+Ph7r169vO2ZycjJ69+6N8vJyzJ8/X6fnyjf1iEjSTp06hdLSUnz88ceQy29E1pAhQwAA3t7eAABnZ2f4jA7H/m/WoMvv9rULngC5nSMAoJP3ALSU/wIAaMj+GbaK0bB5NAgAILdzAgBcr61GYdJhnIzfCRsbG3Tu3Blz587FF198gaioKACAq6srZs2adWM/uW4jlIFMRJJWVFSEXr163RJ+5eXlmD17No4ePYr6+no0taghWHa+aRtz265tP8vkVhAargIA1HWVsPEKueW11LUV0GjUcHFxaXtMq9XCw8Oj7fff/6xrXLIgIknz8PDA5cuXb3kD7f3334dMJkNOTg7q6urw9OuLAdzmouPbkNs7Q32t7JbHze2dYS63RFVVFWpqalBTU4O6ujrk5eW1bSOTyR7qfO6GgUxEkjZgwAC4uLjg3XffxfXr16FSqXD8+HHU19fD1tYWXbp0QUlJCfL3f4v7jUpb/9FoyDmApouZEAQt1PVVaK0ugq2DE/wGDMG8efNQV1cHrVaL8+fPIzExUa/n+BsGMhFJmrm5OXbv3o3CwkI88sgjcHd3x5YtW7BgwQKkp6ejS5cuePbZZ/Hq1Cn3fUwr175wGjcb1w6uR9GKP6P82/egrq2AAGDHlu/Q0tICX19fdO3aFZMmTcKVK1f0d4K/IxNu97nCOwgJCRH4rdNEJCXNzc0oKipCfn4+1uZqcLrB8rYfl74XmQwY69sDa6ffurb8sGQyWZogCPc8MBsyERmk1atXo2vXrujcuTMUCgWee+45/LJ3Lazl5g90PGu5OWKGe+t4yvZhIBORQerfvz8aGxuh0WigUqlgaWmJuG8+x/xxPrCxaF+02ViYYf44H/i7i3vXN172RkQGqa6uDpaWltBoNLCwsEBMTAx69+6N3r1vPH+3u739Ria70Yylcrc3NmQiMih1dXVQKpV47bXXsHnzZri6usLKyuqmjzFPD/XEFmUoxvr2gJXcDNbym6POWm4GK7kZxvr2wBZlqCTCGGBDJiIDsn//fiiVSjz99NPIycmBvb09vLy8UFlZCTs7u5u29Xd3wNrpIahuaMa29GIUXKlHnaoV9tYW8HGxw6QnpPeNIbzKgogkr7a2FvPmzcOBAwewbt06jB49WuyR2oVXWRCRUfjpp5+gUCggl8uRnZ1tcGHcHlyyICJJqqmpwbx583Dw4EFs2LABTz31lNgj6R0bMhFJzr59+6BQKGBpaYmcnByTCGOADZmIJKSmpgZ/+9vfkJCQgI0bN2LUqFFij9Sh2JCJSBLi4uKgUChgY2OD7OxskwtjgA2ZiER27do1zJ07F0eOHMHXX3+NESNGiD2SaNiQiUg0e/fuhUKhgK2tLbKzs006jAE2ZCISwbVr1zBnzhwcO3YMmzZtwvDhw8UeSRLYkImoQ+3evRv9+vWDvb09srKyGMa/w4ZMRB3i6tWrmD17Nk6cOIHvvvsOYWFhYo8kOWzIRKR3u3btgkKhQLdu3ZCdnc0wvgM2ZCLSm+rqasyePRtJSUnYvHkzhg0bJvZIksaGTER6sWPHDigUCjg5OSErK4thfB/YkIlIp6qrqzFr1iykpKRgy5YtGDp0qNgjGQw2ZCLSmf/9739QKBTo2bMnsrKyGMbtxIZMRA+tqqoKs2bNQlpaGn744QcMHjxY7JEMEhsyET2U7du3Q6FQwNXVFZmZmQzjh8CGTEQPpLKyErNmzUJGRgZ+/PFHPPnkk2KPZPDYkImo3bZt2wZ/f394eHggMzOTYawjbMhEdN8qKysRGxuL7OxsbN++HYMGDRJ7JKPChkxE9+WHH36AQqGAp6cnMjIyGMZ6wIZMRHdVUVGB2NhY5ObmYseOHQgNDRV7JKPFhkxEtyUIArZu3Qp/f3/07t0bGRkZDGM9Y0MmoluUl5cjNjYW+fn52LlzJwYOHCj2SCaBDZmI2giCgO+//x4BAQHo06cP0tPTGcYdiA2ZiAAAZWVliImJwZkzZ7Br1y4MGDBA7JFMDhsykYkTBAHfffcdAgIC4OPjg7S0NIaxSNiQiUxYWVkZ/vrXv+LcuXPYs2cP+vfvL/ZIJo0NmcgECYKAb7/9FgEBAfD19UVaWhrDWALYkIlMzJUrVxAdHY1ffvkFe/fuRUhIiNgj0a/YkIlMhCAI2LRpEwICAuDv74/U1FSGscSwIROZgNLSUkRHR+PixYvYt28fgoODxR6JboMNmciICYKAr7/+GoGBgQgMDERqairDWMLYkImMVGlpKaKionD58mXs378fQUFBYo9E98CGTGRkBEHAV199hcDAQAQHByMlJYVhbCDYkImMSElJCZRKJUpKShAfH4/AwECxR6J2YEMmMgKCIGDjxo0ICgrCgAEDcOrUKYaxAWJDJjJwxcXFUCqVuHLlCn7++WcEBASIPRI9IDZkIgMlCAI2bNiAoKAgDBo0CKdOnWIYGzg2ZCIDVFRUBKVSifLychw8eBD+/v5ij0Q6wIZMZEAEQcCXX36JJ554AoMHD0ZycjLD2IiwIRMZiMuXL+Mvf/kLqqqqcOjQISgUCrFHIh1jQyaSOEEQsH79egQHB2PYsGFISkpiGBspNmQiCfutFVdXVyMhIQH9+vUTeyTSIzZkIgkSBAFffPEFgoODERYWhqSkJIaxCWBDJpKYS5cu4bXXXkNNTQ1bsYlhQyaSCEEQsHbtWgQHB2PUqFE4efIkw9jEsCETScDFixcRGRmJ+vp6HDlyBL6+vmKPRCJgQyYSkVarxeeff46QkBCMGTMGJ06cYBibMDZkIpFcuHABkZGRuH79OlsxAWBDJupwWq0Wn332Gfr3749nnnkGx48fZxgTADZkog71yy+/IDIyEiqVCseOHYOPj4/YI5GEsCETdQCtVovVq1djwIABGD9+PMOYbosNmUjPzp8/j8jISLS0tOD48ePo27ev2CORRLEhE+mJVqvFqlWrMHDgQEyYMAFHjx5lGNNdsSET6UFhYSEiIyOhVqtx4sQJPPbYY2KPRAaADZlIh7RaLT799FOEhobi+eefx5EjRxjGdN/YkIl0pLCwEDNnzoRWq8XJkyfRp08fsUciA8OGTPSQtFotVq5cidDQUEycOBGJiYkMY3ogbMhED+HcuXOYOXMmALAV00NjQyZ6ABqNBitWrMCgQYMwefJktmLSCTZkonY6c+YMZs6cCblcjqSkJHh7e4s9EhkJNmSi+6TRaPCf//wHgwcPRnh4OBISEhjGpFNsyET3oaCgADNmzICVlRWSk5Ph5eUl9khkhNiQie5Co9Hg3//+N4YMGYLp06fj0KFDDGPSGzZkojv4rRVbW1vj1KlT6N27t9gjkZFjQyb6A41Gg48++ghDhgzByy+/jIMHDzKMqUOwIRP9zunTpzFjxgx06tQJKSkpePTRR8UeiUwIGzIRALVajWXLlmHYsGGIiIjAgQMHGMbU4diQyeTl5+cjIiIC9vb2SElJgaenp9gjkYliQyaTpVarsXTpUoSFhSEyMhI///wzw5hExYZMJikvLw8RERFwcHBAamoqevXqJfZIRGzIZFrUajU+/PBDDB8+HEqlEvHx8Qxjkgw2ZDIZubm5iIiIQLdu3ZCWloZHHnlE7JGIbsKGTEavtbUVS5YswYgRIxAdHY39+/czjEmS2JDJqOXk5CAiIgLOzs5sxSR5bMhklFpbW7F48WKMHDkSMTEx2LdvH8OYJI8NmYxOdnY2IiIi0KNHD6Snp8PDw0PskYjuCxsyGY3W1lZ88MEHGDVqFGbNmoW4uDiGMRkUNmQyCpmZmZgxYwZcXFyQkZEBd3d3sUciajc2ZDJoLS0tWLhwIUaPHo3Zs2dj7969DGMyWGzIZLAyMzMREREBNzc3ZGZmws3NTeyRiB4KGzIZnJaWFixYsABjxozB3LlzsWfPHoYxGQU2ZDIoGRkZiIiIgIeHBzIyMhjEZFTYkMkgtLS04J///CfGjh2LN998E7t372YYk9FhQ6YOU9XQjG1pxSgoq0OdSg17azl8etpjcrA7HG2t7rhfWloaZsyYAU9PT2RmZsLV1bUDpybqOAxk0rusohqsOVyIxLOVAIBmtbbtOWt5GVYcOIvhfZ0RE+aNAA+Htueam5uxaNEirFu3DsuXL8e0adMgk8k6fH6ijsJAJr3alHQRS+IKoFJrIAi3Pq/6NZzj88tx5GwV5o/zwfRQT6SmpmLGjBno3bs3MjMz4eLi0sGTE3U8BjLpzY0wPo2mVu09txUEoKlVg8Vxp7Fz5y4c2bAEK1aswNSpU9mKyWTwTT3Si6yiGsx4eiCunktv136qVi1StY/g+/gTXKIgk8NAJr1Yc7gQt1mhuD/mFticVaXLcYgMApcsSOeqGprx/UdvQ1NbicptHwAyM3QZHI6WK+egKs6DoG6BpfOj6DY2BpbON74+qWrPCsgsLKGurUBzUS7iJv8T1S8G3vXqCyJjw0AmnduWVgyX599C4+U8OI57AzaegQCAhqx4OI6bDZm5Ba4d/i+qdv8brjNXte13PT8R3ScvhJXbAljJtNiWXoyoYV5inQZRh+OSBelcQVndTZe2/cY2YAzMrDpBJreAw5BpaK24AK3qetvznbwHwtrdFzKZGVogR8GV+o4cm0h0bMikc3Uq9S2PCVoNao58g8aCY9A01gKyG11A01QHM+vOAABze+c/HKdV/8MSSQgDmXTO3vrXP6vfXSFxPT8RjeeS0D18MeRdekBovo6ileHA79/6+8MVFfbWFh0wLZF0cMmCdM6npz2s5GYw7+wAdU0ZAEBoaYLM3ALmNvYQWptxLfHrux7DWm4GHxe7jhiXSDIYyKRzk4Jv3CC+S+hk1J7Ygssr/gxNUz3k9t1RvOZVlK7/K6xc+971GAKASU/wRvNkWrhkQTqjUqlQVlaG/Px8DPN2wgFNKDo9FnrH7W0Vo9p+dho/t+1nmQwY0deZl7yRyWFDpodSWVmJxx57DJ07d0bnzp3h7e2NZ599FiN7qmEtN3+gY1rLzREz3FvHkxJJHwOZHkq3bt1gZWWFxsZGaLVaCIKAadOmYerYJzF/nA9sLNr3J2ZjYYb543zg7+5w742JjAwDmR5KRUUFXFxc2u45YWdnhzVr1gAApod6Yv64x2FjYf7HCyhuIZMBNhbmmD/ucUwP9dTz1ETSxECmB6LVarF27Vr4+/ujf//+iI2NBQB88skncHD4/+12eqgntihDMda3B6zkZrCW3/wnZy03g5XcDGN9e2CLMpRhTCaNb+pRu+Xm5iIqKgqCICAhIQH9+vVDY2MjvL298corr9yyvb+7A9ZOD0F1QzO2pRej4Eo96lStsLe2gI+LHSY9cfdvDCEyFTLhdncNv4OQkBAhNTVVj+OQlDU1NWHx4sX44osvsGjRIiiVSpiZ8T9ZRPcik8nSBEEIudd2bMh0Xw4ePIjo6GgEBQUhOzub3+BBpAcMZLqryspKzJs3D4mJiVizZg3Gjx8v9khERov/36TbEgQBGzduRL9+/eDk5IS8vDyGMZGesSHTLc6ePYvo6GjU1tYiLi4OwcHBYo9EZBLYkKlNS0sLFi1ahCeffBJ/+tOfkJyczDAm6kBsyAQAOHbsGJRKJby8vJCeno5HHnlE7JGITA4D2cRdu3YN77zzDvbu3YtPPvkEEydO5Dc9E4mESxYmShAEfP/99/Dz84NcLkd+fj4mTZrEMCYSERuyCbpw4QJiYmJQXFyMH3/8EYMGDRJ7JCICG7JJaW1txccff4z+/fsjLCwM6enpDGMiCWFDNhGnTp2CUqlE9+7dkZycDC8vL7FHIqI/YEM2cnV1dXjjjTfw3HPP4a233sL+/fsZxkQSxUA2Yjt27ICfnx+uX7+O3NxcvPTSS3zTjkjCuGRhhIqLizFr1iycPn0amzZtQlhYmNgjEdF9YEM2IhqNBqtWrUJQUBACAgKQlZXFMCYyIGzIRiIrKwtKpRLW1tY4evQofHx8xB6JiNqJDdnAXb9+HW+//TZGjx4NpVKJhIQEhjGRgWIgG7CffvoJCoUCJSUlyMnJQWRkJL/Bg8iAccnCAJWXl2POnDlITk7G559/jrFjx4o9EhHpAOuUAdFqtVi3bh0UCgV69eqF3NxchjGREWFDNhD5+fmIiopCa2srDhw4AH9/f7FHIiIdY0OWOJVKhX/84x8ICwtDeHg4jh8/zjAmMlJsyBKWkJCAqKgoKBQKZGZmws3NTeyRiEiPGMgSVF1djTfffBMHDx7E6tWrMWHCBLFHIqIOwCULCREEAd988w38/PzQpUsX5OXlMYyJTAgbskQUFhYiOjoa1dXV2LNnD0JCQsQeiYg6GBuyyFpaWvDhhx8iNDQUzzzzDFJSUhjGRCaKDVlEJ06cgFKpRK9evZCamgpPT0+xRyIiETGQRVBTU4P33nsPO3fuxMqVKzF58mTep5iIuGTRkQRBwA8//AA/Pz8IgoD8/HxMmTKFYUxEANiQO8ylS5cQGxuLCxcuYOvWrRg8eLDYIxGRxLAh65larcby5csRHByMQYMGISMjg2FMRLfFhqxHqampUCqV6Nq1K06ePIk+ffqIPRIRSRgbsh7U19djzpw5GD9+PObMmYMDBw4wjInonhjIOrZr1y74+fmhpqYGubm5eOWVV/imHRHdFy5Z6EhJSQneeOMN5OTkYOPGjRg5cqTYIxGRgWFDfkgajQZr1qxBYGAgfH19kZ2dzTAmogfChvwQsrOzoVQqIZfLkZiYCF9fX7FHIiIDxob8ABobG/Huu+9i1KhRmDlzJo4cOcIwJqKHxkBup/j4eCgUCly8eBE5OTlQKpX8pmci0gkuWdyniooKzJ07FydOnMBnn32GZ555RuyRiMjIsNrdgyAI2LBhA/r16wdXV1fk5uYyjIlIL9iQ76KgoABRUVFobGxEfHw8AgMDxR6JiIwYG/JtNDc3Y+HChRgyZAgmTpyIpKQkhjER6R0b8h8kJiYiKioKjz/+ODIzM+Hu7i72SERkIhjIv7p69SreeustxMfH49NPP8ULL7wg9khEZGJMfslCEAR8++238PPzQ6dOnZCXl8cwJiJRmHRDPn/+PGJiYlBWVoYdO3Zg4MCBYo9ERCbMJBtya2srli5dioEDB2LUqFFITU1lGBOR6EyuISclJUGpVMLV1RUpKSl49NFHxR6JiAiACQVybW0t3n//fWzfvh3Lly9HeHg471NMRJJi9EsWgiDgxx9/hJ+fH1paWpCXl4epU6cyjIlIcoy6IRcVFSE2Nhbnzp3D5s2bMXToULFHIiK6I6NsyBqNBitXrkRQUBBCQkKQmZnJMCYiyZNkQ65qaMa2tGIUlNWhTqWGvbUcPj3tMTnYHY62VnfdNz09HUqlEnZ2djh+/Dj69u3bQVMTET0cSQVyVlEN1hwuROLZSgBAs1rb9py1vAwrDpzF8L7OiAnzRoCHw037NjQ0YMGCBdi0aROWLVuGV199levERGRQJLNksSnpIsLXJeHn0+VoVmtvCmMAUP36WHx+OcLXJWFT0sW25/bu3Yt+/fqhsrISubm5iIiIYBgTkcGRREPelHQRS+JOo6lVe89tBQFoatVgSdxp1NbWIfHLxUhPT8f69evx1FNPdcC0RET6IXogZxXVYElcwX2F8e9dOfQN3t5WgpefH4ucr76CjY2NniYkIuoYoi9ZrDlcCJVa80D7yszMAN8xDGMiMgqiBnJVQzMSz1ZCEB78GAlnKlHd0Ky7oYiIRKK3QF62bBnc3NxgZ2eHvn374uDBg1i4cCEmT56M6dOnw87ODgH+/mipLkbtya0o+vQlFK+JQNOF9LZjqOurUbHtAxStDEfJ2r+gPvOnW15HBmDLqQuYOnUqJk6ciJaWFmi1WixduhReXl5wdHTElClTcPXqVX2dKhGRTuglkM+cOYPVq1cjJSUF9fX12L9/Pzw9PQEAu3fvxssvv4xr167B3r0Pir77ByAIcI/9Cl0Gh6P6pzVtx6na+RHM7Zzg/vrXcH7hPdQkfo2mi1k3vVZjUxM+fvMvsLKywtatW2FpaYlVq1Zhx44dSExMRGlpKbp27YrY2Fh9nCoRkc7oJZDNzc3R3NyM/Px8tLa2wtPTE15eXgCAoUOHYuzYsZDL5XB7YiS0jbWwD50EmbkcnR8fBk1tObSqBqjrKtFcchpdh0dAJreEZY/esA0Yg+u5h9peR9vciIqtC9DJ0RX//e9/YW5uDgBYu3YtlixZAnd3d1hZWWHhwoXYtm0b1Gq1Pk6XiEgn9BLI3t7eWLlyJRYuXIju3bsjPDwcpaWlAIAePXq0bWdv2wlmNvaQmd0IUpnFjU/haVtU0DRchZm1LcysOrVtL+/SHZqG6rbfm0vPoLXiAp58MfKm644vXbqEF154AQ4ODnBwcMDjjz8Oc3NzlJeX6+N0iYh0Qm9ryNOmTcOxY8dw6dIlyGQyvPPOO7ds49G1E+70+Q1z227QqhqgbW5se0xdVwlzW8e2320eDUK3wVOwZ2nMTWHr4eGBffv2oaampu2fSqWCm5ub7k6QiEjH9LaGfOjQITQ3N8Pa2ho2NjYwM7v1pYY95nTHY8jtnWHl5oOaxK8gqFvQUnEBDVk/o3O/ETdt1+3JyYh45SWMGjUKVVVVAIDo6GjMnz8fly5dAgBUVlZi586dOjxDIiLd00sgNzc3491334WTkxN69uyJiooK/Otf/7pluy42lrC2ML9jS3aa8DbUtRUoXv0KKrcvgcPQabDxDLxpmxF9nfGvRf+H559/Hk899RSuXr2K2bNnY8KECRgzZgzs7OwQGhqK5ORkfZwqEZHOyIR2XAQcEhIipKam6nSArKIahK9LQlNr+z8cYmNhji3KUPi7O9x7YyIikchksjRBEELutZ3on9QL8HDA/HE+sLFo3yg2FmaYP86HYUxERkP0e1kAwPRQTwDAkrgCqNSau35yTyYDrOXmmD/Op20/IiJjIIlABm6Esr+7Az47XIiEM5WQ4cYtN39jLTeDgBtrxjHDvdmMicjoSCaQAcDf3QFrp4eguqEZ29KLUXClHnWqVthbW8DHxQ6Tnrj3N4YQERkqSQXybxxtrRA1zEvsMYiIOpTob+oREdENDGQiIolgIBMRSQQDmYhIIhjIREQSwUAmIpIIBjIRkUQwkImIJKJdd3uTyWSVAC7pbxwiIqPUSxAE53tt1K5AJiIi/eGSBRGRRDCQiYgkgoFMRCQRDGQiIolgIBMRSQQDmYhIIhjIREQSwUAmIpIIBjIRkUT8PxXR2+20+WF5AAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nx.draw_networkx(g)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Иследуем причинность"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'smoke'}"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adjustment = AdjustForDirectCauses()\n",
"admissable_set = adjustment.admissable_set(g,['tar'], ['cancer'])\n",
"admissable_set"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['smoke', 'tar', 'cancer'], dtype='object')"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataframe.columns"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/andrey/Library/Python/3.7/lib/python/site-packages/statsmodels/nonparametric/kernel_density.py:475: RuntimeWarning: invalid value encountered in log\n",
" L += func(f_i)\n"
]
}
],
"source": [
"effect = CausalEffect(dataframe.sample(299), ['smoke'], ['cancer'], \n",
" variable_types={'smoke': 'u', 'cancer': 'u', 'tar': 'u'},\n",
" admissable_set=[])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"smoke = pd.DataFrame({'smoke': [1], 'cancer': [1]})\n",
"nsmoke = pd.DataFrame({'smoke': [0], 'cancer': [1]})"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.5173014808941588"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"effect.pdf(smoke) - effect.pdf(nsmoke)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}