pwd
'd:\\python\\exerise-df\\df-data-analysis'
from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt

Univariate analysis
dat = pd.read_csv("simple-resgreesion.csv")
dat.head()

   | N  | weight
0  | 58 | 115
1  | 59 | 117
2  | 60 | 120
3  | 61 | 123
4  | 62 | 126
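Before fitting, a scatter plot of the raw data is a quick sanity check that weight increases roughly linearly with N; a minimal sketch using the matplotlib import above:

plt.plot(dat['N'], dat['weight'], 'o')   # raw data: weight against N
plt.xlabel('N')
plt.ylabel('weight')
plt.show()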
model = ols('weight ~ N', dat).fit()
print(model.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 weight   R-squared:                       0.991
Model:                            OLS   Adj. R-squared:                  0.990
Method:                 Least Squares   F-statistic:                     1433.
Date:                Wed, 27 Sep 2017   Prob (F-statistic):           1.09e-14
Time:                        14:49:40   Log-Likelihood:                -26.541
No. Observations:                  15   AIC:                             57.08
Df Residuals:                      13   BIC:                             58.50
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept    -87.5167      5.937    -14.741      0.000      -100.343   -74.691
N              3.4500      0.091     37.855      0.000         3.253     3.647
==============================================================================
Omnibus:                        2.396   Durbin-Watson:                   0.315
Prob(Omnibus):                  0.302   Jarque-Bera (JB):                1.660
Skew:                           0.789   Prob(JB):                        0.436
Kurtosis:                       2.596   Cond. No.                         982.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
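The summary gives weight ≈ -87.52 + 3.45·N with R-squared 0.991. A minimal sketch of how the fitted line and a point prediction can be checked, reusing the model and dat objects defined above:

print(model.params)                               # Intercept about -87.52, slope on N about 3.45
print(model.predict(pd.DataFrame({'N': [70]})))   # predicted weight for N = 70
plt.plot(dat['N'], dat['weight'], 'o', label='observed')
plt.plot(dat['N'], model.fittedvalues, 'r-', label='OLS fit')
plt.legend()
plt.show()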
Polynomial regression analysis

dat2 = pd.read_csv("duoxiangshi.csv")
dat2.head()

   | N  | weight
0  | 58 | 115
1  | 59 | 117
2  | 60 | 120
3  | 61 | 123
4  | 62 | 126
# I(N**2) tells the patsy formula parser to treat N**2 as a literal squared term
mod = ols('weight ~ N + I(N**2)', dat2).fit()
print(mod.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 weight   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 1.139e+04
Date:                Wed, 27 Sep 2017   Prob (F-statistic):           2.13e-20
Time:                        14:59:57   Log-Likelihood:                -5.2563
No. Observations:                  15   AIC:                             16.51
Df Residuals:                      12   BIC:                             18.64
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept    261.8782     25.197     10.393      0.000       206.979   316.777
N             -7.3483      0.778     -9.449      0.000        -9.043    -5.654
I(N ** 2)      0.0831      0.006     13.891      0.000         0.070     0.096
==============================================================================
Omnibus:                        2.449   Durbin-Watson:                   1.144
Prob(Omnibus):                  0.294   Jarque-Bera (JB):                1.033
Skew:                           0.049   Prob(JB):                        0.597
Kurtosis:                       1.718   Cond. No.                     1.09e+06
==============================================================================
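The quadratic term is highly significant and the AIC drops from 57.08 to 16.51. A sketch of how the two fits could be compared directly, assuming dat2 holds the same observations as dat so the models are nested on the same data:

print(model.aic, mod.aic)                # about 57.08 vs 16.51: quadratic fit preferred
print(anova_lm(model, mod))              # F-test for adding the I(N**2) term
plt.plot(dat2['N'], dat2['weight'], 'o', label='observed')
plt.plot(dat2['N'], mod.fittedvalues, 'r-', label='quadratic fit')
plt.legend()
plt.show()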
Multiple regression analysis

dat = pd.read_csv("mul-regression.csv")
dat.head()

   | x1   | x2   | x3   | x4 | y
0  | 30.8 | 33.0 | 50.0 | 90 | 520.8
1  | 23.6 | 33.6 | 28.0 | 64 | 195.0
2  | 31.5 | 34.0 | 36.6 | 82 | 424.0
3  | 19.8 | 32.0 | 36.0 | 70 | 213.5
4  | 27.7 | 26.0 | 47.2 | 74 | 403.3
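Before fitting, a quick look at the pairwise correlations can flag predictors that add little on their own; a minimal sketch with pandas:

print(dat.describe())        # summary statistics for x1..x4 and y
print(dat.corr())            # pairwise correlations; strong correlations among the x's hint at collinearity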
mod = ols('y ~ x1 + x2 + x3 + x4', dat).fit()
print(mod.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                      y   R-squared:                       0.894
Model:                            OLS   Adj. R-squared:                  0.866
Method:                 Least Squares   F-statistic:                     31.78
Date:                Wed, 27 Sep 2017   Prob (F-statistic):           3.66e-07
Time:                        14:52:33   Log-Likelihood:                -97.454
No. Observations:                  20   AIC:                             204.9
Df Residuals:                      15   BIC:                             209.9
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept   -625.3583    114.378     -5.467      0.000      -869.150  -381.566
x1            15.1962      2.127      7.146      0.000        10.663    19.729
x2             7.3785      1.889      3.907      0.001         3.353    11.404
x3             9.5034      1.342      7.082      0.000         6.643    12.364
x4            -0.8468      1.493     -0.567      0.579        -4.029     2.335
==============================================================================
Omnibus:                        0.492   Durbin-Watson:                   1.620
Prob(Omnibus):                  0.782   Jarque-Bera (JB):                0.578
Skew:                          -0.294   Prob(JB):                        0.749
Kurtosis:                       2.409   Cond. No.                     1.38e+03
==============================================================================
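In this fit x4 is not significant (t = -0.567, p = 0.579), so a reasonable follow-up is to refit without it and test whether anything is lost; a sketch using the same data:

mod_reduced = ols('y ~ x1 + x2 + x3', dat).fit()
print(mod_reduced.summary())             # compare R-squared and AIC with the full model
print(anova_lm(mod_reduced, mod))        # F-test: does x4 improve on x1 + x2 + x3?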