Analyses of NLSY data set¶

Ground work¶

In [1]:
import pandas as pd
import numpy as np
import sys
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn import preprocessing
import scipy.stats as stats
import corrstats # allows for comparison of 2 correlation coefficients
import a12 # provides Vargha and Delaney's A (non-parametric common language effect size)
import geometric
import seaborn as sns
sns.set_style("whitegrid")
In [2]:
def cohen_d(x,y):
        return (np.mean(x) - np.mean(y)) / math.sqrt((np.std(x, ddof=1) ** 2. + np.std(y, ddof=1) ** 2.) / 2.)
In [3]:
# Load data, excluding "non-response" data
path = 'C:\\Users\\cadlab\\Desktop\\xian\\research\\projects\\risk-delay\\'
df = pd.read_csv(path+'6-17-15.csv', na_values=[-5,-4,-3,-2,-1])
In [4]:
# Rename (some) columns to something sensible
df.rename(columns={'T0961700' : 'PATIENCE_MONTH',\
                   'T0962000' : 'PATIENCE_YEAR',\
                   'T0960500' : 'RISK_10K',\
                   'T0961100' : 'RISK_1K',\
                   'R4395800' : 'RISK_INCOME_MID_RISK',\
                   'R4395900' : 'RISK_INCOME_MORE_RISK',\
                   'R4396000' : 'RISK_INCOME_LESS_RISK',\
                   'T3094800' : 'RISK',\
                   'T3094900' : 'DRIVING',\
                   'T3094901' : 'FINANCIAL',\
                   'T3094902' : 'OCCUPATION',\
                   'T3094903' : 'HEALTH',\
                   'T3094904' : 'FAITH',\
                   'T3094905' : 'ROMANCE',\
                   'T3094906' : 'MAJOR_LIFE',\
                   'T3095000' : 'BETS',\
                   'T0911100' : 'ALCOHOL',\
                   'T0911400' : 'BINGE',\
                   'T2074900' : 'CIGARETTE',\
                   'R3914300' : 'AGE_START_SMOKE',\
                   'R3914200' : 'SMOKE_100_CIGS',\
                   'H0020800' : 'EVER_DEPRESSED',\
                   'H0021000' : 'DEPRESSED_IN_LAST_YEAR',\
                   'H0008600' : 'WORRY_ANX_DEPRESS',\
                   'R6430800' : 'COCAINE',\
                   'R6431200' : 'CRACK',\
                   'R6432000' : 'SEDATIVES',\
                   'R6432100' : 'TRANQUILIZERS',\
                   'R6432200' : 'STIMULANTS',\
                   'R6432300' : 'PAINKILLERS',\
                   'R6432400' : 'INHALANTS',\
                   'R6432500' : 'HALLUCINOGENS',\
                   'R6432600' : 'HEROIN',\
                   'R6432700' : 'STEROIDS',\
                   'R6432800' : 'INJECTIONS',\
                   'R6432900' : 'MDMA',\
                   'R6433000' : 'METH',\
                   'R0006500' : 'SES',\
                   'T0988600' : 'EDU',\
                   'T0989000' : 'AGE',\
                   'T0990700' : 'NJOBS',\
                   'T0988200' : 'EMPLOYED',\
                   'T0910800' : 'MEMORY',\
                   'T0899200' : 'TRYING_LOSE_WEIGHT',\
                   'T0897300' : 'WEIGHT',\
                   'T0897400' : 'HEIGHT_FT',\
                   'T0897500' : 'HEIGHT_IN',\
                   'T0897600' : 'EXERCISE_VIG_FREQ',\
                   'T0897700' : 'EXERCISE_VIG_UNIT',\
                   'T0898100' : 'EXERCISE_MOD_FREQ',\
                   'T0898200' : 'EXERCISE_MOD_UNIT',\
                   'T0898800' : 'DOC_VISIT',\
                   'R0988000' : 'AGE_FIRST_SEX',\
                   'T0899900' : 'HEALTH_INSUR',\
                   'T2176700' : 'CCDEBT',\
                   'T2181800' : 'CCMISSPAY',\
                   'R8417600' : 'CCMAX04',\
                   'T2181900' : 'CCMAX08',\
                   'R6945200' : 'RETIREMENT',\
                   'R8046500' : 'INVEST_STOCK',\
                   'R8046501' : 'INVEST_PRIV_BOND',\
                   'R8046502' : 'INVEST_GOV_BOND',\
                   'T4100000' : 'EMERGENCY_FUND',\
                   'T4100300' : 'FIT_LIT_4',\
                   'T4100400' : 'FIT_LIT_5',\
                   'T4100500' : 'FIT_LIT_6',\
                   'T4100600' : 'FIT_LIT_7',\
                   'T4100700' : 'FIT_LIT_8',\
                   'T2463100' : 'ENTREPRENEUR',\
                   'R0214800' : 'SEX',\
                   'R0214700' : 'RACE',\
                   'T0987900' : 'POVERTY',\
                   'T0987800' : 'INCOME',\
                   'R0618301' : 'IQ',\
                   'R0305100' : 'SHOPLIFT',\
                   'R0305200' : 'STEAL_SMALL',\
                   'R0305300' : 'STEAL_LARGE',\
                   'R0307800' : 'CONVICT',\
                   'R0305000' : 'FIGHT'\
                   }, inplace=True)

Questionable responses¶

In [5]:
# Frequeny of 0 responses to various combinations of the risk items
print(len(df.query('(RISK_10K > 0) & (RISK_1K > 0)')))
print(len(df.query('(RISK_10K == 0) | (RISK_1K == 0)')))
print(len(df.query('(RISK_10K == 0) & (RISK_1K == 0)')))
print(len(df.query('(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_MONTH == 0)')))
print(len(df.query('(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_YEAR == 0)')))
5860
1189
688
263
249
In [6]:
print(len(df.query('(RISK_1K >= 0) & (PATIENCE_MONTH >= 0)')))
print(len(df.query('(RISK_1K == 0) | (PATIENCE_MONTH == 0)')))
print(len(df.query('(RISK_1K == 0) & (PATIENCE_MONTH == 0)')))
print(len(df.query('(RISK_1K > 0) & (PATIENCE_MONTH > 0)')))
7006
1416
330
5648
In [7]:
print(len(df.query('(RISK_10K == 0) | (PATIENCE_YEAR == 0)')))
print(len(df.query('(RISK_10K > 0) & (PATIENCE_YEAR > 0)')))
1336
5653
In [8]:
# Frequeny of 0 responses to various combinations of the delay items
print(len(df.query('(PATIENCE_YEAR > 0) & (PATIENCE_MONTH > 0)')))
print(len(df.query('(PATIENCE_MONTH == 0)')))
print(len(df.query('(PATIENCE_YEAR == 0)')))
print(len(df.query('(PATIENCE_YEAR == 0) & (PATIENCE_MONTH == 0)')))
6164
801
697
498
In [9]:
print(len(df.query('(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_YEAR == 0) & (PATIENCE_MONTH == 0)')))
print(len(df.query('(PATIENCE_YEAR > 0) | (PATIENCE_MONTH > 0)')))
print(len(df.query('(RISK_10K > 0) | (RISK_1K > 0)')))
214
6774
6504
In [10]:
# Frequeny of maximal responses to various combinations of the beh. econ. items
print(len(df.query('(RISK_10K == 10000) & (RISK_1K == 1000)')))
print(len(df.query('(RISK_10K == 10000) | (RISK_1K == 1000)')))
print(len(df.query('(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_MONTH == 0)')))
print(len(df.query('(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_YEAR == 0)')))
print(len(df.query('(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_MONTH == 0) & (PATIENCE_YEAR == 0)')))
956
1699
88
70
46
In [11]:
# Correlation of raw responses
print(df['PATIENCE_MONTH'].corr(df['PATIENCE_YEAR']))
print(df['RISK_1K'].corr(df['RISK_10K']))
0.498386157731
0.674114469672

Data cleaning¶

In [12]:
data = df.copy()

# Convert zero responses on beh. econ. items to NaN
data.replace({'RISK_1K': {0: np.nan}}, inplace=True)
data.replace({'RISK_10K': {0: np.nan}}, inplace=True)
data.replace({'PATIENCE_MONTH': {0: np.nan}}, inplace=True)
data.replace({'PATIENCE_YEAR': {0: np.nan}}, inplace=True)

# Modify DOC_VISIT so that it's monotonic with recency of doctor visit
data.replace({'DOC_VISIT': {0: 6}}, inplace=True)

# Modify EMPLOYED so that it's only employed/not
data.replace({'EMPLOYED': {2: 0}}, inplace=True)
data.replace({'EMPLOYED': {3: np.nan}}, inplace=True)
data.replace({'EMPLOYED': {4: np.nan}}, inplace=True)

Transforming Variables¶

In [13]:
# Log transform the patience items
data['PATIENCE_MONTH'] = np.log(data['PATIENCE_MONTH'].values)
data['PATIENCE_YEAR'] = np.log(data['PATIENCE_YEAR'].values)

# Log transform income
# some response income as $0, so add $1 before transforming
print(len(data.query('(INCOME == 0)')))
data['INCOME'] = np.log(1+data['INCOME'])
289

Composite Variables¶

In [14]:
# Construct a substance use variable from idividual items
data['DRUG'] = (
    (data['COCAINE'] > 0) |
    (data['CRACK'] > 0) |
    (data['STIMULANTS'] > 0) | 
    (data['PAINKILLERS'] > 0) |
    (data['INHALANTS'] > 0) |
    (data['HALLUCINOGENS'] > 0) |
    (data['HEROIN'] > 0) |
    (data['MDMA'] > 0) |
    (data['METH'] > 0)
    )
data['DRUG'] = data['DRUG'].astype(float)

# Construct a maxed-out credit card variable
data['CCMAX'] = (data['CCMAX04'] > 0)
data['CCMAX'] = data['CCMAX'].astype(float)
In [15]:
# compute BMI
data['BMI'] = 703.06958 * data['WEIGHT']/((data['HEIGHT_FT'] * 12) + data['HEIGHT_IN'])**2
data['BMI'] = np.log(data['BMI'])
p = plt.hist(data.dropna(subset = ['BMI'])['BMI'].values, 100)

Deal with Exercise¶

In [16]:
exerData = data.copy()
exerData['EXERCISE'] = exerData['EXERCISE_VIG_FREQ']
exerData[exerData['EXERCISE_VIG_FREQ'] == 5] = 0.
exerData[exerData['EXERCISE_VIG_FREQ'] == 2] = exerData[exerData['EXERCISE_VIG_UNIT'] == 2] / 7.
exerData[exerData['EXERCISE_VIG_FREQ'] == 3] = exerData[exerData['EXERCISE_VIG_UNIT'] == 3] / 30.
exerData[exerData['EXERCISE_VIG_FREQ'] == 4] = exerData[exerData['EXERCISE_VIG_UNIT'] == 4] / 365.
exerData['EXERCISE'] = exerData['EXERCISE'].astype(float)
labels = ['EXERCISE']
p = plt.hist(exerData.dropna(subset = [labels])[labels].values, 10, range=[0,10])
print(len(exerData[exerData['EXERCISE'] == 0]))
print(len(exerData[exerData['EXERCISE'] > 0]))
2993
2787
In [17]:
# convert exercise to a binary variable
data['EXERCISE'] = (exerData['EXERCISE']> 0)
data['EXERCISE'] = data['EXERCISE'].astype(float)
labels = ['EXERCISE']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 2, range=[0,1])

Deal with Income Risk¶

In [18]:
# how many would accept/decline the intial job offer
print(len(data[data['RISK_INCOME_MID_RISK'] == 0]))
print(len(data[data['RISK_INCOME_MID_RISK'] == 1]))
5193
3774
In [19]:
# how many would accept/decline the worse job offer
print(len(data[data['RISK_INCOME_MORE_RISK'] == 0]))
print(len(data[data['RISK_INCOME_MORE_RISK'] == 1]))
1504
2257
In [20]:
# how many would accept/decline the better job offer
print(len(data[data['RISK_INCOME_LESS_RISK'] == 0]))
print(len(data[data['RISK_INCOME_LESS_RISK'] == 1]))
4162
1035
In [21]:
# Bin Rs by how they responded to the income risk items (branching logic)

data.loc[data.loc[:, 'RISK_INCOME_LESS_RISK']==0, 'RISK_INCOME'] = -1.5
data.loc[data.loc[:, 'RISK_INCOME_LESS_RISK']==1, 'RISK_INCOME'] = -.5
data.loc[data.loc[:, 'RISK_INCOME_MORE_RISK']==0, 'RISK_INCOME'] = .5
data.loc[data.loc[:, 'RISK_INCOME_MORE_RISK']==1, 'RISK_INCOME'] = 1.5
In [22]:
labels = ['RISK_INCOME']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 4, range=[-2,2])
In [23]:
print(data['RISK_INCOME'].corr(data['RISK_1K']))
print(data['RISK_INCOME'].corr(data['RISK_10K']))
print(data['RISK_INCOME'].corr(data['PATIENCE_MONTH']))
print(data['RISK_INCOME'].corr(data['PATIENCE_YEAR']))
0.00614310914635
0.0055190069437
0.00241784387466
-0.00482971088635
In [24]:
g = sns.PairGrid(data, y_vars=['RISK_1K', 'RISK_10K', 'PATIENCE_MONTH', 'PATIENCE_YEAR'], x_vars='RISK_INCOME')
g.map(sns.pointplot)
Out[24]:
<seaborn.axisgrid.PairGrid at 0xb5d5b00>

Inspect Variables¶

In [25]:
labels = ['BINGE']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 10, range=[0,10])
In [26]:
p = plt.hist(data.dropna(subset = ['AGE_FIRST_SEX'])['AGE_FIRST_SEX'].values, 25)
In [27]:
p = plt.hist(data.dropna(subset = ['AGE_START_SMOKE'])['AGE_START_SMOKE'].values, 35)
In [28]:
data.replace({'AGE_START_SMOKE': {0: np.nan}}, inplace=True)
In [26]:
fig, axes = plt.subplots(nrows=1, ncols=2)
p = sns.violinplot(data.dropna(subset = [['RISK_1K']])[['RISK_1K']], ax=axes[0])
p.axes.set_ylim(0,1000)
p = sns.violinplot(data.dropna(subset = [['RISK_10K']])[['RISK_10K']], ax=axes[1])
p.axes.set_ylim(0,10000)
C:\Users\cadlab\AppData\Local\Enthought\Canopy\User\lib\site-packages\seaborn\categorical.py:2296: UserWarning: The violinplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[26]:
(0, 10000)
In [27]:
labels = ['PATIENCE_MONTH', 'PATIENCE_YEAR']
plt.figure()
for i in range(len(labels)):
    p = sns.distplot(data.dropna(subset = [[labels[i]]])[[labels[i]]], hist=False, label=labels[i])
plt.xlim(0, 16)
plt.legend()
Out[27]:
<matplotlib.legend.Legend at 0xe998198>
In [28]:
# RISK_1K as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
takers = data[data['RISK_INCOME'] > 0]
nontakers = data[data['RISK_INCOME'] < 0]
print(np.mean(takers['RISK_1K']))
p = sns.violinplot(takers.dropna(subset=[['RISK_1K']])[['RISK_1K']], ax=axes[0])
p.axes.set_ylim(0,1000)
print(np.mean(nontakers['RISK_1K']))
p = sns.violinplot(nontakers.dropna(subset=[['RISK_1K']])[['RISK_1K']], ax=axes[1])
p.axes.set_ylim(0,1000)
print(stats.ttest_ind(takers.dropna(subset=[['RISK_1K']])['RISK_1K'], \
                      nontakers.dropna(subset=[['RISK_1K']])['RISK_1K'], equal_var=False))
615.327634195
613.467079347
Ttest_indResult(statistic=0.25463761126139706, pvalue=0.79901262051257471)
In [29]:
# RISK_10K as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['RISK_10K']))
p = sns.violinplot(takers.dropna(subset=[['RISK_10K']])[['RISK_10K']], ax=axes[0])
p.axes.set_ylim(0,10000)
print(np.mean(nontakers['RISK_10K']))
p = sns.violinplot(nontakers.dropna(subset=[['RISK_10K']])[['RISK_10K']], ax=axes[1])
p.axes.set_ylim(0,10000)
print(stats.ttest_ind(takers.dropna(subset=[['RISK_10K']])['RISK_10K'], \
                      nontakers.dropna(subset=[['RISK_10K']])['RISK_10K'], equal_var=False))
5664.92109437
5594.80756014
Ttest_indResult(statistic=0.87488306439506813, pvalue=0.38167555108061479)
In [30]:
# PATIENCE_MONTH as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['PATIENCE_MONTH']))
p = sns.violinplot(takers.dropna(subset=[['PATIENCE_MONTH']])[['PATIENCE_MONTH']], ax=axes[0])
p.axes.set_ylim(0,14)
print(np.mean(nontakers['PATIENCE_MONTH']))
p = sns.violinplot(nontakers.dropna(subset=[['PATIENCE_MONTH']])[['PATIENCE_MONTH']], ax=axes[1])
p.axes.set_ylim(0,14)
print(stats.ttest_ind(takers.dropna(subset=[['PATIENCE_MONTH']])['PATIENCE_MONTH'], \
                      nontakers.dropna(subset=[['PATIENCE_MONTH']])['PATIENCE_MONTH'], equal_var=False))
5.68267117848
5.66831413124
Ttest_indResult(statistic=0.3587772504100703, pvalue=0.71977516009263709)
In [31]:
# PATIENCE_YEAR as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['PATIENCE_YEAR']))
p = sns.violinplot(takers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']], ax=axes[0])
p.axes.set_ylim(0,14)
print(np.mean(nontakers['PATIENCE_YEAR']))
p = sns.violinplot(nontakers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']], ax=axes[1])
p.axes.set_ylim(0,14)
print(stats.ttest_ind(takers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']]['PATIENCE_YEAR'], \
                      nontakers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']]['PATIENCE_YEAR'], equal_var=False))
6.81986367642
6.83009515285
Ttest_indResult(statistic=-0.26927627853950914, pvalue=0.78772702691942698)
In [32]:
labels = ['IQ']
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,120000)
Out[32]:
(0, 120000)
In [33]:
labels = ['CCMAX04']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 10, range=[0,10])
In [35]:
labels = ['INCOME']
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,15)
Out[35]:
(0, 15)
In [36]:
labels = ['RISK', 'DRIVING', 'FINANCIAL', 'OCCUPATION', 'HEALTH', 'FAITH', 'ROMANCE', 'MAJOR_LIFE', 'BETS']
plt.figure(figsize=(12, 5))
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,10)
Out[36]:
(0, 10)
In [54]:
# correlation matrix
# some of the are categorical, beware of interpretation in those cases
labels = ['RISK_1K', 'RISK_10K', 'RISK_INCOME', 'PATIENCE_MONTH', 'PATIENCE_YEAR', 'AGE', 'SEX', 'IQ',\
          'INCOME', 'EXERCISE', 'DOC_VISIT', 'HEALTH_INSUR', 'INVEST_STOCK', 'INVEST_PRIV_BOND', 'INVEST_GOV_BOND',\
          'CCMAX', 'DRUG', 'ALCOHOL', 'BINGE', 'CONVICT', 'CIGARETTE',\
          'RISK', 'DRIVING', 'FINANCIAL', 'OCCUPATION', 'HEALTH', 'FAITH', 'ROMANCE', 'MAJOR_LIFE', 'BETS']
justcorrs = data[labels]
justcorrs = justcorrs.corr()
fig, ax = plt.subplots()
fig.set_size_inches(12,8)
ax.pcolor(justcorrs, cmap='RdBu', edgecolors='k', vmin=-1., vmax=1.)
ax.invert_yaxis()
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom='off',      # ticks along the bottom edge are off
    top='off',         # ticks along the top edge are off
    labelbottom='off',
    labeltop='on') # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    left='off',      # ticks along the bottom edge are off
    right='off',         # ticks along the top edge are off
    labelright='off',
    labelleft='on') # labels along the bottom edge are off
ytick = plt.yticks(np.arange(0.5, len(labels), 1), labels)
xtick = plt.xticks(np.arange(0.5, len(labels), 1), labels, rotation=90)
In [55]:
# where are the large correlations?
fig, ax = plt.subplots()
fig.set_size_inches(12,8)
threshold = .1
slammedcorrs = justcorrs.copy()
slammedcorrs[slammedcorrs[:] < -threshold] = -1
slammedcorrs[slammedcorrs[:] > threshold] = 1
slammedcorrs[(slammedcorrs[:] < threshold) & (slammedcorrs[:] > -threshold)] = 0
ax.pcolor(slammedcorrs, cmap='RdBu', edgecolors='k', vmin=-1., vmax=1.)
ax.invert_yaxis()
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom='off',      # ticks along the bottom edge are off
    top='off',         # ticks along the top edge are off
    labelbottom='off',
    labeltop='on') # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    left='off',      # ticks along the bottom edge are off
    right='off',         # ticks along the top edge are off
    labelright='off',
    labelleft='on') # labels along the bottom edge are off
ytick = plt.yticks(np.arange(0.5, len(labels), 1), labels)
xtick = plt.xticks(np.arange(0.5, len(labels), 1), labels, rotation=90)

Risk-Delay Correlation¶

In [71]:
print(data[['PATIENCE_MONTH', 'RISK_1K']].corr())
p = sns.lmplot('RISK_1K', 'PATIENCE_MONTH', data)
p.axes[0,0].set_xlim(0,1000)
p.axes[0,0].set_ylim(0,3)
                PATIENCE_MONTH   RISK_1K
PATIENCE_MONTH        1.000000  0.146902
RISK_1K               0.146902  1.000000
Out[71]:
(0, 3)
In [68]:
# Waiting a year
print(data[['PATIENCE_YEAR', 'RISK_1K']].corr())
p = sns.lmplot('RISK_1K', 'PATIENCE_YEAR', data)
p.axes[0,0].set_xlim(0,1000)
p.axes[0,0].set_ylim(0,)
               PATIENCE_YEAR   RISK_1K
PATIENCE_YEAR       1.000000  0.156027
RISK_1K             0.156027  1.000000
Out[68]:
(0, 3.0)
In [69]:
# $10,000 lottery ticket
print(data[['PATIENCE_MONTH', 'RISK_10K']].corr())
p = sns.lmplot('RISK_10K', 'PATIENCE_MONTH', data)
p.axes[0,0].set_xlim(0,10000)
                PATIENCE_MONTH  RISK_10K
PATIENCE_MONTH        1.000000  0.106218
RISK_10K              0.106218  1.000000
Out[69]:
(0, 10000)
In [70]:
# Waiting a year and $10,000 lottery ticket
print(data[['PATIENCE_YEAR', 'RISK_10K']].corr())
p = sns.lmplot('RISK_10K', 'PATIENCE_YEAR', data)
p.axes[0,0].set_xlim(0,10000)
p.axes[0,0].set_ylim(0,)
               PATIENCE_YEAR  RISK_10K
PATIENCE_YEAR       1.000000  0.135245
RISK_10K            0.135245  1.000000
Out[70]:
(0, 3.0)

Self-Reported Risk¶

In [782]:
#RISK
#DRIVING
#FINANCIAL
#OCCUPATION
#HEALTH
#FAITH
#ROMANCE
#MAJOR_LIFE
#BETS

# Eliminate subjects not reporting risk
r = 'RISK'
In [783]:
# beh econ risk vs. self-reported risk
print(data[r].corr(data['RISK_1K']))
p = sns.lmplot(r, 'RISK_1K', data, x_jitter=.3, y_jitter=50)
p.set(ylim=(0, 1000), xlim=(0, 10))
0.0151567157412
Out[783]:
<seaborn.axisgrid.FacetGrid at 0x363e28d0>
In [784]:
# beh econ patience vs. self-reported risk
print(data[r].corr(data['PATIENCE_MONTH']))
p = sns.lmplot(r, 'PATIENCE_MONTH', data, x_jitter=.3)
p.set(ylim=(0, 10), xlim=(0, 10))
-0.0254355026168
Out[784]:
<seaborn.axisgrid.FacetGrid at 0x23c7bdd8>

Moderation¶

Financial Literacy #4¶

  • Buying a single company stock usually provides a safer return than a stock mutual fund
  • 1: yes
  • 0: no
In [115]:
finlit4Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_4'])
highGroup = finlit4Data[finlit4Data['FIT_LIT_4'] == 1]
lowGroup = finlit4Data[finlit4Data['FIT_LIT_4'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[972, 3266]
In [116]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.9307861182
5.54854667476
4.51905794828
4.57500636892
Ttest_indResult(statistic=6.9199995332129616, pvalue=6.507371956009053e-12)
In [118]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
606.689300412
604.872014697
287.198566561
273.187261795
RanksumsResult(statistic=0.12976496272909169, pvalue=0.8967523841655517)
In [117]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))

print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.247357293869
-0.268966593818
287.198566561
273.187261795
RanksumsResult(statistic=0.57122862088247495, pvalue=0.56784467959760676)
In [298]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit4Data, hue='FIT_LIT_4')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
[0.14694367352936188, 0.13618943260628158]
[0.33501288688564917, 0.73761536506521352]
Out[298]:
(0, 15)
In [293]:
# traditional moderation analyses
finlit4Data['RISK_1K'] = preprocessing.scale(finlit4Data['RISK_1K'].values)
finlit4Data['PATIENCE_MONTH'] = preprocessing.scale(finlit4Data['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_4', data=finlit4Data).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_4', data=finlit4Data).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     20.97
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.76e-13
Time:                        16:53:10   Log-Likelihood:                -5982.2
No. Observations:                4238   AIC:                         1.197e+04
Df Residuals:                    4234   BIC:                         1.200e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                    0.0044      0.017      0.254      0.800        -0.030     0.039
PATIENCE_MONTH               0.1032      0.017      5.915      0.000         0.069     0.137
FIT_LIT_4                   -0.0322      0.037     -0.874      0.382        -0.104     0.040
PATIENCE_MONTH:FIT_LIT_4     0.0670      0.037      1.829      0.068        -0.005     0.139
==============================================================================
Omnibus:                      253.041   Durbin-Watson:                   1.942
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               93.852
Skew:                          -0.025   Prob(JB):                     4.17e-21
Kurtosis:                       2.273   Cond. No.                         2.74
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.025
Model:                            OLS   Adj. R-squared:                  0.025
Method:                 Least Squares   F-statistic:                     36.54
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.64e-23
Time:                        16:53:10   Log-Likelihood:                -5959.3
No. Observations:                4238   AIC:                         1.193e+04
Df Residuals:                    4234   BIC:                         1.195e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept            -0.0573      0.017     -3.314      0.001        -0.091    -0.023
RISK_1K               0.1049      0.017      5.997      0.000         0.071     0.139
FIT_LIT_4             0.2495      0.036      6.913      0.000         0.179     0.320
RISK_1K:FIT_LIT_4     0.0491      0.035      1.397      0.162        -0.020     0.118
==============================================================================
Omnibus:                      667.260   Durbin-Watson:                   1.815
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1850.590
Skew:                          -0.844   Prob(JB):                         0.00
Kurtosis:                       5.763   Cond. No.                         2.54
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Financial Literacy #5¶

  • Suppose you had $100 in a savings account and the interest rate was 2 percent per year. After 5 years, how much do you think you would have in the account if you left the money to grow: more than $102, exactly $102, or less than $102?
  • 1: More than $102
  • 2: Exactly $102
  • 3: Less than $102
In [119]:
finlit5Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_5'])
highGroup = finlit5Data[finlit5Data['FIT_LIT_5'] > 1]
lowGroup = finlit5Data[finlit5Data['FIT_LIT_5'] == 1]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1253, 3665]
In [120]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.82472274386
5.59419076096
4.78627007241
4.511210406
Ttest_indResult(statistic=4.5407069474113975, pvalue=5.9244084409431759e-06)
In [121]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
614.542697526
607.606821282
285.878982212
274.841573331
RanksumsResult(statistic=0.79459158535058827, pvalue=0.42685111470604331)
In [307]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.30847595762
-0.279698828779
RanksumsResult(statistic=-0.85170537646005195, pvalue=0.39437763580644647)
In [308]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.14694367352936188, 0.13618943260628158]
[0.33501288688564917, 0.73761536506521352]
In [310]:
finlit5Data.replace({'FIT_LIT_5': {2:0, 3:0}}, inplace=True)
In [311]:
# traditional moderation analyses
finlit5Data['RISK_1K'] = preprocessing.scale(finlit5Data['RISK_1K'].values)
finlit5Data['PATIENCE_MONTH'] = preprocessing.scale(finlit5Data['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_5', data=finlit5Data).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_5', data=finlit5Data).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     32.55
Date:                Fri, 20 May 2016   Prob (F-statistic):           7.93e-21
Time:                        17:09:32   Log-Likelihood:                -6930.0
No. Observations:                4918   AIC:                         1.387e+04
Df Residuals:                    4914   BIC:                         1.389e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                    0.0020      0.028      0.072      0.943        -0.053     0.057
PATIENCE_MONTH               0.1473      0.027      5.405      0.000         0.094     0.201
FIT_LIT_5                   -0.0031      0.033     -0.096      0.924        -0.067     0.061
PATIENCE_MONTH:FIT_LIT_5    -0.0109      0.032     -0.342      0.733        -0.073     0.052
==============================================================================
Omnibus:                      303.676   Durbin-Watson:                   1.942
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              112.328
Skew:                          -0.053   Prob(JB):                     4.06e-25
Kurtosis:                       2.267   Cond. No.                         3.92
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     39.63
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.68e-25
Time:                        17:09:32   Log-Likelihood:                -6919.6
No. Observations:                4918   AIC:                         1.385e+04
Df Residuals:                    4914   BIC:                         1.387e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.1099      0.028      3.935      0.000         0.055     0.165
RISK_1K               0.1465      0.027      5.402      0.000         0.093     0.200
FIT_LIT_5            -0.1476      0.032     -4.561      0.000        -0.211    -0.084
RISK_1K:FIT_LIT_5    -0.0106      0.032     -0.334      0.738        -0.073     0.052
==============================================================================
Omnibus:                      733.116   Durbin-Watson:                   1.785
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1991.340
Skew:                          -0.810   Prob(JB):                         0.00
Kurtosis:                       5.664   Cond. No.                         3.75
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Financial Literacy #6¶

  • Imagine that the interest rate on your savings account was 1% per year and inflation was 2% per year. After 1 year, would you be able to buy more than, exactly the same as, or less than today with the money in this account?
  • 1: More
  • 2: Exactly the same
  • 3: Less
In [122]:
finlit6Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_6'])
highGroup = finlit6Data[finlit6Data['FIT_LIT_6'] < 3]
lowGroup = finlit6Data[finlit6Data['FIT_LIT_6'] == 3]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[895, 3960]
In [123]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.96698243079
5.57877546414
4.17228584026
4.67095188583
Ttest_indResult(statistic=7.230609298498325, pvalue=7.8729795477286083e-13)
In [124]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
610.189944134
607.61540404
290.254948056
274.663609344
RanksumsResult(statistic=0.26450254015333835, pvalue=0.79139270773162451)
In [316]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.23490800055647693, 0.11325014036684315]
[3.3897406904800427, 0.00069958764797428685]
In [329]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.09757662295812139, 0.13888721419766356]
[1.2765051922889579, 0.20177699252591874]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit6Data, hue='FIT_LIT_6')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [327]:
# Make responses binary (correct vs. incorrect)
finlit6Data.replace({'FIT_LIT_6': {3:5, 1:6, 2:6}}, inplace=True)
finlit6Data.replace({'FIT_LIT_6': {5:1, 6:0}}, inplace=True)
In [328]:
# traditional moderation analyses
finlit6Data['RISK_1K'] = preprocessing.scale(finlit6Data['RISK_1K'].values)
finlit6Data['PATIENCE_MONTH'] = preprocessing.scale(finlit6Data['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_6', data=finlit6Data).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_6', data=finlit6Data).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     35.32
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.43e-22
Time:                        17:27:49   Log-Likelihood:                -6836.5
No. Observations:                4855   AIC:                         1.368e+04
Df Residuals:                    4851   BIC:                         1.371e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                   -0.0469      0.034     -1.384      0.167        -0.113     0.020
PATIENCE_MONTH               0.2628      0.035      7.425      0.000         0.193     0.332
FIT_LIT_6                    0.0504      0.037      1.348      0.178        -0.023     0.124
PATIENCE_MONTH:FIT_LIT_6    -0.1517      0.039     -3.922      0.000        -0.228    -0.076
==============================================================================
Omnibus:                      292.866   Durbin-Watson:                   1.943
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              108.912
Skew:                          -0.044   Prob(JB):                     2.24e-24
Kurtosis:                       2.272   Cond. No.                         5.21
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.029
Model:                            OLS   Adj. R-squared:                  0.029
Method:                 Least Squares   F-statistic:                     48.67
Date:                Fri, 20 May 2016   Prob (F-statistic):           5.56e-31
Time:                        17:27:49   Log-Likelihood:                -6817.0
No. Observations:                4855   AIC:                         1.364e+04
Df Residuals:                    4851   BIC:                         1.367e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.2056      0.033      6.239      0.000         0.141     0.270
RISK_1K               0.2100      0.032      6.663      0.000         0.148     0.272
FIT_LIT_6            -0.2522      0.036     -6.912      0.000        -0.324    -0.181
RISK_1K:FIT_LIT_6    -0.0945      0.035     -2.681      0.007        -0.164    -0.025
==============================================================================
Omnibus:                      695.853   Durbin-Watson:                   1.794
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1889.362
Skew:                          -0.781   Prob(JB):                         0.00
Kurtosis:                       5.627   Cond. No.                         4.46
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Financial Literacy #7¶

  • If interest rates rise, what will typically happen to bond prices?
  • 1: rise
  • 2: fall
  • 3: stay the same
  • 4: no relationship
In [125]:
finlit7Data  = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_7'])
highGroup = finlit7Data[finlit7Data['FIT_LIT_7'] == 2]
lowGroup = finlit7Data[(finlit7Data['FIT_LIT_7'] < 2) | (finlit7Data['FIT_LIT_7'] > 2)]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1463, 2550]
In [126]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.49588174724
5.74757390775
4.71311014012
4.53481444734
Ttest_indResult(statistic=-4.9934030035989849, pvalue=6.2736531668578401e-07)
In [127]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
602.103212577
608.917647059
266.456640169
281.434729455
RanksumsResult(statistic=-0.95098004408611769, pvalue=0.34161450629232026)
In [323]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.18843069874
-0.307474226804
RanksumsResult(statistic=2.379664687020957, pvalue=0.017328398003038203)
In [809]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit7Data, hue='FIT_LIT_7')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
[0.097576622958120807, 0.1388872141976652, 0.20177699252589543]
Out[809]:
(0, 15)
In [330]:
# Make responses binary (correct vs. incorrect)
finlit7Data.replace({'FIT_LIT_7': {2:5, 1:6, 3:6, 4:6}}, inplace=True)
finlit7Data.replace({'FIT_LIT_7': {5:1, 6:0}}, inplace=True)
In [332]:
# traditional moderation analyses
finlit7Data['RISK_1K'] = preprocessing.scale(finlit7Data['RISK_1K'].values)
finlit7Data['PATIENCE_MONTH'] = preprocessing.scale(finlit7Data['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_7', data=finlit7Data).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_7', data=finlit7Data).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                  0.015
Method:                 Least Squares   F-statistic:                     21.88
Date:                Fri, 20 May 2016   Prob (F-statistic):           4.72e-14
Time:                        17:31:58   Log-Likelihood:                -5661.6
No. Observations:                4013   AIC:                         1.133e+04
Df Residuals:                    4009   BIC:                         1.136e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                    0.0004      0.020      0.021      0.984        -0.038     0.039
PATIENCE_MONTH               0.1434      0.020      7.204      0.000         0.104     0.182
FIT_LIT_7                   -0.0064      0.033     -0.195      0.845        -0.070     0.058
PATIENCE_MONTH:FIT_LIT_7    -0.0504      0.032     -1.553      0.121        -0.114     0.013
==============================================================================
Omnibus:                      237.140   Durbin-Watson:                   1.986
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               88.589
Skew:                          -0.032   Prob(JB):                     5.80e-20
Kurtosis:                       2.275   Cond. No.                         2.49
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     29.75
Date:                Fri, 20 May 2016   Prob (F-statistic):           5.11e-19
Time:                        17:31:58   Log-Likelihood:                -5650.0
No. Observations:                4013   AIC:                         1.131e+04
Df Residuals:                    4009   BIC:                         1.133e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.0587      0.020      2.997      0.003         0.020     0.097
RISK_1K               0.1346      0.019      7.000      0.000         0.097     0.172
FIT_LIT_7            -0.1616      0.032     -4.979      0.000        -0.225    -0.098
RISK_1K:FIT_LIT_7    -0.0322      0.033     -0.975      0.330        -0.097     0.033
==============================================================================
Omnibus:                      597.267   Durbin-Watson:                   1.788
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1743.691
Skew:                          -0.784   Prob(JB):                         0.00
Kurtosis:                       5.823   Cond. No.                         2.44
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Financial Literacy #8¶

  • A 15-year mortgage typically requires higher monthly payments than a 30-year mortgage, but the total interest paid over the life of the loan will be less.
  • 0: False
  • 1: True
In [128]:
finlit8Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_8'])
highGroup = finlit8Data[finlit8Data['FIT_LIT_8'] == 1]
lowGroup = finlit8Data[finlit8Data['FIT_LIT_8'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[4173, 691]
In [130]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.57181477745
6.09959877856
4.6330885166
4.15827343644
Ttest_indResult(statistic=-8.912508467928733, pvalue=2.4128283368618584e-18)
In [131]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
605.162952312
627.513748191
274.950795412
291.588172354
RanksumsResult(statistic=-2.0795132580890332, pvalue=0.037570200491370748)
In [336]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.30675146771
-0.206408345753
RanksumsResult(statistic=-1.8993936760867924, pvalue=0.057512734352180121)
In [337]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.13061489660901585, 0.15125573239750181]
[0.51178893099145373, 0.60879873801921525]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit8Data, hue='FIT_LIT_8')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [338]:
# traditional moderation analyses
finlit8Data['RISK_1K'] = preprocessing.scale(finlit8Data['RISK_1K'].values)
finlit8Data['PATIENCE_MONTH'] = preprocessing.scale(finlit8Data['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_8', data=finlit8Data).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_8', data=finlit8Data).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     30.96
Date:                Fri, 20 May 2016   Prob (F-statistic):           8.14e-20
Time:                        17:41:44   Log-Likelihood:                -6855.7
No. Observations:                4864   AIC:                         1.372e+04
Df Residuals:                    4860   BIC:                         1.375e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                    0.0186      0.040      0.470      0.638        -0.059     0.096
PATIENCE_MONTH               0.1706      0.040      4.216      0.000         0.091     0.250
FIT_LIT_8                   -0.0237      0.042     -0.559      0.576        -0.107     0.059
PATIENCE_MONTH:FIT_LIT_8    -0.0415      0.043     -0.959      0.338        -0.126     0.043
==============================================================================
Omnibus:                      291.262   Durbin-Watson:                   1.927
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              108.913
Skew:                          -0.049   Prob(JB):                     2.24e-24
Kurtosis:                       2.273   Cond. No.                         6.28
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.031
Method:                 Least Squares   F-statistic:                     53.63
Date:                Fri, 20 May 2016   Prob (F-statistic):           4.27e-34
Time:                        17:41:44   Log-Likelihood:                -6822.5
No. Observations:                4864   AIC:                         1.365e+04
Df Residuals:                    4860   BIC:                         1.368e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.2868      0.038      7.643      0.000         0.213     0.360
RISK_1K               0.1341      0.036      3.764      0.000         0.064     0.204
FIT_LIT_8            -0.3343      0.040     -8.254      0.000        -0.414    -0.255
RISK_1K:FIT_LIT_8    -0.0020      0.039     -0.051      0.959        -0.078     0.074
==============================================================================
Omnibus:                      723.147   Durbin-Watson:                   1.813
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1991.138
Skew:                          -0.803   Prob(JB):                         0.00
Kurtosis:                       5.691   Cond. No.                         5.22
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Emergency Fund¶

In [132]:
emergData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EMERGENCY_FUND'])
highGroup = emergData[emergData['EMERGENCY_FUND'] == 1]
lowGroup = emergData[emergData['EMERGENCY_FUND'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1921, 3138]
In [133]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.42289695205
5.80929337639
4.88944989002
4.31696499445
Ttest_indResult(statistic=-8.6535807436898864, pvalue=7.249458673928688e-18)
In [134]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
619.319625195
604.024537922
267.182451013
283.929158567
Ttest_indResult(statistic=1.928855940074617, pvalue=0.053815279310263857)
In [29]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.313397129187
-0.279622395833
(-0.55429006820311288, 0.57938034627772628)
In [340]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.1079879578546849, 0.16554933617081041]
[2.024099648654885, 0.042959899772015975]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', emergData, hue='EMERGENCY_FUND')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [342]:
# traditional moderation analyses
emergData['RISK_1K'] = preprocessing.scale(emergData['RISK_1K'].values)
emergData['PATIENCE_MONTH'] = preprocessing.scale(emergData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EMERGENCY_FUND', data=emergData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EMERGENCY_FUND', data=emergData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     38.91
Date:                Fri, 20 May 2016   Prob (F-statistic):           7.51e-25
Time:                        17:52:07   Log-Likelihood:                -7120.7
No. Observations:                5059   AIC:                         1.425e+04
Df Residuals:                    5055   BIC:                         1.428e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================================
                                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------------
Intercept                        -0.0379      0.018     -2.135      0.033        -0.073    -0.003
PATIENCE_MONTH                    0.1762      0.018      9.584      0.000         0.140     0.212
EMERGENCY_FUND                    0.0877      0.029      3.034      0.002         0.031     0.144
PATIENCE_MONTH:EMERGENCY_FUND    -0.0765      0.028     -2.694      0.007        -0.132    -0.021
==============================================================================
Omnibus:                      306.900   Durbin-Watson:                   1.960
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              113.958
Skew:                          -0.046   Prob(JB):                     1.80e-25
Kurtosis:                       2.271   Cond. No.                         2.54
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.036
Model:                            OLS   Adj. R-squared:                  0.035
Method:                 Least Squares   F-statistic:                     62.31
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.48e-39
Time:                        17:52:07   Log-Likelihood:                -7086.6
No. Observations:                5059   AIC:                         1.418e+04
Df Residuals:                    5055   BIC:                         1.421e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  0.0996      0.018      5.679      0.000         0.065     0.134
RISK_1K                    0.1556      0.017      9.067      0.000         0.122     0.189
EMERGENCY_FUND            -0.2610      0.028     -9.167      0.000        -0.317    -0.205
RISK_1K:EMERGENCY_FUND    -0.0386      0.029     -1.332      0.183        -0.095     0.018
==============================================================================
Omnibus:                      747.095   Durbin-Watson:                   1.801
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2099.169
Skew:                          -0.793   Prob(JB):                         0.00
Kurtosis:                       5.729   Cond. No.                         2.45
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Age of First Sex¶

In [247]:
firstsexData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'AGE_FIRST_SEX'])
len(firstsexData)
Out[247]:
2334
In [248]:
sexdelayCorr = firstsexData['PATIENCE_MONTH'].corr(firstsexData['AGE_FIRST_SEX'])
sexriskCorr = firstsexData['RISK_1K'].corr(firstsexData['AGE_FIRST_SEX'])
riskdelayCorr = firstsexData['PATIENCE_MONTH'].corr(firstsexData['RISK_1K'])

z,p = corrstats.dependent_corr(sexdelayCorr, sexriskCorr, riskdelayCorr, len(firstsexData))
print([sexdelayCorr, sexriskCorr])
print([z, p])
[-0.17949985455258058, -0.059176368065683167]
[-4.4317061741143613, 9.7816884649937208e-06]
In [252]:
nCat = 5

bins = []
for i in range(nCat):
    i = float(i)
    bins.append(firstsexData['AGE_FIRST_SEX'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)

binned = np.digitize(firstsexData['AGE_FIRST_SEX'], bins)
firstsexData['bin'] = binned
for i in range(nCat):
    print(firstsexData[firstsexData['bin'] == i]['PATIENCE_MONTH'].corr(firstsexData[firstsexData['bin'] == i]['RISK_1K']))

g = sns.PairGrid(firstsexData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
0.214638667472
0.0587950764313
0.0729134923722
0.0904493121142
0.0719206979091
Out[252]:
<seaborn.axisgrid.PairGrid at 0xc06a5c0>
In [824]:
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', firstsexData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
Out[824]:
<seaborn.axisgrid.FacetGrid at 0x3676f128>
In [251]:
# traditional moderation analyses
firstsexData['RISK_1K'] = preprocessing.scale(firstsexData['RISK_1K'].values)
firstsexData['PATIENCE_MONTH'] = preprocessing.scale(firstsexData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*AGE_FIRST_SEX', data=firstsexData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*AGE_FIRST_SEX', data=firstsexData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     12.15
Date:                Fri, 20 May 2016   Prob (F-statistic):           6.87e-08
Time:                        15:27:44   Log-Likelihood:                -3293.7
No. Observations:                2334   AIC:                             6595.
Df Residuals:                    2330   BIC:                             6618.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------------
Intercept                        0.2291      0.132      1.735      0.083        -0.030     0.488
PATIENCE_MONTH                   0.2553      0.130      1.957      0.050        -0.001     0.511
AGE_FIRST_SEX                   -0.0148      0.008     -1.794      0.073        -0.031     0.001
PATIENCE_MONTH:AGE_FIRST_SEX    -0.0091      0.008     -1.142      0.254        -0.025     0.007
==============================================================================
Omnibus:                      152.697   Durbin-Watson:                   1.968
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               54.589
Skew:                          -0.014   Prob(JB):                     1.40e-12
Kurtosis:                       2.251   Cond. No.                         113.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.043
Model:                            OLS   Adj. R-squared:                  0.042
Method:                 Least Squares   F-statistic:                     35.21
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.99e-22
Time:                        15:27:44   Log-Likelihood:                -3260.1
No. Observations:                2334   AIC:                             6528.
Df Residuals:                    2330   BIC:                             6551.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                 1.0737      0.127      8.423      0.000         0.824     1.324
RISK_1K                   0.1729      0.124      1.391      0.164        -0.071     0.417
AGE_FIRST_SEX            -0.0682      0.008     -8.537      0.000        -0.084    -0.052
RISK_1K:AGE_FIRST_SEX    -0.0043      0.008     -0.552      0.581        -0.020     0.011
==============================================================================
Omnibus:                      288.686   Durbin-Watson:                   1.887
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1055.144
Skew:                          -0.582   Prob(JB):                    7.56e-230
Kurtosis:                       6.082   Cond. No.                         102.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Retirement Investment¶

In [136]:
retireData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'RETIREMENT'])
highGroup = retireData[retireData['RETIREMENT'] == 1]
lowGroup = retireData[retireData['RETIREMENT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1280, 3956]
In [137]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.19994437271
5.80981100163
4.73242263996
4.43635443258
Ttest_indResult(statistic=-12.320147916059414, pvalue=1.010569253603058e-33)
In [138]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
606.49453125
610.398129424
272.634281653
278.919847956
RanksumsResult(statistic=-0.10953151674018294, pvalue=0.9127809253992939)
In [346]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.261489698891
-0.310942717698
RanksumsResult(statistic=1.3187496762541331, pvalue=0.18725281324836585)
In [347]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.085405184031171838, 0.14760283582091113]
[1.9595978207854414, 0.050042816293539572]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', retireData, hue='RETIREMENT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [830]:
# traditional moderation analyses
retireData['RISK_1K'] = preprocessing.scale(retireData['RISK_1K'].values)
retireData['PATIENCE_MONTH'] = preprocessing.scale(retireData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*RETIREMENT', data=retireData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*RETIREMENT', data=retireData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     32.69
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           6.29e-21
Time:                        10:33:02   Log-Likelihood:                -7380.9
No. Observations:                5236   AIC:                         1.477e+04
Df Residuals:                    5232   BIC:                         1.480e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=============================================================================================
                                coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------------
Intercept                    -0.0114      0.016     -0.721      0.471        -0.042     0.020
PATIENCE_MONTH                0.1523      0.016      9.418      0.000         0.121     0.184
RETIREMENT                    0.0257      0.033      0.779      0.436        -0.039     0.090
PATIENCE_MONTH:RETIREMENT    -0.0697      0.032     -2.201      0.028        -0.132    -0.008
==============================================================================
Omnibus:                      322.319   Durbin-Watson:                   1.953
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              119.018
Skew:                          -0.048   Prob(JB):                     1.43e-26
Kurtosis:                       2.268   Cond. No.                         2.81
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.047
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     85.79
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           3.49e-54
Time:                        10:33:02   Log-Likelihood:                -7303.8
No. Observations:                5236   AIC:                         1.462e+04
Df Residuals:                    5232   BIC:                         1.464e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              0.0970      0.016      6.250      0.000         0.067     0.127
RISK_1K                0.1431      0.015      9.265      0.000         0.113     0.173
RETIREMENT            -0.3976      0.031    -12.658      0.000        -0.459    -0.336
RISK_1K:RETIREMENT    -0.0547      0.032     -1.722      0.085        -0.117     0.008
==============================================================================
Omnibus:                      794.478   Durbin-Watson:                   1.847
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2241.857
Skew:                          -0.812   Prob(JB):                         0.00
Kurtosis:                       5.764   Cond. No.                         2.52
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Theft (> $50)¶

In [157]:
lgTheftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'STEAL_LARGE'])
In [159]:
# Make theft a binary variable
lgTheftData.replace({'STEAL_LARGE': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
In [168]:
highGroup = lgTheftData[lgTheftData['STEAL_LARGE'] == 1]
lowGroup = lgTheftData[lgTheftData['STEAL_LARGE'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[265, 5095]
In [170]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.8388166393
5.66265789092
3.56410272922
4.65197078472
Ttest_indResult(statistic=2.1712973008973684, pvalue=0.030678240879931339)
In [171]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
613.426415094
608.792737978
285.063599756
277.581036141
Ttest_indResult(statistic=0.25785116961114568, pvalue=0.79670430965557948)
In [396]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.107003891051
-0.306690705128
RanksumsResult(statistic=2.4707307334279176, pvalue=0.013483730341160039)
In [834]:
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Thieves')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
Out[834]:
<matplotlib.legend.Legend at 0x2ea85fd0>
In [399]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.15295163535824852, 0.14209417628051096]
[0.17520420635095602, 0.86091917926660599]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', lgTheftData, hue='STEAL_LARGE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [400]:
# traditional moderation analyses
lgTheftData['RISK_1K'] = preprocessing.scale(lgTheftData['RISK_1K'].values)
lgTheftData['PATIENCE_MONTH'] = preprocessing.scale(lgTheftData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*STEAL_LARGE', data=lgTheftData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*STEAL_LARGE', data=lgTheftData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     37.12
Date:                Mon, 23 May 2016   Prob (F-statistic):           9.75e-24
Time:                        11:37:42   Log-Likelihood:                -7550.4
No. Observations:                5360   AIC:                         1.511e+04
Df Residuals:                    5356   BIC:                         1.514e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================================
                                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------
Intercept                  -2.026e-05      0.014     -0.001      0.999        -0.027     0.027
PATIENCE_MONTH                 0.1408      0.014     10.230      0.000         0.114     0.168
STEAL_LARGE                   -0.0048      0.063     -0.076      0.939        -0.128     0.119
PATIENCE_MONTH:STEAL_LARGE     0.0475      0.074      0.639      0.523        -0.098     0.193
==============================================================================
Omnibus:                      341.635   Durbin-Watson:                   1.941
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              123.868
Skew:                          -0.042   Prob(JB):                     1.27e-27
Kurtosis:                       2.260   Cond. No.                         5.60
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     38.14
Date:                Mon, 23 May 2016   Prob (F-statistic):           2.23e-24
Time:                        11:37:42   Log-Likelihood:                -7548.9
No. Observations:                5360   AIC:                         1.511e+04
Df Residuals:                    5356   BIC:                         1.513e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=======================================================================================
                          coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept              -0.0056      0.014     -0.403      0.687        -0.033     0.022
RISK_1K                 0.1434      0.014     10.324      0.000         0.116     0.171
STEAL_LARGE             0.1134      0.062      1.818      0.069        -0.009     0.236
RISK_1K:STEAL_LARGE    -0.0191      0.061     -0.314      0.753        -0.139     0.100
==============================================================================
Omnibus:                      833.454   Durbin-Watson:                   1.785
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2342.392
Skew:                          -0.832   Prob(JB):                         0.00
Kurtosis:                       5.778   Cond. No.                         4.64
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Theft (< $50)¶

In [172]:
theftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'STEAL_SMALL'])
In [173]:
# Make theft a binary variable
theftData.replace({'STEAL_SMALL': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
In [174]:
highGroup = theftData[theftData['STEAL_SMALL'] == 1]
lowGroup = theftData[theftData['STEAL_SMALL'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1059, 4304]
In [175]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.49833062547
5.71485841284
4.4389006458
4.62294051353
Ttest_indResult(statistic=-4.2107479286083196, pvalue=2.6824748405836808e-05)
In [176]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
616.046270066
606.717936803
279.768244728
277.321772689
Ttest_indResult(statistic=0.97332692380261876, pvalue=0.33053724690632802)
In [407]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.195568400771
-0.323208353109
RanksumsResult(statistic=2.6926811637484862, pvalue=0.0070880015748147288)
In [841]:
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Thieves')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
Out[841]:
<matplotlib.legend.Legend at 0x19bdeac8>
In [408]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.12351929074563077, 0.14697508769818446]
[0.69573752632259123, 0.48659321993305515]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', theftBinData, hue='STEAL_SMALL')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [409]:
# traditional moderation analyses
theftData['RISK_1K'] = preprocessing.scale(theftData['RISK_1K'].values)
theftData['PATIENCE_MONTH'] = preprocessing.scale(theftData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*STEAL_SMALL', data=theftData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*STEAL_SMALL', data=theftData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     37.40
Date:                Mon, 23 May 2016   Prob (F-statistic):           6.54e-24
Time:                        11:48:07   Log-Likelihood:                -7554.2
No. Observations:                5363   AIC:                         1.512e+04
Df Residuals:                    5359   BIC:                         1.514e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================================
                                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------
Intercept                     -0.0107      0.015     -0.711      0.477        -0.040     0.019
PATIENCE_MONTH                 0.1462      0.015      9.721      0.000         0.117     0.176
STEAL_SMALL                    0.0522      0.034      1.528      0.127        -0.015     0.119
PATIENCE_MONTH:STEAL_SMALL    -0.0189      0.035     -0.546      0.585        -0.087     0.049
==============================================================================
Omnibus:                      338.201   Durbin-Watson:                   1.939
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              123.058
Skew:                          -0.040   Prob(JB):                     1.90e-27
Kurtosis:                       2.262   Cond. No.                         2.77
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     43.04
Date:                Mon, 23 May 2016   Prob (F-statistic):           1.77e-27
Time:                        11:48:07   Log-Likelihood:                -7545.9
No. Observations:                5363   AIC:                         1.510e+04
Df Residuals:                    5359   BIC:                         1.513e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=======================================================================================
                          coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept               0.0290      0.015      1.925      0.054        -0.001     0.059
RISK_1K                 0.1478      0.015      9.789      0.000         0.118     0.177
STEAL_SMALL            -0.1461      0.034     -4.309      0.000        -0.213    -0.080
RISK_1K:STEAL_SMALL    -0.0279      0.034     -0.828      0.408        -0.094     0.038
==============================================================================
Omnibus:                      846.206   Durbin-Watson:                   1.788
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2378.942
Skew:                          -0.843   Prob(JB):                         0.00
Kurtosis:                       5.793   Cond. No.                         2.65
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Worry/Anxiety/Depression¶

In [101]:
anxData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'WORRY_ANX_DEPRESS'])
highGroup = anxData[anxData['WORRY_ANX_DEPRESS'] == 1]
lowGroup = anxData[anxData['WORRY_ANX_DEPRESS'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[703, 4931]
In [102]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.74613247892
5.66070153309
4.93472501149
4.53346378157
Ttest_indResult(statistic=1.3353037563611256, pvalue=0.18211812832989677)
In [103]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
621.436699858
609.081930643
288.311617542
277.231881834
Ttest_indResult(statistic=1.0672917707325169, pvalue=0.28612732504383254)
In [256]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.235682819383
-0.302609776305
RanksumsResult(statistic=1.704093070857811, pvalue=0.088363700941841122)
In [848]:
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Anxious')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
Out[848]:
<matplotlib.legend.Legend at 0x36921eb8>
In [258]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.1735723512873866, 0.13201154672166271]
[1.0537083129545466, 0.29201648180939221]
In [257]:
# traditional moderation analyses
anxData['RISK_1K'] = preprocessing.scale(anxData['RISK_1K'].values)
anxData['PATIENCE_MONTH'] = preprocessing.scale(anxData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*WORRY_ANX_DEPRESS', data=anxData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*WORRY_ANX_DEPRESS', data=anxData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     37.00
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.14e-23
Time:                        15:41:36   Log-Likelihood:                -7939.3
No. Observations:                5634   AIC:                         1.589e+04
Df Residuals:                    5630   BIC:                         1.591e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
====================================================================================================
                                       coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------------
Intercept                           -0.0046      0.014     -0.326      0.744        -0.032     0.023
PATIENCE_MONTH                       0.1323      0.014      9.309      0.000         0.104     0.160
WORRY_ANX_DEPRESS                    0.0350      0.040      0.875      0.381        -0.043     0.113
PATIENCE_MONTH:WORRY_ANX_DEPRESS     0.0390      0.038      1.016      0.310        -0.036     0.114
==============================================================================
Omnibus:                      381.968   Durbin-Watson:                   1.939
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              135.429
Skew:                          -0.052   Prob(JB):                     3.91e-30
Kurtosis:                       2.247   Cond. No.                         3.11
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     37.34
Date:                Fri, 20 May 2016   Prob (F-statistic):           6.96e-24
Time:                        15:41:37   Log-Likelihood:                -7938.8
No. Observations:                5634   AIC:                         1.589e+04
Df Residuals:                    5630   BIC:                         1.591e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=============================================================================================
                                coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------------
Intercept                    -0.0063      0.014     -0.445      0.657        -0.034     0.021
RISK_1K                       0.1317      0.014      9.290      0.000         0.104     0.160
WORRY_ANX_DEPRESS             0.0486      0.040      1.215      0.224        -0.030     0.127
RISK_1K:WORRY_ANX_DEPRESS     0.0442      0.039      1.138      0.255        -0.032     0.120
==============================================================================
Omnibus:                      886.074   Durbin-Watson:                   1.791
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2433.761
Skew:                          -0.849   Prob(JB):                         0.00
Kurtosis:                       5.736   Cond. No.                         3.11
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Depression¶

In [104]:
depressData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EVER_DEPRESSED'])
highGroup = depressData[depressData['EVER_DEPRESSED'] == 1]
lowGroup = depressData[depressData['EVER_DEPRESSED'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[611, 3360]
In [105]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.65306931144
5.6766685368
4.87054911251
4.65215551788
Ttest_indResult(statistic=-0.34017626272419571, pvalue=0.73380962150620221)
In [106]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
615.885433715
612.70922619
290.938776188
276.554882886
Ttest_indResult(statistic=0.24990739118338492, pvalue=0.80272136289745177)
In [262]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.35594639866
-0.325189681335
RanksumsResult(statistic=-0.24589731624280414, pvalue=0.80576172138837321)
In [263]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.17571945979076775, 0.13232279988111606]
[1.0087081078131384, 0.31311464223423946]
In [854]:
# traditional moderation analyses
depressData['RISK_1K'] = preprocessing.scale(depressData['RISK_1K'].values)
depressData['PATIENCE_MONTH'] = preprocessing.scale(depressData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EVER_DEPRESSED', data=depressData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EVER_DEPRESSED', data=depressData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     26.66
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           4.55e-17
Time:                        10:33:09   Log-Likelihood:                -5595.0
No. Observations:                3971   AIC:                         1.120e+04
Df Residuals:                    3967   BIC:                         1.122e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================================
                                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------------
Intercept                        -0.0021      0.017     -0.121      0.904        -0.036     0.031
PATIENCE_MONTH                    0.1319      0.017      7.680      0.000         0.098     0.166
EVER_DEPRESSED                    0.0140      0.044      0.322      0.748        -0.071     0.099
PATIENCE_MONTH:EVER_DEPRESSED     0.0470      0.043      1.101      0.271        -0.037     0.131
==============================================================================
Omnibus:                      256.203   Durbin-Watson:                   1.965
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               93.858
Skew:                          -0.065   Prob(JB):                     4.16e-21
Kurtosis:                       2.258   Cond. No.                         2.85
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     26.57
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           5.18e-17
Time:                        10:33:09   Log-Likelihood:                -5595.1
No. Observations:                3971   AIC:                         1.120e+04
Df Residuals:                    3967   BIC:                         1.122e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  0.0026      0.017      0.151      0.880        -0.031     0.036
RISK_1K                    0.1328      0.017      7.707      0.000         0.099     0.167
EVER_DEPRESSED            -0.0172      0.044     -0.394      0.693        -0.103     0.068
RISK_1K:EVER_DEPRESSED     0.0398      0.042      0.946      0.344        -0.043     0.122
==============================================================================
Omnibus:                      579.318   Durbin-Watson:                   1.770
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1636.930
Skew:                          -0.779   Prob(JB):                         0.00
Kurtosis:                       5.732   Cond. No.                         2.86
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Weight Loss¶

In [107]:
#1 Lose weight
#2 Gain weight
#3 Stay about the same
#4 Not trying to do anything

weightData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'TRYING_LOSE_WEIGHT'])
highGroup = weightData[weightData['TRYING_LOSE_WEIGHT'] > 1]
lowGroup = weightData[weightData['TRYING_LOSE_WEIGHT'] == 1]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[2919, 2729]
In [108]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.70203835072
5.64236030239
4.64260162288
4.52453820347
Ttest_indResult(statistic=1.4722776592333751, pvalue=0.14100179375868077)
In [109]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
613.126070572
608.011359472
281.558750126
275.916091413
Ttest_indResult(statistic=0.68920106505035605, pvalue=0.49072513109887506)
In [268]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.274114345844
-0.318488023952
RanksumsResult(statistic=1.1663044884744089, pvalue=0.24349135910037456)
In [270]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.1380669791268532, 0.13686265227323149]
[0.046075470732227626, 0.96325009675926365]
In [269]:
# traditional moderation analyses
weightData['RISK_1K'] = preprocessing.scale(weightData['RISK_1K'].values)
weightData['PATIENCE_MONTH'] = preprocessing.scale(weightData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*TRYING_LOSE_WEIGHT', data=weightData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*TRYING_LOSE_WEIGHT', data=weightData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     36.40
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.73e-23
Time:                        15:59:09   Log-Likelihood:                -7960.1
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================================
                                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------------
Intercept                            -0.0079      0.027     -0.296      0.767        -0.060     0.045
PATIENCE_MONTH                        0.1440      0.027      5.344      0.000         0.091     0.197
TRYING_LOSE_WEIGHT                    0.0036      0.010      0.340      0.734        -0.017     0.024
PATIENCE_MONTH:TRYING_LOSE_WEIGHT    -0.0028      0.010     -0.268      0.788        -0.023     0.018
==============================================================================
Omnibus:                      383.685   Durbin-Watson:                   1.936
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              136.107
Skew:                          -0.054   Prob(JB):                     2.79e-30
Kurtosis:                       2.247   Cond. No.                         5.94
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     36.38
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.82e-23
Time:                        15:59:09   Log-Likelihood:                -7960.1
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================================
                                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------
Intercept                      0.0053      0.027      0.196      0.845        -0.047     0.058
RISK_1K                        0.1443      0.027      5.349      0.000         0.091     0.197
TRYING_LOSE_WEIGHT            -0.0024      0.010     -0.224      0.822        -0.023     0.018
RISK_1K:TRYING_LOSE_WEIGHT    -0.0030      0.010     -0.282      0.778        -0.024     0.018
==============================================================================
Omnibus:                      881.893   Durbin-Watson:                   1.793
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2426.870
Skew:                          -0.842   Prob(JB):                         0.00
Kurtosis:                       5.734   Cond. No.                         5.90
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Fighting¶

In [177]:
fightData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIGHT'])
In [178]:
# Make shoplifting a binary variable
fightData.replace({'FIGHT': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
In [179]:
highGroup = fightData[fightData['FIGHT'] == 1]
lowGroup = fightData[fightData['FIGHT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1523, 3842]
In [180]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.78616076156
5.62657119533
4.44807599975
4.64463087392
Ttest_indResult(statistic=3.5013157730270956, pvalue=0.00047003402189015005)
In [181]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
616.701247538
606.057001562
281.165883685
276.64563247
Ttest_indResult(statistic=1.2556030701610108, pvalue=0.20936646830086492)
In [417]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.187206965841
-0.338915470494
RanksumsResult(statistic=3.1979243674597932, pvalue=0.0013842057916770847)
In [864]:
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Shoplifters')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
             4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
Out[864]:
<matplotlib.legend.Legend at 0x16b0ccf8>
In [418]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.14165043492868631, 0.1389130162723046]
[0.092142962363106345, 0.92658445459143235]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', fightData, hue='FIGHT', ci=0, palette=sns.cubehelix_palette(7, rot=-.5))
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [419]:
# traditional moderation analyses
fightData['RISK_1K'] = preprocessing.scale(fightData['RISK_1K'].values)
fightData['PATIENCE_MONTH'] = preprocessing.scale(fightData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIGHT', data=fightData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIGHT', data=fightData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.12
Date:                Mon, 23 May 2016   Prob (F-statistic):           4.20e-23
Time:                        12:01:32   Log-Likelihood:                -7558.9
No. Observations:                5365   AIC:                         1.513e+04
Df Residuals:                    5361   BIC:                         1.515e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept               -0.0068      0.016     -0.425      0.671        -0.038     0.025
PATIENCE_MONTH           0.1373      0.016      8.652      0.000         0.106     0.168
FIGHT                    0.0232      0.030      0.773      0.439        -0.036     0.082
PATIENCE_MONTH:FIGHT     0.0091      0.030      0.300      0.764        -0.050     0.069
==============================================================================
Omnibus:                      346.397   Durbin-Watson:                   1.943
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              124.843
Skew:                          -0.041   Prob(JB):                     7.77e-28
Kurtosis:                       2.257   Cond. No.                         2.51
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     39.62
Date:                Mon, 23 May 2016   Prob (F-statistic):           2.59e-25
Time:                        12:01:32   Log-Likelihood:                -7553.8
No. Observations:                5365   AIC:                         1.512e+04
Df Residuals:                    5361   BIC:                         1.514e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept        -0.0282      0.016     -1.765      0.078        -0.059     0.003
RISK_1K           0.1405      0.016      8.762      0.000         0.109     0.172
FIGHT             0.0993      0.030      3.315      0.001         0.041     0.158
RISK_1K:FIGHT    -0.0035      0.030     -0.118      0.906        -0.062     0.055
==============================================================================
Omnibus:                      834.674   Durbin-Watson:                   1.791
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2337.914
Skew:                          -0.834   Prob(JB):                         0.00
Kurtosis:                       5.771   Cond. No.                         2.46
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Shoplifting¶

In [154]:
shopliftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SHOPLIFT'])
highGroup = shopliftData[shopliftData['SHOPLIFT'] > 0]
lowGroup = shopliftData[shopliftData['SHOPLIFT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1412, 3952]
In [155]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.66337516134
5.67642379321
4.49139729561
4.62425075231
Ttest_indResult(statistic=-0.27865926232271548, pvalue=0.78052911733927599)
In [156]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
612.808073654
607.73582996
275.292951081
278.943783156
Ttest_indResult(statistic=0.59202266414010907, pvalue=0.55388863378541853)
In [387]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.226878612717
-0.322693202378
RanksumsResult(statistic=2.0941785361226044, pvalue=0.036244079048706287)
In [873]:
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values, 4, range=[-2,2], normed=True, alpha=.5, label='Shoplifters')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values, 4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
Out[873]:
<matplotlib.legend.Legend at 0x19cefbe0>
In [388]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.14807491565105696, 0.13936171851969878]
[0.28670981493700853, 0.774334521737301]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', shopliftData, hue='SHOPLIFT', ci=0, palette=sns.cubehelix_palette(7, rot=-.5))
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [389]:
# Make shoplifting a binary variable
shopliftData.replace({'SHOPLIFT': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
In [390]:
# traditional moderation analyses
shopliftData['RISK_1K'] = preprocessing.scale(shopliftData['RISK_1K'].values)
shopliftData['PATIENCE_MONTH'] = preprocessing.scale(shopliftData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SHOPLIFT', data=shopliftData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SHOPLIFT', data=shopliftData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     36.71
Date:                Mon, 23 May 2016   Prob (F-statistic):           1.79e-23
Time:                        11:30:40   Log-Likelihood:                -7556.6
No. Observations:                5364   AIC:                         1.512e+04
Df Residuals:                    5360   BIC:                         1.515e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===========================================================================================
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                  -0.0051      0.016     -0.325      0.745        -0.036     0.026
PATIENCE_MONTH              0.1391      0.016      8.877      0.000         0.108     0.170
SHOPLIFT                    0.0195      0.031      0.635      0.525        -0.041     0.080
PATIENCE_MONTH:SHOPLIFT     0.0096      0.031      0.310      0.757        -0.051     0.070
==============================================================================
Omnibus:                      344.904   Durbin-Watson:                   1.936
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              124.525
Skew:                          -0.041   Prob(JB):                     9.11e-28
Kurtosis:                       2.258   Cond. No.                         2.48
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     36.60
Date:                Mon, 23 May 2016   Prob (F-statistic):           2.07e-23
Time:                        11:30:40   Log-Likelihood:                -7556.8
No. Observations:                5364   AIC:                         1.512e+04
Df Residuals:                    5360   BIC:                         1.515e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------
Intercept            0.0029      0.016      0.186      0.853        -0.028     0.034
RISK_1K              0.1396      0.016      8.891      0.000         0.109     0.170
SHOPLIFT            -0.0112      0.031     -0.365      0.715        -0.071     0.049
RISK_1K:SHOPLIFT     0.0078      0.031      0.254      0.800        -0.053     0.068
==============================================================================
Omnibus:                      842.463   Durbin-Watson:                   1.786
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2370.186
Skew:                          -0.840   Prob(JB):                         0.00
Kurtosis:                       5.790   Cond. No.                         2.48
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Health Insurance¶

In [112]:
healthinsurData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'HEALTH_INSUR'])
highGroup = healthinsurData[healthinsurData['HEALTH_INSUR'] == 1]
lowGroup = healthinsurData[healthinsurData['HEALTH_INSUR'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[4631, 1014]
In [113]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.58956314789
6.05863554695
4.65023853306
4.03678297557
Ttest_indResult(statistic=-9.5107947470703547, pvalue=6.6807247056779045e-21)
In [114]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
609.241200605
617.83234714
276.049638764
291.149523219
Ttest_indResult(statistic=-0.85851895956311863, pvalue=0.39074899294251009)
In [285]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.326660800704
-0.150614754098
RanksumsResult(statistic=-4.2440190906968578, pvalue=2.1955166181168712e-05)
In [286]:
print(np.mean(highGroup['RISK_INCOME_MID_RISK']))
print(np.mean(lowGroup['RISK_INCOME_MID_RISK']))
print(stats.ranksums(highGroup['RISK_INCOME_MID_RISK'], lowGroup['RISK_INCOME_MID_RISK']))
0.404222564328
0.476482617587
RanksumsResult(statistic=-4.2872174061901864, pvalue=1.8092520188330801e-05)
In [288]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.1351536033583042, 0.14683529589736968]
[0.34332301923714525, 0.73135546521724759]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', data, hue='HEALTH_INSUR')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [883]:
# tradtiional moderation analyses
healthinsurData['RISK_1K'] = preprocessing.scale(healthinsurData['RISK_1K'].values)
healthinsurData['PATIENCE_MONTH'] = preprocessing.scale(healthinsurData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*HEALTH_INSUR', data=healthinsurData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*HEALTH_INSUR', data=healthinsurData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.51
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           2.31e-23
Time:                        10:34:05   Log-Likelihood:                -7955.6
No. Observations:                5645   AIC:                         1.592e+04
Df Residuals:                    5641   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===============================================================================================
                                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------
Intercept                      -0.0170      0.032     -0.527      0.598        -0.080     0.046
PATIENCE_MONTH                  0.1673      0.034      4.928      0.000         0.101     0.234
HEALTH_INSUR                    0.0188      0.035      0.531      0.595        -0.051     0.088
PATIENCE_MONTH:HEALTH_INSUR    -0.0347      0.037     -0.942      0.346        -0.107     0.038
==============================================================================
Omnibus:                      382.130   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              135.633
Skew:                          -0.052   Prob(JB):                     3.53e-30
Kurtosis:                       2.248   Cond. No.                         5.49
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     63.13
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           3.85e-40
Time:                        10:34:05   Log-Likelihood:                -7916.7
No. Observations:                5645   AIC:                         1.584e+04
Df Residuals:                    5641   BIC:                         1.587e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept                0.2494      0.031      8.068      0.000         0.189     0.310
RISK_1K                  0.1288      0.030      4.353      0.000         0.071     0.187
HEALTH_INSUR            -0.3039      0.034     -8.907      0.000        -0.371    -0.237
RISK_1K:HEALTH_INSUR     0.0089      0.033      0.270      0.787        -0.056     0.074
==============================================================================
Omnibus:                      879.874   Durbin-Watson:                   1.812
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2437.228
Skew:                          -0.839   Prob(JB):                         0.00
Kurtosis:                       5.747   Cond. No.                         4.54
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Doctor Visits¶

In [275]:
docData = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', 'DOC_VISIT'])
len(docData)
Out[275]:
5630
In [884]:
#sns.factorplot('DOC_VISIT', 'RISK_INCOME', data=data.dropna(subset=['DOC_VISIT', 'RISK_INCOME']))
#sns.factorplot('DOC_VISIT', 'PATIENCE_MONTH', data=data.dropna(subset=['DOC_VISIT', 'PATIENCE_MONTH']))

g = sns.PairGrid(docData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='DOC_VISIT')
g.map(sns.pointplot)
In [273]:
delaydocCorr = docData['PATIENCE_MONTH'].corr(docData['DOC_VISIT'])
riskdocCorr = docData['RISK_1K'].corr(docData['DOC_VISIT'])
delayriskCorr = docData['PATIENCE_MONTH'].corr(docData['RISK_1K'])

z,p = corrstats.dependent_corr(delaydocCorr, riskdocCorr, delayriskCorr, len(docData))
print([delaydocCorr, riskdocCorr])
print([z, p])
[-0.036593402761855728, -0.028163998556457175]
[-0.55278288489606398, 0.5804287303271507]
In [276]:
# tradtiional moderation analyses
docData['RISK_1K'] = preprocessing.scale(docData['RISK_1K'].values)
docData['PATIENCE_MONTH'] = preprocessing.scale(docData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*DOC_VISIT', data=docData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*DOC_VISIT', data=docData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.80
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.51e-23
Time:                        16:25:29   Log-Likelihood:                -7933.9
No. Observations:                5630   AIC:                         1.588e+04
Df Residuals:                    5626   BIC:                         1.590e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                    0.0217      0.022      0.966      0.334        -0.022     0.066
PATIENCE_MONTH               0.1579      0.022      7.116      0.000         0.114     0.201
DOC_VISIT                   -0.0128      0.010     -1.230      0.219        -0.033     0.008
PATIENCE_MONTH:DOC_VISIT    -0.0121      0.010     -1.213      0.225        -0.032     0.007
==============================================================================
Omnibus:                      380.562   Durbin-Watson:                   1.936
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              135.352
Skew:                          -0.055   Prob(JB):                     4.06e-30
Kurtosis:                       2.248   Cond. No.                         4.40
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     38.95
Date:                Fri, 20 May 2016   Prob (F-statistic):           6.65e-25
Time:                        16:25:29   Log-Likelihood:                -7930.8
No. Observations:                5630   AIC:                         1.587e+04
Df Residuals:                    5626   BIC:                         1.590e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.0531      0.022      2.370      0.018         0.009     0.097
RISK_1K               0.1504      0.022      6.760      0.000         0.107     0.194
DOC_VISIT            -0.0306      0.010     -2.944      0.003        -0.051    -0.010
RISK_1K:DOC_VISIT    -0.0082      0.010     -0.798      0.425        -0.028     0.012
==============================================================================
Omnibus:                      876.199   Durbin-Watson:                   1.798
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2417.940
Skew:                          -0.839   Prob(JB):                         0.00
Kurtosis:                       5.737   Cond. No.                         4.26
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Investment Preferences¶

In [886]:
print(np.mean(data['INVEST_STOCK']))
print(np.mean(data['INVEST_PRIV_BOND']))
print(np.mean(data['INVEST_GOV_BOND']))

investData = data.dropna(subset=['PATIENCE_MONTH', 'RISK_1K', 'INVEST_STOCK', 'INVEST_GOV_BOND'])

investData['bin'] = (investData['INVEST_STOCK'] == 1) & (investData['INVEST_GOV_BOND'] == 0)

highGroup = investData[investData['bin'] == 1]
lowGroup = investData[investData['bin'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
0.626732230666
0.445090150499
0.64222917598
[1452, 3348]
In [887]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.60864327908
5.66676940178
(-1.2197602109808643, 0.22265948534300775)
In [888]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
620.50137741
607.060035842
(1.5473810324408408, 0.12188578477849794)
In [889]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.221988795518
-0.30523432745
(1.8008884898092523, 0.071720457546197158)
In [890]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr, p])
[0.12215140979811975, 0.14007596283542509, 0.56196232449612094]
In [891]:
# traditional moderation analyses
investData['RISK_1K'] = preprocessing.scale(investData['RISK_1K'].values)
investData['PATIENCE_MONTH'] = preprocessing.scale(investData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*bin', data=investData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*bin', data=investData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     30.46
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.69e-19
Time:                        10:34:08   Log-Likelihood:                -6765.6
No. Observations:                4800   AIC:                         1.354e+04
Df Residuals:                    4796   BIC:                         1.357e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================================
                                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------
Intercept                     -0.0163      0.017     -0.952      0.341        -0.050     0.017
bin[T.True]                    0.0534      0.031      1.716      0.086        -0.008     0.115
PATIENCE_MONTH                 0.1399      0.017      8.185      0.000         0.106     0.173
PATIENCE_MONTH:bin[T.True]    -0.0175      0.031     -0.561      0.575        -0.079     0.044
==============================================================================
Omnibus:                      300.653   Durbin-Watson:                   1.954
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              110.111
Skew:                          -0.046   Prob(JB):                     1.23e-24
Kurtosis:                       2.264   Cond. No.                         2.44
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     30.16
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           2.62e-19
Time:                        10:34:08   Log-Likelihood:                -6766.1
No. Observations:                4800   AIC:                         1.354e+04
Df Residuals:                    4796   BIC:                         1.357e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=======================================================================================
                          coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept               0.0136      0.017      0.795      0.426        -0.020     0.047
bin[T.True]            -0.0444      0.031     -1.426      0.154        -0.106     0.017
RISK_1K                 0.1402      0.017      8.193      0.000         0.107     0.174
RISK_1K:bin[T.True]    -0.0183      0.031     -0.587      0.557        -0.079     0.043
==============================================================================
Omnibus:                      718.485   Durbin-Watson:                   1.822
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1915.179
Skew:                          -0.818   Prob(JB):                         0.00
Kurtosis:                       5.626   Cond. No.                         2.45
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Entrepreneurs¶

In [139]:
enetreData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'ENTREPRENEUR'])
highGroup = enetreData[enetreData['ENTREPRENEUR'] == 1]
lowGroup = enetreData[enetreData['ENTREPRENEUR'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[984, 4188]
In [140]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.67698736145
5.66446448971
4.27434674465
4.60172724953
Ttest_indResult(statistic=0.24086395494113869, pvalue=0.80969273614379766)
In [141]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
626.291666667
604.597421203
275.306442881
277.656390985
Ttest_indResult(statistic=2.2197519577036822, pvalue=0.026585800208192633)
In [351]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.175703858186
-0.332115009747
RanksumsResult(statistic=3.3336821254918929, pvalue=0.00085704542195754688)
In [896]:
p = plt.hist(highGroup['RISK_1K'].values, normed=True, bins=10, alpha = .5, label=['Entrepreneurs'])
p = plt.hist(lowGroup['RISK_1K'].values, normed=True, bins=10, alpha = .5, label=['Not'])
p = plt.legend()
In [897]:
p = plt.hist(highGroup.dropna(subset = ['RISK_10K'])['RISK_10K'].values, normed=True, alpha = .5, label=['Entrepreneurs'])
p = plt.hist(lowGroup.dropna(subset = ['RISK_10K'])['RISK_10K'].values, normed=True, alpha = .5, label=['Not'])
p = plt.legend()
In [353]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.091883441851504613, 0.14848799858156025]
[1.6195710146872915, 0.10532446019107788]
In [354]:
# traditional moderation analyses
enetreData['RISK_1K'] = preprocessing.scale(enetreData['RISK_1K'].values)
enetreData['PATIENCE_MONTH'] = preprocessing.scale(enetreData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*ENTREPRENEUR', data=enetreData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*ENTREPRENEUR', data=enetreData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     35.91
Date:                Mon, 23 May 2016   Prob (F-statistic):           5.82e-23
Time:                        10:25:09   Log-Likelihood:                -7285.4
No. Observations:                5172   AIC:                         1.458e+04
Df Residuals:                    5168   BIC:                         1.461e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===============================================================================================
                                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------
Intercept                      -0.0147      0.015     -0.958      0.338        -0.045     0.015
PATIENCE_MONTH                  0.1473      0.015      9.716      0.000         0.118     0.177
ENTREPRENEUR                    0.0774      0.035      2.205      0.027         0.009     0.146
PATIENCE_MONTH:ENTREPRENEUR    -0.0523      0.036     -1.446      0.148        -0.123     0.019
==============================================================================
Omnibus:                      315.730   Durbin-Watson:                   1.963
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              116.989
Skew:                          -0.048   Prob(JB):                     3.95e-26
Kurtosis:                       2.269   Cond. No.                         2.73
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     34.57
Date:                Mon, 23 May 2016   Prob (F-statistic):           4.11e-22
Time:                        10:25:09   Log-Likelihood:                -7287.4
No. Observations:                5172   AIC:                         1.458e+04
Df Residuals:                    5168   BIC:                         1.461e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept                0.0007      0.015      0.043      0.966        -0.029     0.031
RISK_1K                  0.1497      0.015      9.790      0.000         0.120     0.180
ENTREPRENEUR             0.0004      0.035      0.012      0.990        -0.068     0.069
RISK_1K:ENTREPRENEUR    -0.0608      0.035     -1.722      0.085        -0.130     0.008
==============================================================================
Omnibus:                      773.767   Durbin-Watson:                   1.797
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2075.190
Skew:                          -0.817   Prob(JB):                         0.00
Kurtosis:                       5.638   Cond. No.                         2.73
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Exercise¶

In [97]:
exerciseData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EXERCISE'])
highGroup = exerciseData[exerciseData['EXERCISE'] > 0]
lowGroup = exerciseData[exerciseData['EXERCISE'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[2105, 3543]
In [98]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.6558921239
5.68348808641
4.73472562172
4.49920263743
Ttest_indResult(statistic=-0.6526803871123984, pvalue=0.51399721757879102)
In [99]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
611.077909739
610.403330511
277.966264655
279.386634462
Ttest_indResult(statistic=0.088001497279122515, pvalue=0.92987945687816098)
In [240]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.302139037433
-0.291690709752
RanksumsResult(statistic=-0.2917597663572612, pvalue=0.77047030921631532)
In [244]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.15060096379238691, 0.12978796260445696]
[0.77103530566197132, 0.44068600506029987]
In [241]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', data, hue='EXERCISE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,16)
Out[241]:
(0, 16)
In [242]:
# traditional moderation analyses
exerciseData['RISK_1K'] = preprocessing.scale(exerciseData['RISK_1K'].values)
exerciseData['PATIENCE_MONTH'] = preprocessing.scale(exerciseData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EXERCISE', data=exerciseData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EXERCISE', data=exerciseData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     36.45
Date:                Fri, 20 May 2016   Prob (F-statistic):           2.52e-23
Time:                        15:13:35   Log-Likelihood:                -7960.0
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===========================================================================================
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                  -0.0018      0.017     -0.108      0.914        -0.034     0.031
PATIENCE_MONTH              0.1317      0.017      7.812      0.000         0.099     0.165
EXERCISE                    0.0050      0.027      0.183      0.855        -0.048     0.058
PATIENCE_MONTH:EXERCISE     0.0154      0.027      0.568      0.570        -0.038     0.068
==============================================================================
Omnibus:                      382.846   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              135.928
Skew:                          -0.054   Prob(JB):                     3.04e-30
Kurtosis:                       2.248   Cond. No.                         2.44
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.80
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.51e-23
Time:                        15:13:35   Log-Likelihood:                -7959.5
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------
Intercept            0.0069      0.017      0.413      0.680        -0.026     0.039
RISK_1K              0.1279      0.017      7.699      0.000         0.095     0.160
EXERCISE            -0.0185      0.027     -0.677      0.498        -0.072     0.035
RISK_1K:EXERCISE     0.0263      0.027      0.964      0.335        -0.027     0.080
==============================================================================
Omnibus:                      882.313   Durbin-Watson:                   1.794
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2431.115
Skew:                          -0.842   Prob(JB):                         0.00
Kurtosis:                       5.737   Cond. No.                         2.43
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Sex¶

In [25]:
sexData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SEX'])
maleData = sexData[sexData['SEX'] == 1]
femaleData = sexData[sexData['SEX'] == 2]
nMales = len(maleData)
nFemales = len(femaleData)
print([nMales, nFemales])
[2790, 2858]
In [66]:
print(np.mean(maleData['PATIENCE_MONTH']))
print(np.mean(femaleData['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(maleData['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(femaleData['PATIENCE_MONTH'])))

print(stats.ttest_ind(maleData['PATIENCE_MONTH'], femaleData['PATIENCE_MONTH'], equal_var=False))
5.58812965365
5.75625244132
4.444456685
4.70593545805
Ttest_indResult(statistic=-4.1547474960714448, pvalue=3.3045818393888525e-05)
In [68]:
print(np.mean(maleData['RISK_1K']))
print(np.mean(femaleData['RISK_1K']))

print(np.std(maleData['RISK_1K']))
print(np.std(femaleData['RISK_1K']))

print(stats.ttest_ind(maleData['RISK_1K'], femaleData['RISK_1K'], equal_var=False))
605.372759857
615.811056683
275.166203493
282.320692472
Ttest_indResult(statistic=-1.4069066435664053, pvalue=0.15951011552429775)
In [29]:
print(np.mean(maleData['RISK_INCOME']))
print(np.mean(femaleData['RISK_INCOME']))
print(stats.ranksums(maleData['RISK_INCOME'], femaleData['RISK_INCOME']))
-0.192647058824
-0.395469140207
RanksumsResult(statistic=5.5979419216332182, pvalue=2.1691138843642794e-08)
In [30]:
maleCorr = maleData['PATIENCE_MONTH'].corr(maleData['RISK_1K'])
femaleCorr = femaleData['PATIENCE_MONTH'].corr(femaleData['RISK_1K'])
print([maleCorr, femaleCorr])
z,p = corrstats.independent_corr(maleCorr, femaleCorr, nMales, nFemales)
print([z, p])
[0.11351492211428131, 0.15822447055853112]
[1.7109085826842465, 0.087097989987706903]
In [912]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', sexData, hue='SEX')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[912]:
(0, 15)
In [913]:
# tradtiional moderation analyses
sexData['RISK_1K'] = preprocessing.scale(sexData['RISK_1K'].values)
sexData['PATIENCE_MONTH'] = preprocessing.scale(sexData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SEX', data=sexData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SEX', data=sexData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     37.48
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           5.64e-24
Time:                        10:34:14   Log-Likelihood:                -7958.5
No. Observations:                5648   AIC:                         1.592e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept             -0.0350      0.042     -0.836      0.403        -0.117     0.047
PATIENCE_MONTH         0.0712      0.042      1.679      0.093        -0.012     0.154
SEX                    0.0225      0.026      0.850      0.395        -0.029     0.074
PATIENCE_MONTH:SEX     0.0432      0.026      1.633      0.103        -0.009     0.095
==============================================================================
Omnibus:                      376.479   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              134.579
Skew:                          -0.053   Prob(JB):                     5.98e-30
Kurtosis:                       2.251   Cond. No.                         7.16
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     42.81
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           2.40e-27
Time:                        10:34:14   Log-Likelihood:                -7950.6
No. Observations:                5648   AIC:                         1.591e+04
Df Residuals:                    5644   BIC:                         1.594e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
Intercept      -0.1590      0.042     -3.806      0.000        -0.241    -0.077
RISK_1K         0.0664      0.042      1.576      0.115        -0.016     0.149
SEX             0.1053      0.026      3.999      0.000         0.054     0.157
RISK_1K:SEX     0.0463      0.026      1.756      0.079        -0.005     0.098
==============================================================================
Omnibus:                      897.158   Durbin-Watson:                   1.795
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2520.916
Skew:                          -0.849   Prob(JB):                         0.00
Kurtosis:                       5.798   Cond. No.                         7.01
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

BMI¶

In [47]:
bmiData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'BMI'])

nBMI = len(bmiData)
pCorr = bmiData['PATIENCE_MONTH'].corr(bmiData['BMI'])
rCorr = bmiData['RISK_1K'].corr(bmiData['BMI'])
print(nBMI)
print(pCorr)
print(rCorr)
prCorr = bmiData['RISK_1K'].corr(bmiData['PATIENCE_MONTH'])
z,p = corrstats.dependent_corr(pCorr, rCorr, prCorr, nBMI)
print([z, p])
5486
0.0768470059948
-0.0113165388086
[4.9891463586644607, 6.2518042676096286e-07]
In [48]:
nCat = 5

bins = []
for i in range(nCat):
    i = float(i)
    bins.append(bmiData['BMI'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)

binned = np.digitize(bmiData['BMI'], bins)
bmiData['bin'] = binned

g = sns.PairGrid(bmiData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
-c:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[48]:
<seaborn.axisgrid.PairGrid at 0xc072748>
In [915]:
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', bmiData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
Out[915]:
<seaborn.axisgrid.FacetGrid at 0x19bacf28>
In [916]:
corrs = []
for i in range(nCat):
    corrs.append(bmiData[bmiData['bin'] == i]['PATIENCE_MONTH'].corr(bmiData[bmiData['bin'] == i]['RISK_1K']))

p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
In [49]:
# Traditional moderation analyses
bmiData['RISK_1K'] = preprocessing.scale(bmiData['RISK_1K'].values)
bmiData['PATIENCE_MONTH'] = preprocessing.scale(bmiData['PATIENCE_MONTH'].values)
bmiData['BMI'] = preprocessing.scale(bmiData['BMI'].values)
In [53]:
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*BMI', data=bmiData).fit()
print mod.summary()
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*BMI', data=bmiData).fit()
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.025
Method:                 Least Squares   F-statistic:                     47.90
Date:                Mon, 16 May 2016   Prob (F-statistic):           1.49e-30
Time:                        15:36:18   Log-Likelihood:                -7713.3
No. Observations:                5486   AIC:                         1.543e+04
Df Residuals:                    5482   BIC:                         1.546e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
Intercept      -0.0001      0.013     -0.010      0.992        -0.026     0.026
RISK_1K         0.1396      0.013     10.469      0.000         0.113     0.166
BMI             0.0773      0.013      5.770      0.000         0.051     0.104
RISK_1K:BMI    -0.0119      0.013     -0.933      0.351        -0.037     0.013
==============================================================================
Omnibus:                      862.326   Durbin-Watson:                   1.804
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2444.519
Skew:                          -0.838   Prob(JB):                         0.00
Kurtosis:                       5.808   Cond. No.                         1.11
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.82
Date:                Mon, 16 May 2016   Prob (F-statistic):           1.49e-23
Time:                        15:36:19   Log-Likelihood:                -7729.6
No. Observations:                5486   AIC:                         1.547e+04
Df Residuals:                    5482   BIC:                         1.549e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              0.0004      0.013      0.030      0.976        -0.026     0.027
PATIENCE_MONTH         0.1398      0.013     10.361      0.000         0.113     0.166
BMI                   -0.0220      0.013     -1.637      0.102        -0.048     0.004
PATIENCE_MONTH:BMI    -0.0052      0.014     -0.385      0.700        -0.032     0.021
==============================================================================
Omnibus:                      367.031   Durbin-Watson:                   1.938
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              130.585
Skew:                          -0.047   Prob(JB):                     4.40e-29
Kurtosis:                       2.250   Cond. No.                         1.16
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Income¶

In [277]:
incomeData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'INCOME'])
print(len(incomeData))
5370
In [280]:
delayincomeCorr = incomeData['PATIENCE_MONTH'].corr(incomeData['INCOME'])
riskincomeCorr = incomeData['RISK_1K'].corr(incomeData['INCOME'])
delayriskCorr = incomeData['PATIENCE_MONTH'].corr(incomeData['RISK_1K'])

z,p = corrstats.dependent_corr(delayincomeCorr, riskincomeCorr, delayriskCorr, len(incomeData))
print([delayincomeCorr, riskincomeCorr])
print([z, p])
[-0.1410882880676359, 0.013142665931087585]
[-8.6729076937749046, 0.0]
In [918]:
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(incomeData['INCOME'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)

binned = np.digitize(incomeData['INCOME'], bins)
incomeData['bin'] = binned

g = sns.PairGrid(incomeData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
In [919]:
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', incomeData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
Out[919]:
<seaborn.axisgrid.FacetGrid at 0x26e28198>
In [920]:
corrs = []
for i in range(nCat):
    corrs.append(incomeData[incomeData['bin'] == i]['PATIENCE_MONTH'].corr(incomeData[incomeData['bin'] == i]['RISK_1K']))

p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
In [281]:
# Traditional moderation analyses
incomeData['RISK_1K'] = preprocessing.scale(incomeData['RISK_1K'].values)
incomeData['PATIENCE_MONTH'] = preprocessing.scale(incomeData['PATIENCE_MONTH'].values)
incomeData['INCOME'] = preprocessing.scale(incomeData['INCOME'].values)
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*INCOME', data=incomeData).fit()
print mod.summary()
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*INCOME', data=incomeData).fit()
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.039
Model:                            OLS   Adj. R-squared:                  0.038
Method:                 Least Squares   F-statistic:                     71.65
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.97e-45
Time:                        16:34:46   Log-Likelihood:                -7514.2
No. Observations:                5370   AIC:                         1.504e+04
Df Residuals:                    5366   BIC:                         1.506e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept       7.974e-05      0.013      0.006      0.995        -0.026     0.026
RISK_1K            0.1360      0.013     10.142      0.000         0.110     0.162
INCOME            -0.1432      0.013    -10.683      0.000        -0.170    -0.117
RISK_1K:INCOME    -0.0061      0.013     -0.476      0.634        -0.031     0.019
==============================================================================
Omnibus:                      869.355   Durbin-Watson:                   1.827
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2386.414
Skew:                          -0.871   Prob(JB):                         0.00
Kurtosis:                       5.762   Cond. No.                         1.09
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     35.57
Date:                Fri, 20 May 2016   Prob (F-statistic):           9.34e-23
Time:                        16:34:46   Log-Likelihood:                -7566.8
No. Observations:                5370   AIC:                         1.514e+04
Df Residuals:                    5366   BIC:                         1.517e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                -0.0029      0.014     -0.213      0.832        -0.030     0.024
PATIENCE_MONTH            0.1405      0.014     10.260      0.000         0.114     0.167
INCOME                    0.0382      0.014      2.698      0.007         0.010     0.066
PATIENCE_MONTH:INCOME    -0.0206      0.014     -1.447      0.148        -0.048     0.007
==============================================================================
Omnibus:                      362.067   Durbin-Watson:                   1.925
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              128.096
Skew:                          -0.042   Prob(JB):                     1.53e-28
Kurtosis:                       2.248   Cond. No.                         1.40
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

IQ¶

In [41]:
iqData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'IQ'])

nIQ = len(iqData)
pCorr = iqData['PATIENCE_MONTH'].corr(iqData['IQ'])
rCorr = iqData['RISK_1K'].corr(iqData['IQ'])
print()
print(pCorr)
print(rCorr)
prCorr = iqData['RISK_1K'].corr(iqData['PATIENCE_MONTH'])
z,p = corrstats.dependent_corr(pCorr, rCorr, prCorr, nIQ)
print([z, p])
()
-0.312883076283
-0.0609658271817
[-14.772734910960727, 0.0]
In [ ]:
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(iqData['IQ'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)

binned = np.digitize(iqData['IQ'], bins)
iqData['bin'] = binned
for i in range(nCat):
    print(iqData[iqData['bin'] == i]['PATIENCE_MONTH'].corr(iqData[iqData['bin'] == i]['RISK_1K']))

g = sns.PairGrid(iqData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
In [925]:
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', iqData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
Out[925]:
<seaborn.axisgrid.FacetGrid at 0x33b4c0b8>
In [926]:
corrs = []
for i in range(nCat):
    corrs.append(iqData[iqData['bin'] == i]['PATIENCE_MONTH'].corr(iqData[iqData['bin'] == i]['RISK_1K']))

p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
In [927]:
# traditional moderation analyses
iqData['RISK_1K'] = preprocessing.scale(iqData['RISK_1K'].values)
iqData['PATIENCE_MONTH'] = preprocessing.scale(iqData['PATIENCE_MONTH'].values)
iqData['IQ'] = iqData['IQ'].astype(float)
iqData['IQ'] = preprocessing.scale(iqData['IQ'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*IQ', data=iqData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*IQ', data=iqData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     38.70
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           9.73e-25
Time:                        10:35:30   Log-Likelihood:                -7650.2
No. Observations:                5432   AIC:                         1.531e+04
Df Residuals:                    5428   BIC:                         1.533e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept            -0.0148      0.014     -1.051      0.293        -0.042     0.013
PATIENCE_MONTH        0.1345      0.014      9.455      0.000         0.107     0.162
IQ                   -0.0238      0.014     -1.681      0.093        -0.052     0.004
PATIENCE_MONTH:IQ    -0.0474      0.014     -3.464      0.001        -0.074    -0.021
==============================================================================
Omnibus:                      338.219   Durbin-Watson:                   1.931
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              124.518
Skew:                          -0.052   Prob(JB):                     9.14e-28
Kurtosis:                       2.266   Cond. No.                         1.47
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.112
Model:                            OLS   Adj. R-squared:                  0.111
Method:                 Least Squares   F-statistic:                     228.0
Date:                Thu, 18 Jun 2015   Prob (F-statistic):          2.47e-139
Time:                        10:35:30   Log-Likelihood:                -7385.3
No. Observations:                5432   AIC:                         1.478e+04
Df Residuals:                    5428   BIC:                         1.481e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -0.0012      0.013     -0.096      0.923        -0.026     0.024
RISK_1K        0.1148      0.013      8.902      0.000         0.090     0.140
IQ            -0.3062      0.013    -23.890      0.000        -0.331    -0.281
RISK_1K:IQ    -0.0202      0.013     -1.540      0.124        -0.046     0.006
==============================================================================
Omnibus:                      893.422   Durbin-Watson:                   1.921
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2706.545
Skew:                          -0.854   Prob(JB):                         0.00
Kurtosis:                       6.007   Cond. No.                         1.15
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
-c:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Conviction¶

In [182]:
conData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CONVICT'])

highGroup = conData[conData['CONVICT'] == 1]
lowGroup = conData[conData['CONVICT'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[263, 5220]
In [183]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.63778121671
5.67494693309
4.13827114992
4.60438129591
Ttest_indResult(statistic=-0.41178530242425387, pvalue=0.68079762277533762)
In [184]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
591.372623574
611.15651341
284.918609437
278.099931434
Ttest_indResult(statistic=-1.0979857396215484, pvalue=0.27312901203236883)
In [425]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.169260700389
-0.30156555773
RanksumsResult(statistic=1.5021222628716622, pvalue=0.13306553627343029)
In [427]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[-0.048031352429710773, 0.1492028300342898]
[3.1221434010069342, 0.0017953946320077296]
In [933]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', conData, hue='CONVICT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[933]:
(0, 15)
In [934]:
# traditional moderation analyses
conData['RISK_1K'] = preprocessing.scale(conData['RISK_1K'].values)
conData['PATIENCE_MONTH'] = preprocessing.scale(conData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CONVICT', data=conData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CONVICT', data=conData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     40.10
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.26e-25
Time:                        10:35:33   Log-Likelihood:                -7720.5
No. Observations:                5483   AIC:                         1.545e+04
Df Residuals:                    5479   BIC:                         1.548e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  0.0032      0.014      0.236      0.813        -0.024     0.030
PATIENCE_MONTH             0.1485      0.014     10.879      0.000         0.122     0.175
CONVICT                   -0.0721      0.063     -1.153      0.249        -0.195     0.051
PATIENCE_MONTH:CONVICT    -0.2012      0.067     -3.012      0.003        -0.332    -0.070
==============================================================================
Omnibus:                      355.088   Durbin-Watson:                   1.942
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              128.141
Skew:                          -0.047   Prob(JB):                     1.49e-28
Kurtosis:                       2.257   Cond. No.                         5.02
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     40.04
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.37e-25
Time:                        10:35:33   Log-Likelihood:                -7720.6
No. Observations:                5483   AIC:                         1.545e+04
Df Residuals:                    5479   BIC:                         1.548e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------
Intercept           0.0007      0.014      0.048      0.962        -0.026     0.028
RISK_1K             0.1499      0.014     10.929      0.000         0.123     0.177
CONVICT            -0.0269      0.063     -0.429      0.668        -0.150     0.096
RISK_1K:CONVICT    -0.1937      0.061     -3.165      0.002        -0.314    -0.074
==============================================================================
Omnibus:                      840.987   Durbin-Watson:                   1.788
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2322.810
Skew:                          -0.827   Prob(JB):                         0.00
Kurtosis:                       5.726   Cond. No.                         4.80
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Credit Card Missed Payment¶

In [142]:
misssedpayData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCMISSPAY'])

highGroup = misssedpayData[misssedpayData['CCMISSPAY'] == 1]
lowGroup = misssedpayData[misssedpayData['CCMISSPAY'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1167, 4241]
In [143]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.89099152326
5.60232068657
4.28963893541
4.62993993496
Ttest_indResult(statistic=5.926433995482097, pvalue=3.6561670248433362e-09)
In [144]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
626.266495287
604.901674133
281.443049895
277.371526879
Ttest_indResult(statistic=2.3027797647922394, pvalue=0.021402574839598731)
In [358]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.320264317181
-0.291396298967
RanksumsResult(statistic=-0.30919637407061717, pvalue=0.75717215193419896)
In [359]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.16667078026670271, 0.1316586647130519]
[1.0822304983936413, 0.27915011984382021]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', misssedpayData, hue='CCMISSPAY')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [360]:
# traditional moderation analyses
misssedpayData['RISK_1K'] = preprocessing.scale(misssedpayData['RISK_1K'].values)
misssedpayData['PATIENCE_MONTH'] = preprocessing.scale(misssedpayData['PATIENCE_MONTH'].values)

mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCMISSPAY', data=misssedpayData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCMISSPAY', data=misssedpayData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     37.92
Date:                Mon, 23 May 2016   Prob (F-statistic):           3.04e-24
Time:                        10:48:31   Log-Likelihood:                -7617.3
No. Observations:                5408   AIC:                         1.524e+04
Df Residuals:                    5404   BIC:                         1.527e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
============================================================================================
                               coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
Intercept                   -0.0112      0.015     -0.738      0.461        -0.041     0.019
PATIENCE_MONTH               0.1302      0.015      8.629      0.000         0.101     0.160
CCMISSPAY                    0.0452      0.033      1.369      0.171        -0.020     0.110
PATIENCE_MONTH:CCMISSPAY     0.0458      0.034      1.354      0.176        -0.020     0.112
==============================================================================
Omnibus:                      352.308   Durbin-Watson:                   1.945
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              127.092
Skew:                          -0.051   Prob(JB):                     2.53e-28
Kurtosis:                       2.256   Cond. No.                         2.76
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.025
Model:                            OLS   Adj. R-squared:                  0.025
Method:                 Least Squares   F-statistic:                     46.93
Date:                Mon, 23 May 2016   Prob (F-statistic):           6.24e-30
Time:                        10:48:31   Log-Likelihood:                -7604.1
No. Observations:                5408   AIC:                         1.522e+04
Df Residuals:                    5404   BIC:                         1.524e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept            -0.0387      0.015     -2.555      0.011        -0.068    -0.009
RISK_1K               0.1331      0.015      8.747      0.000         0.103     0.163
CCMISSPAY             0.1781      0.033      5.447      0.000         0.114     0.242
RISK_1K:CCMISSPAY     0.0247      0.032      0.762      0.446        -0.039     0.088
==============================================================================
Omnibus:                      841.430   Durbin-Watson:                   1.788
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2285.621
Skew:                          -0.843   Prob(JB):                         0.00
Kurtosis:                       5.702   Cond. No.                         2.62
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Credit Card Debt¶

In [145]:
ccdebtData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCDEBT'])

highGroup = ccdebtData[ccdebtData['CCDEBT'] == 1]
lowGroup = ccdebtData[ccdebtData['CCDEBT'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[3211, 2192]
In [146]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.4944018615
5.91078102945
4.71570583076
4.26263055425
Ttest_indResult(statistic=-10.072736794696159, pvalue=1.2358293212588379e-23)
In [147]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
611.800996574
607.834854015
268.271154978
292.292783467
Ttest_indResult(statistic=0.50609792379353624, pvalue=0.61281306700682825)
In [364]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.340101522843
-0.236792893876
RanksumsResult(statistic=-2.6478741705375803, pvalue=0.0080999667703708766)
In [365]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.14072591082686972, 0.14822962254759237]
[0.27644172253541444, 0.78220881477569582]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', ccdebtData, hue='CCDEBT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [366]:
# traditional moderation analyses
ccdebtData['RISK_1K'] = preprocessing.scale(ccdebtData['RISK_1K'].values)
ccdebtData['PATIENCE_MONTH'] = preprocessing.scale(ccdebtData['PATIENCE_MONTH'].values)

mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCDEBT', data=ccdebtData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCDEBT', data=ccdebtData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     38.27
Date:                Mon, 23 May 2016   Prob (F-statistic):           1.83e-24
Time:                        10:50:56   Log-Likelihood:                -7609.7
No. Observations:                5403   AIC:                         1.523e+04
Df Residuals:                    5399   BIC:                         1.525e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                -0.0350      0.021     -1.634      0.102        -0.077     0.007
PATIENCE_MONTH            0.1637      0.022      7.364      0.000         0.120     0.207
CCDEBT                    0.0556      0.028      2.005      0.045         0.001     0.110
PATIENCE_MONTH:CCDEBT    -0.0304      0.028     -1.080      0.280        -0.085     0.025
==============================================================================
Omnibus:                      361.622   Durbin-Watson:                   1.945
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              128.359
Skew:                          -0.043   Prob(JB):                     1.34e-28
Kurtosis:                       2.250   Cond. No.                         3.27
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.038
Model:                            OLS   Adj. R-squared:                  0.038
Method:                 Least Squares   F-statistic:                     71.53
Date:                Mon, 23 May 2016   Prob (F-statistic):           2.31e-45
Time:                        10:50:56   Log-Likelihood:                -7561.2
No. Observations:                5403   AIC:                         1.513e+04
Df Residuals:                    5399   BIC:                         1.516e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.1635      0.021      7.800      0.000         0.122     0.205
RISK_1K            0.1342      0.020      6.728      0.000         0.095     0.173
CCDEBT            -0.2751      0.027    -10.121      0.000        -0.328    -0.222
RISK_1K:CCDEBT     0.0143      0.027      0.532      0.595        -0.038     0.067
==============================================================================
Omnibus:                      848.789   Durbin-Watson:                   1.802
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2448.693
Skew:                          -0.832   Prob(JB):                         0.00
Kurtosis:                       5.848   Cond. No.                         2.90
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Maxed Credit Cards¶

In [148]:
ccData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCMAX04'])

# Construct a binary maxed-out credit card variable
data['CCMAX'] = (data['CCMAX04'] > 0)
data['CCMAX'] = data['CCMAX'].astype(float)

highGroup = ccData[ccData['CCMAX'] == 1]
lowGroup = ccData[ccData['CCMAX'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[556, 4735]
In [149]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.970145025
5.62815965621
3.9723576274
4.71353437695
Ttest_indResult(statistic=5.4510794617678338, pvalue=6.8548015146955797e-08)
In [150]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
630.138489209
608.448363252
275.456365398
278.160020617
Ttest_indResult(statistic=1.7532104964173634, pvalue=0.080007062113253813)
In [375]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.334862385321
-0.288589881593
RanksumsResult(statistic=-0.83093912199124886, pvalue=0.4060080214081957)
In [376]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.23167453800631352, 0.1240405548415566]
[2.4760669145406706, 0.013283867891809908]
In [952]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', ccData, hue='CCMAX')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[952]:
(0, 15)
In [377]:
# traditional moderation analyses
ccData['RISK_1K'] = preprocessing.scale(ccData['RISK_1K'].values)
ccData['PATIENCE_MONTH'] = preprocessing.scale(ccData['PATIENCE_MONTH'].values)

mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCMAX', data=ccData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCMAX', data=ccData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     35.78
Date:                Mon, 23 May 2016   Prob (F-statistic):           6.90e-23
Time:                        11:09:56   Log-Likelihood:                -7454.4
No. Observations:                5291   AIC:                         1.492e+04
Df Residuals:                    5287   BIC:                         1.494e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept               -0.0053      0.014     -0.370      0.712        -0.034     0.023
PATIENCE_MONTH           0.1231      0.014      8.625      0.000         0.095     0.151
CCMAX                    0.0242      0.045      0.534      0.594        -0.065     0.113
PATIENCE_MONTH:CCMAX     0.1328      0.049      2.714      0.007         0.037     0.229
==============================================================================
Omnibus:                      345.459   Durbin-Watson:                   1.943
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              124.507
Skew:                          -0.051   Prob(JB):                     9.20e-28
Kurtosis:                       2.255   Cond. No.                         3.85
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     41.88
Date:                Mon, 23 May 2016   Prob (F-statistic):           9.73e-27
Time:                        11:09:56   Log-Likelihood:                -7445.5
No. Observations:                5291   AIC:                         1.490e+04
Df Residuals:                    5287   BIC:                         1.493e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept        -0.0224      0.014     -1.556      0.120        -0.051     0.006
RISK_1K           0.1250      0.014      8.709      0.000         0.097     0.153
CCMAX             0.2068      0.044      4.656      0.000         0.120     0.294
RISK_1K:CCMAX     0.0848      0.045      1.897      0.058        -0.003     0.172
==============================================================================
Omnibus:                      800.606   Durbin-Watson:                   1.799
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2189.238
Skew:                          -0.820   Prob(JB):                         0.00
Kurtosis:                       5.691   Cond. No.                         3.42
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Race¶

In [70]:
raceData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'RACE'])

hispanicGroup = raceData[raceData['RACE'] == 1]
blackGroup = raceData[raceData['RACE'] == 2]
whiteGroup = raceData[raceData['RACE'] == 3]

nhispanicGroup = len(hispanicGroup)
nblackhGroup = len(blackGroup)
nwhiteGroup = len(whiteGroup)
print([nhispanicGroup, nblackhGroup, nwhiteGroup])
[1074, 1706, 2868]
In [72]:
print(np.mean(hispanicGroup['PATIENCE_MONTH']))
print(np.mean(blackGroup['PATIENCE_MONTH']))
print(np.mean(whiteGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(hispanicGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(blackGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(whiteGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(hispanicGroup['PATIENCE_MONTH'], blackGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(blackGroup['PATIENCE_MONTH'], whiteGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(hispanicGroup['PATIENCE_MONTH'], whiteGroup['PATIENCE_MONTH'], equal_var=False))
5.87720230192
6.18004786922
5.29531878445
4.22154607145
3.89176468233
4.68131639033
Ttest_indResult(statistic=-5.5144357457113964, pvalue=3.9130664363278932e-08)
Ttest_indResult(statistic=20.222436232523687, pvalue=1.3267327361193897e-86)
Ttest_indResult(statistic=11.067701469884245, pvalue=1.0749618457495447e-27)
In [73]:
print(np.mean(hispanicGroup['RISK_1K']))
print(np.mean(blackGroup['RISK_1K']))
print(np.mean(whiteGroup['RISK_1K']))

print(np.std(hispanicGroup['RISK_1K']))
print(np.std(blackGroup['RISK_1K']))
print(np.std(whiteGroup['RISK_1K']))

print(stats.ttest_ind(hispanicGroup['RISK_1K'], blackGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(blackGroup['RISK_1K'], whiteGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(hispanicGroup['RISK_1K'], whiteGroup['RISK_1K'], equal_var=False))
615.889199255
638.373974209
592.206066946
285.228327981
284.817239898
271.31290073
Ttest_indResult(statistic=-2.0241073719255724, pvalue=0.043075618849470282)
Ttest_indResult(statistic=5.3941914556813648, pvalue=7.3473486543098893e-08)
Ttest_indResult(statistic=2.3508006065808904, pvalue=0.018837895168701307)
In [34]:
print(np.mean(hispanicGroup['RISK_INCOME']))
print(np.mean(blackGroup['RISK_INCOME']))
print(np.mean(whiteGroup['RISK_INCOME']))
print(stats.ranksums(hispanicGroup['RISK_INCOME'], blackGroup['RISK_INCOME']))
print(stats.ranksums(blackGroup['RISK_INCOME'], whiteGroup['RISK_INCOME']))
print(stats.ranksums(hispanicGroup['RISK_INCOME'], whiteGroup['RISK_INCOME']))
-0.27969348659
-0.253317249698
-0.326302729529
RanksumsResult(statistic=-0.38905442398232343, pvalue=0.69723588702768724)
RanksumsResult(statistic=2.1131125256407075, pvalue=0.03459113785072912)
RanksumsResult(statistic=1.3548084754791943, pvalue=0.17547858557658291)
In [36]:
hispanicCorr = hispanicGroup['PATIENCE_MONTH'].corr(hispanicGroup['RISK_1K'])
blackCorr = blackGroup['PATIENCE_MONTH'].corr(blackGroup['RISK_1K'])
whiteCorr = whiteGroup['PATIENCE_MONTH'].corr(whiteGroup['RISK_1K'])
print([hispanicCorr, blackCorr, whiteCorr])

z,p = corrstats.independent_corr(hispanicCorr, blackCorr, nhispanicGroup, nblackhGroup)
print([z, p])
z,p = corrstats.independent_corr(blackCorr, whiteCorr, nblackhGroup, nwhiteGroup)
print([z, p])
z,p = corrstats.independent_corr(hispanicCorr, whiteCorr, nhispanicGroup, nwhiteGroup)
print([z, p])
[0.15605868367929054, 0.16154534511130955, 0.091234446342760248]
[0.14432805555088982, 0.88524142510941251]
[2.3362353570778733, 0.01947897849847724]
[1.8387471783877549, 0.065952380714138004]
In [959]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', raceData, hue='RACE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[959]:
(0, 15)
In [960]:
# traditional moderation analyses
raceData['RISK_1K'] = preprocessing.scale(raceData['RISK_1K'].values)
raceData['PATIENCE_MONTH'] = preprocessing.scale(raceData['PATIENCE_MONTH'].values)
# make non-black/non-hispanic category the pivot
raceData['RACE'] = raceData['RACE'] * -1
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*C(RACE)', data=raceData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*C(RACE)', data=raceData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     25.79
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           7.97e-26
Time:                        10:35:43   Log-Likelihood:                -7950.4
No. Observations:                5648   AIC:                         1.591e+04
Df Residuals:                    5642   BIC:                         1.595e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------------
Intercept                       -0.0444      0.019     -2.336      0.020        -0.082    -0.007
C(RACE)[T.-2]                    0.0823      0.032      2.583      0.010         0.020     0.145
C(RACE)[T.-1]                    0.0406      0.036      1.130      0.259        -0.030     0.111
PATIENCE_MONTH                   0.0876      0.018      4.805      0.000         0.052     0.123
PATIENCE_MONTH:C(RACE)[T.-2]     0.0974      0.032      3.000      0.003         0.034     0.161
PATIENCE_MONTH:C(RACE)[T.-1]     0.0812      0.037      2.209      0.027         0.009     0.153
==============================================================================
Omnibus:                      362.963   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              132.189
Skew:                          -0.057   Prob(JB):                     1.98e-29
Kurtosis:                       2.259   Cond. No.                         3.57
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.083
Model:                            OLS   Adj. R-squared:                  0.082
Method:                 Least Squares   F-statistic:                     102.0
Date:                Thu, 18 Jun 2015   Prob (F-statistic):          2.86e-103
Time:                        10:35:43   Log-Likelihood:                -7769.9
No. Observations:                5648   AIC:                         1.555e+04
Df Residuals:                    5642   BIC:                         1.559e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------
Intercept                -0.2418      0.018    -13.483      0.000        -0.277    -0.207
C(RACE)[T.-2]             0.5605      0.029     19.060      0.000         0.503     0.618
C(RACE)[T.-1]             0.3730      0.034     10.874      0.000         0.306     0.440
RISK_1K                   0.0950      0.018      5.167      0.000         0.059     0.131
RISK_1K:C(RACE)[T.-2]     0.0461      0.029      1.577      0.115        -0.011     0.103
RISK_1K:C(RACE)[T.-1]     0.0492      0.034      1.449      0.148        -0.017     0.116
==============================================================================
Omnibus:                      918.022   Durbin-Watson:                   1.909
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2856.263
Skew:                          -0.836   Prob(JB):                         0.00
Kurtosis:                       6.056   Cond. No.                         3.39
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Drug Use¶

In [75]:
drugData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'DRUG'])
highGroup = drugData[drugData['DRUG'] == 1]
lowGroup = drugData[drugData['DRUG'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[1999, 3649]
In [76]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.62426878029
5.70001039166
4.40004521409
4.68708824728
Ttest_indResult(statistic=-1.8090888501567992, pvalue=0.070507687506922434)
In [78]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
604.544772386
614.001918334
275.184429259
280.79419186
Ttest_indResult(statistic=-1.2258825688046784, pvalue=0.22031190081095478)
In [77]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))

print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.24008138352
-0.326258082654
275.184429259
280.79419186
RanksumsResult(statistic=1.7514361082611691, pvalue=0.079870817804464173)
In [59]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.13818042854693069, 0.13687970047138864]
[0.047615877527640342, 0.96202237794545797]
In [966]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', drugData, hue='DRUG')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[966]:
(0, 15)
In [967]:
# traditional moderation analyses
drugData['RISK_1K'] = preprocessing.scale(drugData['RISK_1K'].values)
drugData['PATIENCE_MONTH'] = preprocessing.scale(drugData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*DRUG', data=drugData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*DRUG', data=drugData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     36.67
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.84e-23
Time:                        10:35:46   Log-Likelihood:                -7959.7
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=======================================================================================
                          coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept               0.0096      0.016      0.586      0.558        -0.023     0.042
PATIENCE_MONTH          0.1359      0.016      8.404      0.000         0.104     0.168
DRUG                   -0.0270      0.028     -0.980      0.327        -0.081     0.027
PATIENCE_MONTH:DRUG     0.0043      0.028      0.153      0.878        -0.050     0.059
==============================================================================
Omnibus:                      382.384   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              135.902
Skew:                          -0.055   Prob(JB):                     3.08e-30
Kurtosis:                       2.248   Cond. No.                         2.44
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     37.24
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           7.99e-24
Time:                        10:35:46   Log-Likelihood:                -7958.8
No. Observations:                5648   AIC:                         1.593e+04
Df Residuals:                    5644   BIC:                         1.595e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
================================================================================
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
Intercept        0.0159      0.016      0.972      0.331        -0.016     0.048
RISK_1K          0.1379      0.016      8.466      0.000         0.106     0.170
DRUG            -0.0451      0.028     -1.635      0.102        -0.099     0.009
RISK_1K:DRUG    -0.0017      0.028     -0.060      0.952        -0.056     0.053
==============================================================================
Omnibus:                      886.237   Durbin-Watson:                   1.797
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2440.734
Skew:                          -0.846   Prob(JB):                         0.00
Kurtosis:                       5.740   Cond. No.                         2.43
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Alcohol¶

In [79]:
alcData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'ALCOHOL'])

highGroup = alcData[alcData['ALCOHOL'] == 1]
lowGroup = alcData[alcData['ALCOHOL'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[2968, 2620]
In [80]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.53465788634
5.81794267332
4.7224391023
4.40070436498
Ttest_indResult(statistic=-6.9728964441447845, pvalue=3.4663668268252864e-12)
In [81]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
611.549528302
608.95
275.073626948
282.955508904
Ttest_indResult(statistic=0.34715120014697853, pvalue=0.72849111412071155)
In [63]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.237095664143
-0.360492379836
RanksumsResult(statistic=3.4771718698275591, pvalue=0.0005067328784822678)
In [65]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.12251559136411032, 0.15613787117408387]
[1.2785122203966457, 0.20106887869452894]
In [973]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', alcData, hue='ALCOHOL')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[973]:
(0, 15)
In [974]:
# traditional moderation anaylses
alcData['RISK_1K'] = preprocessing.scale(alcData['RISK_1K'].values)
alcData['PATIENCE_MONTH'] = preprocessing.scale(alcData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*ALCOHOL', data=alcData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*ALCOHOL', data=alcData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     37.13
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           9.49e-24
Time:                        10:35:48   Log-Likelihood:                -7873.8
No. Observations:                5588   AIC:                         1.576e+04
Df Residuals:                    5584   BIC:                         1.578e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                 -0.0210      0.019     -1.082      0.279        -0.059     0.017
PATIENCE_MONTH             0.1632      0.020      8.189      0.000         0.124     0.202
ALCOHOL                    0.0358      0.027      1.341      0.180        -0.017     0.088
PATIENCE_MONTH:ALCOHOL    -0.0444      0.027     -1.657      0.098        -0.097     0.008
==============================================================================
Omnibus:                      373.264   Durbin-Watson:                   1.935
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              133.336
Skew:                          -0.053   Prob(JB):                     1.11e-29
Kurtosis:                       2.251   Cond. No.                         2.89
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.028
Model:                            OLS   Adj. R-squared:                  0.027
Method:                 Least Squares   F-statistic:                     52.83
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.15e-33
Time:                        10:35:48   Log-Likelihood:                -7850.8
No. Observations:                5588   AIC:                         1.571e+04
Df Residuals:                    5584   BIC:                         1.574e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------
Intercept           0.0993      0.019      5.154      0.000         0.062     0.137
RISK_1K             0.1494      0.019      7.866      0.000         0.112     0.187
ALCOHOL            -0.1869      0.026     -7.068      0.000        -0.239    -0.135
RISK_1K:ALCOHOL    -0.0231      0.026     -0.873      0.382        -0.075     0.029
==============================================================================
Omnibus:                      873.780   Durbin-Watson:                   1.825
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2436.505
Skew:                          -0.839   Prob(JB):                         0.00
Kurtosis:                       5.765   Cond. No.                         2.70
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Binge Drinking¶

In [82]:
bingData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'BINGE'])
In [84]:
# Discretize this variable
bingData['BINGE'] = (bingData['BINGE'] > 0)
bingData['BINGE'] = bingData['BINGE'].astype(float)
In [85]:
highGroup = bingData[bingData['BINGE'] == 1]
lowGroup = bingData[bingData['BINGE'] == 0]

nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[785, 2181]
In [86]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.61853176553
5.50540538831
4.52542753533
4.79143082989
Ttest_indResult(statistic=1.781284265176011, pvalue=0.075077960626970155)
In [87]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
610.401273885
612.088033012
279.946274643
273.43369214
Ttest_indResult(statistic=-0.14557374703559076, pvalue=0.88427951647974057)
In [198]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.143328929987
-0.271774569166
RanksumsResult(statistic=2.8050887171458534, pvalue=0.0050302732190054267)
In [202]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])

z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
[0.1096575966669139, 0.12728126290818548]
[0.42877876851941787, 0.66808423125947947]
In [200]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', bingData, hue='BINGE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[200]:
(0, 15)
In [203]:
# traditional moderation analyses
bingData['RISK_1K'] = preprocessing.scale(bingData['RISK_1K'].values)
bingData['PATIENCE_MONTH'] = preprocessing.scale(bingData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*BINGE', data=bingData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*BINGE', data=bingData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     15.10
Date:                Thu, 19 May 2016   Prob (F-statistic):           9.41e-10
Time:                        17:48:20   Log-Likelihood:                -4186.1
No. Observations:                2966   AIC:                             8380.
Df Residuals:                    2962   BIC:                             8404.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept                0.0040      0.021      0.190      0.849        -0.038     0.046
PATIENCE_MONTH           0.1253      0.021      5.948      0.000         0.084     0.167
BINGE                   -0.0147      0.041     -0.355      0.723        -0.096     0.066
PATIENCE_MONTH:BINGE    -0.0106      0.042     -0.252      0.801        -0.093     0.072
==============================================================================
Omnibus:                      189.400   Durbin-Watson:                   1.955
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               68.603
Skew:                          -0.033   Prob(JB):                     1.27e-15
Kurtosis:                       2.258   Cond. No.                         2.52
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                  0.015
Method:                 Least Squares   F-statistic:                     16.23
Date:                Thu, 19 May 2016   Prob (F-statistic):           1.84e-10
Time:                        17:48:20   Log-Likelihood:                -4184.4
No. Observations:                2966   AIC:                             8377.
Df Residuals:                    2962   BIC:                             8401.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept        -0.0195      0.021     -0.917      0.359        -0.061     0.022
RISK_1K           0.1293      0.021      6.043      0.000         0.087     0.171
BINGE             0.0735      0.041      1.780      0.075        -0.007     0.155
RISK_1K:BINGE    -0.0245      0.041     -0.598      0.550        -0.105     0.056
==============================================================================
Omnibus:                      426.309   Durbin-Watson:                   1.827
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1192.219
Skew:                          -0.768   Prob(JB):                    1.30e-259
Kurtosis:                       5.700   Cond. No.                         2.47
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Smoking (Ever)¶

Whether subjects report having smoked > 100 cigarettes in their lifetimes

In [88]:
eversmokData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SMOKE_100_CIGS'])
highGroup = eversmokData[eversmokData['SMOKE_100_CIGS'] == 1]
lowGroup = eversmokData[eversmokData['SMOKE_100_CIGS'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[2488, 2763]
In [89]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.72191101849
5.63105338917
4.46434304943
4.60702427907
Ttest_indResult(statistic=2.1750881636271329, pvalue=0.029668315639256884)
In [90]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
617.564308682
600.851610568
282.946837881
273.100928157
Ttest_indResult(statistic=2.1722281921834301, pvalue=0.029883944401607389)
In [207]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.274390243902
-0.318681318681
RanksumsResult(statistic=0.89363333611734486, pvalue=0.37151810402064767)
In [211]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', eversmokData, hue='SMOKE_100_CIGS')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
[0.13993570181264001, 0.1446474077109931]
[0.17390313096494978, 0.86194159197023335]
Out[211]:
(0, 15)
In [212]:
# traditional moderation analyses
eversmokData['RISK_1K'] = preprocessing.scale(eversmokData['RISK_1K'].values)
eversmokData['PATIENCE_MONTH'] = preprocessing.scale(eversmokData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SMOKE_100_CIGS', data=eversmokData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SMOKE_100_CIGS', data=eversmokData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     37.79
Date:                Fri, 20 May 2016   Prob (F-statistic):           3.76e-24
Time:                        12:06:50   Log-Likelihood:                -7394.7
No. Observations:                5251   AIC:                         1.480e+04
Df Residuals:                    5247   BIC:                         1.482e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=================================================================================================
                                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------------
Intercept                        -0.0245      0.019     -1.300      0.194        -0.061     0.012
PATIENCE_MONTH                    0.1408      0.019      7.548      0.000         0.104     0.177
SMOKE_100_CIGS                    0.0516      0.027      1.885      0.060        -0.002     0.105
PATIENCE_MONTH:SMOKE_100_CIGS     0.0033      0.027      0.120      0.904        -0.050     0.057
==============================================================================
Omnibus:                      327.241   Durbin-Watson:                   1.942
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              120.462
Skew:                          -0.052   Prob(JB):                     6.95e-27
Kurtosis:                       2.265   Cond. No.                         2.58
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     37.85
Date:                Fri, 20 May 2016   Prob (F-statistic):           3.43e-24
Time:                        12:06:50   Log-Likelihood:                -7394.6
No. Observations:                5251   AIC:                         1.480e+04
Df Residuals:                    5247   BIC:                         1.482e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                 -0.0242      0.019     -1.285      0.199        -0.061     0.013
RISK_1K                    0.1486      0.019      7.754      0.000         0.111     0.186
SMOKE_100_CIGS             0.0515      0.027      1.882      0.060        -0.002     0.105
RISK_1K:SMOKE_100_CIGS    -0.0127      0.027     -0.465      0.642        -0.066     0.041
==============================================================================
Omnibus:                      801.344   Durbin-Watson:                   1.776
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2218.118
Skew:                          -0.822   Prob(JB):                         0.00
Kurtosis:                       5.727   Cond. No.                         2.62
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Smoking (Age)¶

  • Age at which subjects began smokign daily
  • Note that this is among subjects reporting that they have smoked > 100 cigarettes in their lifetimes
In [221]:
smokageData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'AGE_START_SMOKE'])
len(smokageData)
Out[221]:
2415
In [219]:
nCat = 5

bins = []
for i in range(nCat):
    i = float(i)
    bins.append(smokageData['AGE_START_SMOKE'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)

binned = np.digitize(smokageData['AGE_START_SMOKE'], bins)
smokageData['bin'] = binned
for i in range(nCat):
    print(smokageData[smokageData['bin'] == i]['PATIENCE_MONTH'].corr(smokageData[smokageData['bin'] == i]['RISK_1K']))

g = sns.PairGrid(smokageData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
0.100186751841
0.155996190442
0.147628086716
0.167129479771
0.137998810759
Out[219]:
<seaborn.axisgrid.PairGrid at 0xc079940>
In [225]:
delaySAcorr = smokageData['PATIENCE_MONTH'].corr(smokageData['AGE_START_SMOKE'])
riskSAcorr = smokageData['RISK_1K'].corr(smokageData['AGE_START_SMOKE'])
delayriskcorr = smokageData['RISK_1K'].corr(smokageData['PATIENCE_MONTH'])

print([delaySAcorr, riskSAcorr])
z,p = corrstats.dependent_corr(delaySAcorr, riskSAcorr, delayriskcorr, len(smokageData))
print([z, p])
[0.011180368581907964, 0.035900866978200878]
[-0.92437624213773839, 0.35538288768347881]
In [218]:
# traditional moderation analyses
smokageData['RISK_1K'] = preprocessing.scale(smokageData['RISK_1K'].values)
smokageData['PATIENCE_MONTH'] = preprocessing.scale(smokageData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*AGE_START_SMOKE', data=smokageData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*AGE_START_SMOKE', data=smokageData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     16.52
Date:                Fri, 20 May 2016   Prob (F-statistic):           1.27e-10
Time:                        12:09:10   Log-Likelihood:                -3402.2
No. Observations:                2415   AIC:                             6812.
Df Residuals:                    2411   BIC:                             6835.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==================================================================================================
                                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------------
Intercept                         -0.1205      0.073     -1.652      0.099        -0.264     0.023
PATIENCE_MONTH                     0.0755      0.071      1.070      0.285        -0.063     0.214
AGE_START_SMOKE                    0.0075      0.004      1.716      0.086        -0.001     0.016
PATIENCE_MONTH:AGE_START_SMOKE     0.0038      0.004      0.895      0.371        -0.005     0.012
==============================================================================
Omnibus:                      215.009   Durbin-Watson:                   1.959
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               69.222
Skew:                          -0.087   Prob(JB):                     9.30e-16
Kurtosis:                       2.189   Cond. No.                         61.0
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     15.39
Date:                Fri, 20 May 2016   Prob (F-statistic):           6.47e-10
Time:                        12:09:10   Log-Likelihood:                -3403.8
No. Observations:                2415   AIC:                             6816.
Df Residuals:                    2411   BIC:                             6839.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===========================================================================================
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                  -0.0185      0.073     -0.252      0.801        -0.162     0.125
RISK_1K                     0.1001      0.071      1.403      0.161        -0.040     0.240
AGE_START_SMOKE             0.0011      0.004      0.257      0.797        -0.007     0.010
RISK_1K:AGE_START_SMOKE     0.0023      0.004      0.528      0.597        -0.006     0.011
==============================================================================
Omnibus:                      390.945   Durbin-Watson:                   1.868
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1230.611
Skew:                          -0.818   Prob(JB):                    5.97e-268
Kurtosis:                       6.091   Cond. No.                         63.4
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Smoking (Current)¶

Note that this is among subjects reporting that they have smoked > 100 cigarettes in their lifetimes

In [91]:
smokData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CIGARETTE'])
In [92]:
upperThresh = 2.1
lowerThresh = 1.1

#bins = np.array([lowerThresh, upperThresh])
#smokData['bin'] = np.digitize(smokData['CIGARETTE'], bins)
#
#lowGroup = smokData[smokData['bin'] == 2]
#midGroup = smokData[smokData['bin'] == 1]
#highGroup = smokData[smokData['bin'] == 0]

lowGroup = smokData[smokData['CIGARETTE'] == 3]
midGroup = smokData[smokData['CIGARETTE'] == 2]
highGroup = smokData[smokData['CIGARETTE'] == 1]

nHighGroup = len(highGroup)
nMidGroup = len(midGroup)
nLowGroup = len(lowGroup)
print([nHighGroup, nMidGroup, nLowGroup])
[1109, 313, 1472]
In [93]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(midGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(midGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], midGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(midGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.83943836841
5.81634398321
5.61065705465
4.39116499615
5.1385137354
4.49469457284
Ttest_indResult(statistic=0.22471205676173053, pvalue=0.8223019583526826)
Ttest_indResult(statistic=2.044434743085247, pvalue=0.041518213102178451)
Ttest_indResult(statistic=3.8608827634706033, pvalue=0.00011595680352897648)
In [94]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(midGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(midGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], midGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(midGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
608.279531109
641.539936102
611.138586957
276.485047291
302.813525379
287.680991858
Ttest_indResult(statistic=-1.7459823694200121, pvalue=0.081469609044106786)
Ttest_indResult(statistic=1.6246518909887269, pvalue=0.10495369002191492)
Ttest_indResult(statistic=-0.25546226016818907, pvalue=0.79838768112860803)
In [991]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(midGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))

print(stats.ranksums(highGroup['RISK_1K'], midGroup['RISK_1K']))
print(stats.ranksums(midGroup['RISK_1K'], lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
-0.2000691085
-0.224025974026
-0.351817334576
(-1.6012802459323581, 0.10931486226730171)
(1.8594764172505225, 0.062959638939650717)
(0.42416286092698258, 0.67144704401426059)
In [236]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
print(highCorr)
midCorr = midGroup['PATIENCE_MONTH'].corr(midGroup['RISK_1K'])
print(midCorr)
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print(lowCorr)
print('')
z,p = corrstats.independent_corr(highCorr, midCorr, nHighGroup, nMidGroup)
print([z, p])
z,p = corrstats.independent_corr(midCorr, lowCorr, nMidGroup, nLowGroup)
print([z, p])
z,p = corrstats.independent_corr(highCorr, lowCorr, nHighGroup, nLowGroup)
print([z, p])
0.122265136651
0.135890858993
0.148882182074

[0.21562067845049152, 0.82928343586264242]
[0.21215791391991889, 0.83198383913611673]
[0.68115267648774058, 0.49577488863095187]
In [993]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', smokData, hue='bin')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
Out[993]:
(0, 15)
In [994]:
# traditional moderation analyses
smokData['RISK_1K'] = preprocessing.scale(smokData['RISK_1K'].values)
smokData['PATIENCE_MONTH'] = preprocessing.scale(smokData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*C(CIGARETTE)', data=smokData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*C(CIGARETTE)', data=smokData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     11.97
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           1.73e-11
Time:                        10:35:56   Log-Likelihood:                -4076.7
No. Observations:                2894   AIC:                             8165.
Df Residuals:                    2888   BIC:                             8201.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
======================================================================================================
                                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------------------
Intercept                             -0.0272      0.030     -0.912      0.362        -0.086     0.031
C(CIGARETTE)[T.2.0]                    0.1177      0.064      1.851      0.064        -0.007     0.242
C(CIGARETTE)[T.3.0]                    0.0305      0.040      0.772      0.440        -0.047     0.108
PATIENCE_MONTH                         0.1212      0.030      3.982      0.000         0.062     0.181
PATIENCE_MONTH:C(CIGARETTE)[T.2.0]     0.0122      0.060      0.203      0.839        -0.106     0.130
PATIENCE_MONTH:C(CIGARETTE)[T.3.0]     0.0300      0.040      0.749      0.454        -0.049     0.108
==============================================================================
Omnibus:                      249.789   Durbin-Watson:                   1.925
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               82.569
Skew:                          -0.101   Prob(JB):                     1.18e-18
Kurtosis:                       2.198   Cond. No.                         4.43
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     14.45
Date:                Thu, 18 Jun 2015   Prob (F-statistic):           5.19e-14
Time:                        10:35:56   Log-Likelihood:                -4070.6
No. Observations:                2894   AIC:                             8153.
Df Residuals:                    2888   BIC:                             8189.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
===============================================================================================
                                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------
Intercept                       0.0807      0.030      2.719      0.007         0.023     0.139
C(CIGARETTE)[T.2.0]            -0.0311      0.064     -0.490      0.624        -0.156     0.093
C(CIGARETTE)[T.3.0]            -0.1522      0.039     -3.872      0.000        -0.229    -0.075
RISK_1K                         0.1234      0.031      4.026      0.000         0.063     0.183
RISK_1K:C(CIGARETTE)[T.2.0]     0.0151      0.061      0.248      0.804        -0.104     0.135
RISK_1K:C(CIGARETTE)[T.3.0]     0.0233      0.040      0.584      0.560        -0.055     0.102
==============================================================================
Omnibus:                      497.102   Durbin-Watson:                   1.860
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1644.027
Skew:                          -0.854   Prob(JB):                         0.00
Kurtosis:                       6.273   Cond. No.                         4.35
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Employment¶

In [151]:
employmentData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EMPLOYED'])
highGroup = employmentData[employmentData['EMPLOYED'] == 1]
lowGroup = employmentData[employmentData['EMPLOYED'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
[4532, 245]
In [152]:
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))

print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))

print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
5.61095015272
6.01111008681
4.55478707866
4.50390917455
Ttest_indResult(statistic=-4.0443787702919503, pvalue=6.842628133705665e-05)
In [153]:
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))

print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))

print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
609.753089144
592.640816327
276.966121979
272.811038677
Ttest_indResult(statistic=0.95369763151418785, pvalue=0.34108412427766766)
In [381]:
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
-0.311809781384
-0.0654008438819
RanksumsResult(statistic=-2.8260003090487591, pvalue=0.0047133218573719863)
In [382]:
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
[0.12871516424798418, 0.20503703253779282]
[1.1905857718655926, 0.23381623992610856]
In [ ]:
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', employmentData, hue='EMPLOYED')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
In [383]:
# traditional moderation analyses
employmentData['RISK_1K'] = preprocessing.scale(employmentData['RISK_1K'].values)
employmentData['PATIENCE_MONTH'] = preprocessing.scale(employmentData['PATIENCE_MONTH'].values)
mod  = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EMPLOYED', data=employmentData).fit(disp=False)
print mod.summary()
mod  = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EMPLOYED', data=employmentData).fit(disp=False)
print mod.summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                RISK_1K   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     29.20
Date:                Mon, 23 May 2016   Prob (F-statistic):           1.07e-18
Time:                        11:17:57   Log-Likelihood:                -6734.8
No. Observations:                4777   AIC:                         1.348e+04
Df Residuals:                    4773   BIC:                         1.350e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===========================================================================================
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                  -0.1096      0.065     -1.678      0.093        -0.238     0.018
PATIENCE_MONTH              0.2039      0.064      3.191      0.001         0.079     0.329
EMPLOYED                    0.1146      0.067      1.711      0.087        -0.017     0.246
PATIENCE_MONTH:EMPLOYED    -0.0749      0.066     -1.142      0.253        -0.203     0.054
==============================================================================
Omnibus:                      287.687   Durbin-Watson:                   1.922
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              106.960
Skew:                          -0.042   Prob(JB):                     5.94e-24
Kurtosis:                       2.272   Cond. No.                         9.96
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         PATIENCE_MONTH   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     34.42
Date:                Mon, 23 May 2016   Prob (F-statistic):           5.31e-22
Time:                        11:17:57   Log-Likelihood:                -6727.1
No. Observations:                4777   AIC:                         1.346e+04
Df Residuals:                    4773   BIC:                         1.349e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------
Intercept            0.2622      0.063      4.138      0.000         0.138     0.386
RISK_1K              0.2062      0.064      3.214      0.001         0.080     0.332
EMPLOYED            -0.2761      0.065     -4.245      0.000        -0.404    -0.149
RISK_1K:EMPLOYED    -0.0777      0.066     -1.181      0.238        -0.207     0.051
==============================================================================
Omnibus:                      749.933   Durbin-Watson:                   1.810
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1811.047
Skew:                          -0.887   Prob(JB):                         0.00
Kurtosis:                       5.439   Cond. No.                         9.06
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Effect Sizes¶

In [431]:
from effect_sizes import cohen_d
from scipy.stats import pearsonr

Binary Moderators¶

In [429]:
# Gender
label = 'SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 2]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 2]['RISK_1K']))
-0.110550440599
-0.0374379723608
In [54]:
# Retirement savings
label = 'RETIREMENT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.400469223985
-0.0141503687517
In [55]:
# Theft (> $50)
label = 'STEAL_LARGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.0860402894629
-0.0913948859938
In [56]:
# Theft (< $50)
label = 'STEAL_SMALL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.0772662313811
0.0413157630998
In [57]:
# Worry/Anxiety/Depression
label = 'WORRY_ANX_DEPRESS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.0549349828149
0.0436650019074
In [58]:
# Weight loss
label = 'TRYING_LOSE_WEIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] > 1]['PATIENCE_MONTH'],d[d[label] == 1]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] > 1]['RISK_1K'],d[d[label] == 1]['RISK_1K']))
0.0391917549272
0.0183453770924
In [59]:
# Fighting
label = 'FIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] > 0]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] > 0]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.105368227
0.0381542467936
In [60]:
# Health Insurance
label = 'HEALTH_INSUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.319465707681
-0.0302730833051
In [61]:
# Entrepreneurs
label = 'ENTREPRENEUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.00840211961024
0.078440266379
In [62]:
# Exercise
label = 'EXERCISE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] > 0]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] > 0]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.0180375565407
0.00242018826361
In [63]:
# Credit Card Missed Payment
label = 'CCMISSPAY'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.193057623641
0.0764416987874
In [64]:
# Credit Cards Maxed
label = 'CCMAX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.225638003944
0.0778939462782
In [65]:
# Credit Card Debt
label = 'CCDEBT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.277303836307
0.0141348128089
In [66]:
# Drug use
label = 'DRUG'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.0500330177149
-0.0340115400669
In [67]:
# Alcohol
label = 'ALCOHOL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.186650674436
0.00931421333326
In [68]:
# Binge drinking
label = 'BINGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] > 0]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] > 0]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.0734973019496
-0.00609311343154
In [69]:
# Smoking (ever)
label = 'SMOKE_100_CIGS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
0.0600821858009
0.0600915334594
In [70]:
# Smoking (current)
label = 'CIGARETTE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] > 1]['PATIENCE_MONTH'],d[d[label] == 1]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] > 1]['RISK_1K'],d[d[label] == 1]['RISK_1K']))
-0.128035733335
0.0288638344336
In [71]:
# Emergency fund
label = 'EMERGENCY_FUND'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 0]['PATIENCE_MONTH'],d[d[label] == 1]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 0]['RISK_1K'],d[d[label] == 1]['RISK_1K']))
0.253140076912
-0.0554692341623
In [72]:
# Financial lit, #4
label = 'FIT_LIT_4'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 0]['PATIENCE_MONTH'],d[d[label] == 1]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 0]['RISK_1K'],d[d[label] == 1]['RISK_1K']))
-0.252307943391
-0.00648158786332
In [73]:
# Financial lit, #5
label = 'FIT_LIT_5'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] > 1]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] > 1]['RISK_1K']))
-0.150001664836
-0.0247276112581
In [74]:
# Financial lit, #6
label = 'FIT_LIT_6'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 3]['PATIENCE_MONTH'],d[d[label] < 3]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 3]['RISK_1K'],d[d[label] < 3]['RISK_1K']))
-0.261160148001
-0.00910803033954
In [75]:
# Financial lit, #7
label = 'FIT_LIT_7'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 2]['PATIENCE_MONTH'],d[(d[label] < 2) | (d[label] > 2)]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 2]['RISK_1K'],d[(d[label] < 2) | (d[label] > 2)]['RISK_1K']))
-0.164332585672
-0.024859250724
In [76]:
# Financial lit, #8
label = 'FIT_LIT_8'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.356432199746
-0.078834263247
In [77]:
# Employment
label = 'EMPLOYED'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
print(cohen_d(d[d[label] == 1]['PATIENCE_MONTH'],d[d[label] == 0]['PATIENCE_MONTH']))
print(cohen_d(d[d[label] == 1]['RISK_1K'],d[d[label] == 0]['RISK_1K']))
-0.264622437547
0.062183693383

Continuous Moderators¶

In [441]:
# IQ
label = 'IQ'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr)
-0.312883076283
-0.0609658271817
In [442]:
# Income
label = 'INCOME'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr)
-0.141088288068
0.0131426659311
In [443]:
# Doctor Visit
label = 'DOC_VISIT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr)
-0.0414812109439
-0.0213020268626
In [444]:
# BMI
label = 'BMI'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr * corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr * corr)
0.00704744688928
7.09151905358e-05
In [445]:
# Age at which R started smoking
label = 'AGE_START_SMOKE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr)
0.0111803685819
0.0359008669782
In [446]:
# Age of sexual debut
label = 'AGE_FIRST_SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
corr,p = pearsonr(d['PATIENCE_MONTH'], d[label])
print(corr)
corr,p = pearsonr(d['RISK_1K'], d[label])
print(corr)
-0.179499854553
-0.0591763680657

Partial Correlations¶

In [22]:
from partial_corr import partial_corr
In [24]:
# Gender
label = 'SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K       SEX
PATIENCE_MONTH        1.000000  0.137643  0.055184
RISK_1K               0.137643  1.000000  0.018715
SEX                   0.055184  0.018715  1.000000

[[ 1.          0.13684307  0.05312316]
 [ 0.13684307  1.          0.01124301]
 [ 0.05312316  0.01124301  1.        ]]
In [25]:
# Retirement savings
label = 'RETIREMENT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  RETIREMENT
PATIENCE_MONTH        1.000000  0.131196   -0.171473
RISK_1K               0.131196  1.000000   -0.006048
RETIREMENT           -0.171473 -0.006048    1.000000

[[ 1.          0.1321185  -0.17217128]
 [ 0.1321185   1.          0.01684185]
 [-0.17217128  0.01684185  1.        ]]
In [26]:
label = 'STEAL_LARGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  STEAL_LARGE
PATIENCE_MONTH        1.000000  0.142462     0.033815
RISK_1K               0.142462  1.000000     0.022838
STEAL_LARGE           0.033815  0.022838     1.000000

[[ 1.          0.14180791  0.03088486]
 [ 0.14180791  1.          0.01821659]
 [ 0.03088486  0.01821659  1.        ]]
In [27]:
label = 'STEAL_SMALL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  STEAL_SMALL
PATIENCE_MONTH        1.000000  0.141401    -0.061069
RISK_1K               0.141401  1.000000     0.016319
STEAL_SMALL          -0.061069  0.016319     1.000000

[[ 1.          0.14268264 -0.0640284 ]
 [ 0.14268264  1.          0.02525498]
 [-0.0640284   0.02525498  1.        ]]
In [28]:
label = 'WORRY_ANX_DEPRESS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                   PATIENCE_MONTH   RISK_1K  WORRY_ANX_DEPRESS
PATIENCE_MONTH           1.000000  0.137870           0.018542
RISK_1K                  0.137870  1.000000           0.014651
WORRY_ANX_DEPRESS        0.018542  0.014651           1.000000

[[ 1.          0.13763636  0.01668332]
 [ 0.13763636  1.          0.01221358]
 [ 0.01668332  0.01221358  1.        ]]
In [29]:
label = 'TRYING_LOSE_WEIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                    PATIENCE_MONTH   RISK_1K  TRYING_LOSE_WEIGHT
PATIENCE_MONTH            1.000000  0.137643           -0.002362
RISK_1K                   0.137643  1.000000            0.004255
TRYING_LOSE_WEIGHT       -0.002362  0.004255            1.000000

[[ 1.          0.13765506 -0.00297621]
 [ 0.13765506  1.          0.00462446]
 [-0.00297621  0.00462446  1.        ]]
In [30]:
label = 'FIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K     FIGHT
PATIENCE_MONTH        1.000000  0.140293  0.020994
RISK_1K               0.140293  1.000000  0.016197
FIGHT                 0.020994  0.016197  1.000000

[[ 1.          0.14000234  0.01891141]
 [ 0.14000234  1.          0.01338654]
 [ 0.01891141  0.01338654  1.        ]]
In [31]:
label = 'HEALTH_INSUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  HEALTH_INSUR
PATIENCE_MONTH        1.000000  0.137386     -0.118219
RISK_1K               0.137386  1.000000     -0.011827
HEALTH_INSUR         -0.118219 -0.011827      1.000000

[[ 1.          0.13695796 -0.11771815]
 [ 0.13695796  1.          0.00448804]
 [-0.11771815  0.00448804  1.        ]]
In [32]:
label = 'HEALTH_INSUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  HEALTH_INSUR
PATIENCE_MONTH        1.000000  0.137386     -0.118219
RISK_1K               0.137386  1.000000     -0.011827
HEALTH_INSUR         -0.118219 -0.011827      1.000000

[[ 1.          0.13695796 -0.11771815]
 [ 0.13695796  1.          0.00448804]
 [-0.11771815  0.00448804  1.        ]]
In [33]:
label = 'ENTREPRENEUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  ENTREPRENEUR
PATIENCE_MONTH        1.000000  0.138232      0.003249
RISK_1K               0.138232  1.000000      0.030702
ENTREPRENEUR          0.003249  0.030702      1.000000

[[ 1.          0.13819857 -0.0010048 ]
 [ 0.13819857  1.          0.03054662]
 [-0.0010048   0.03054662  1.        ]]
In [34]:
label = 'EXERCISE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  EXERCISE
PATIENCE_MONTH        1.000000  0.137643  -0.00876
RISK_1K               0.137643  1.000000   0.00117
EXERCISE             -0.008760  0.001170   1.00000

[[ 1.          0.137659   -0.00900691]
 [ 0.137659    1.          0.00239838]
 [-0.00900691  0.00239838  1.        ]]
In [35]:
label = 'CCMISSPAY'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  CCMISSPAY
PATIENCE_MONTH        1.000000  0.140922   0.078072
RISK_1K               0.140922  1.000000   0.031570
CCMISSPAY             0.078072  0.031570   1.000000

[[ 1.          0.13895016  0.0744024 ]
 [ 0.13895016  1.          0.02083871]
 [ 0.0744024   0.02083871  1.        ]]
In [36]:
label = 'CCMAX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K     CCMAX
PATIENCE_MONTH        1.000000  0.137643  0.064419
RISK_1K               0.137643  1.000000  0.023088
CCMAX                 0.064419  0.023088  1.000000

[[ 1.          0.13647585  0.06184625]
 [ 0.13647585  1.          0.01438745]
 [ 0.06184625  0.01438745  1.        ]]
In [37]:
label = 'CCDEBT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K    CCDEBT
PATIENCE_MONTH        1.000000  0.141159 -0.134111
RISK_1K               0.141159  1.000000  0.006998
CCDEBT               -0.134111  0.006998  1.000000

[[ 1.          0.1433967  -0.13646911]
 [ 0.1433967   1.          0.02643054]
 [-0.13646911  0.02643054  1.        ]]
In [38]:
label = 'DRUG'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K      DRUG
PATIENCE_MONTH        1.000000  0.137643 -0.023778
RISK_1K               0.137643  1.000000 -0.016217
DRUG                 -0.023778 -0.016217  1.000000

[[ 1.          0.13731463 -0.02175617]
 [ 0.13731463  1.         -0.01307231]
 [-0.02175617 -0.01307231  1.        ]]
In [39]:
label = 'ALCOHOL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K   ALCOHOL
PATIENCE_MONTH        1.000000  0.137005 -0.092626
RISK_1K               0.137005  1.000000  0.004653
ALCOHOL              -0.092626  0.004653  1.000000

[[ 1.          0.13803093 -0.09415271]
 [ 0.13803093  1.          0.01758392]
 [-0.09415271  0.01758392  1.        ]]
In [40]:
label = 'BINGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K     BINGE
PATIENCE_MONTH        1.000000  0.122469  0.022948
RISK_1K               0.122469  1.000000 -0.011895
BINGE                 0.022948 -0.011895  1.000000

[[ 1.          0.12278297  0.02459171]
 [ 0.12278297  1.         -0.01482062]
 [ 0.02459171 -0.01482062  1.        ]]
In [41]:
label = 'SMOKE_100_CIGS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  SMOKE_100_CIGS
PATIENCE_MONTH        1.000000  0.143114        0.029976
RISK_1K               0.143114  1.000000        0.030025
SMOKE_100_CIGS        0.029976  0.030025        1.000000

[[ 1.          0.14234163  0.02595764]
 [ 0.14234163  1.          0.026014  ]
 [ 0.02595764  0.026014    1.        ]]
In [42]:
label = 'CIGARETTE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  CIGARETTE
PATIENCE_MONTH        1.000000  0.137521  -0.071627
RISK_1K               0.137521  1.000000   0.003073
CIGARETTE            -0.071627  0.003073   1.000000

[[ 1.          0.13809637 -0.07274151]
 [ 0.13809637  1.          0.01308061]
 [-0.07274151  0.01308061  1.        ]]
In [43]:
label = 'EMERGENCY_FUND'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  EMERGENCY_FUND
PATIENCE_MONTH        1.000000  0.138633       -0.123158
RISK_1K               0.138633  1.000000        0.026722
EMERGENCY_FUND       -0.123158  0.026722        1.000000

[[ 1.          0.14306384 -0.12814495]
 [ 0.14306384  1.          0.04456166]
 [-0.12814495  0.04456166  1.        ]]
In [44]:
label = 'FIT_LIT_4'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  FIT_LIT_4
PATIENCE_MONTH        1.000000  0.117339   0.105289
RISK_1K               0.117339  1.000000   0.002764
FIT_LIT_4             0.105289  0.002764   1.000000

[[ 1.          0.11770277  0.10569505]
 [ 0.11770277  1.         -0.00971165]
 [ 0.10569505 -0.00971165  1.        ]]
In [45]:
label = 'FIT_LIT_5'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  FIT_LIT_5
PATIENCE_MONTH        1.000000  0.139498   0.062226
RISK_1K               0.139498  1.000000   0.013936
FIT_LIT_5             0.062226  0.013936   1.000000

[[ 1.          0.13891347  0.06088352]
 [ 0.13891347  1.          0.00531767]
 [ 0.06088352  0.00531767  1.        ]]
In [46]:
label = 'FIT_LIT_6'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  FIT_LIT_6
PATIENCE_MONTH        1.000000  0.134822  -0.092344
RISK_1K               0.134822  1.000000  -0.005481
FIT_LIT_6            -0.092344 -0.005481   1.000000

[[ 1.          0.13489415 -0.09245045]
 [ 0.13489415  1.          0.00706377]
 [-0.09245045  0.00706377  1.        ]]
In [47]:
label = 'FIT_LIT_7'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  FIT_LIT_7
PATIENCE_MONTH        1.000000  0.124560   0.050567
RISK_1K               0.124560  1.000000   0.018459
FIT_LIT_7             0.050567  0.018459   1.000000

[[ 1.          0.12380557  0.04865468]
 [ 0.12380557  1.          0.01227122]
 [ 0.04865468  0.01227122  1.        ]]
In [48]:
label = 'FIT_LIT_8'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  FIT_LIT_8
PATIENCE_MONTH        1.000000  0.135735  -0.120472
RISK_1K               0.135735  1.000000  -0.028121
FIT_LIT_8            -0.120472 -0.028121   1.000000

[[ 1.          0.13337119 -0.11779085]
 [ 0.13337119  1.         -0.01196537]
 [-0.11779085 -0.01196537  1.        ]]
In [49]:
label = 'EMPLOYED'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  EMPLOYED
PATIENCE_MONTH        1.000000  0.131517 -0.058141
RISK_1K               0.131517  1.000000  0.013638
EMPLOYED             -0.058141  0.013638  1.000000

[[ 1.          0.13254642 -0.06046574]
 [ 0.13254642  1.          0.02150729]
 [-0.06046574  0.02150729  1.        ]]
In [50]:
label = 'IQ'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K        IQ
PATIENCE_MONTH        1.000000  0.135635 -0.312883
RISK_1K               0.135635  1.000000 -0.060966
IQ                   -0.312883 -0.060966  1.000000

[[ 1.          0.12295037 -0.30802818]
 [ 0.12295037  1.         -0.01968925]
 [-0.30802818 -0.01968925  1.        ]]
In [51]:
label = 'INCOME'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K    INCOME
PATIENCE_MONTH        1.000000  0.134406 -0.141088
RISK_1K               0.134406  1.000000  0.013143
INCOME               -0.141088  0.013143  1.000000

[[ 1.          0.13764943 -0.14417529]
 [ 0.13764943  1.          0.0327272 ]
 [-0.14417529  0.0327272   1.        ]]
In [52]:
label = 'DOC_VISIT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  DOC_VISIT
PATIENCE_MONTH        1.000000  0.136919  -0.041481
RISK_1K               0.136919  1.000000  -0.021302
DOC_VISIT            -0.041481 -0.021302   1.000000

[[ 1.          0.13618381 -0.03894003]
 [ 0.13618381  1.         -0.01578456]
 [-0.03894003 -0.01578456  1.        ]]
In [53]:
label = 'BMI'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K       BMI
PATIENCE_MONTH        1.000000  0.138712  0.083949
RISK_1K               0.138712  1.000000 -0.008421
BMI                   0.083949 -0.008421  1.000000

[[ 1.          0.13991743  0.0859511 ]
 [ 0.13991743  1.         -0.02033348]
 [ 0.0859511  -0.02033348  1.        ]]
In [54]:
label = 'AGE_START_SMOKE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                 PATIENCE_MONTH   RISK_1K  AGE_START_SMOKE
PATIENCE_MONTH         1.000000  0.136499         0.011180
RISK_1K                0.136499  1.000000         0.035901
AGE_START_SMOKE        0.011180  0.035901         1.000000

[[ 1.          0.13619399  0.00634336]
 [ 0.13619399  1.          0.03470171]
 [ 0.00634336  0.03470171  1.        ]]
In [55]:
label = 'AGE_FIRST_SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
                PATIENCE_MONTH   RISK_1K  AGE_FIRST_SEX
PATIENCE_MONTH        1.000000  0.115434      -0.179500
RISK_1K               0.115434  1.000000      -0.059176
AGE_FIRST_SEX        -0.179500 -0.059176       1.000000

[[ 1.          0.10672903 -0.1741361 ]
 [ 0.10672903  1.         -0.03935403]
 [-0.1741361  -0.03935403  1.        ]]

Log-Likelihood Ratio Tests (Nested Regressions)¶

In [157]:
# Gender
label = 'SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d[label] = d[label]-1

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                    SEX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5645
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.002293
Time:                        13:37:38   Log-Likelihood:                -3905.5
converged:                       True   LL-Null:                       -3914.5
                                        LLR p-value:                 0.0001263
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4272      0.113     -3.786      0.000        -0.648    -0.206
PATIENCE_MONTH     0.0708      0.018      3.982      0.000         0.036     0.106
RISK_1K         8.143e-05   9.65e-05      0.844      0.399        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                    SEX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.002202
Time:                        13:37:38   Log-Likelihood:                -3905.9
converged:                       True   LL-Null:                       -3914.5
                                        LLR p-value:                 3.290e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.3891      0.103     -3.762      0.000        -0.592    -0.186
PATIENCE_MONTH     0.0728      0.018      4.136      0.000         0.038     0.107
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                    SEX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0002527
Time:                        13:37:38   Log-Likelihood:                -3913.5
converged:                       True   LL-Null:                       -3914.5
                                        LLR p-value:                    0.1596
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -0.0579      0.064     -0.904      0.366        -0.183     0.068
RISK_1K        0.0001   9.55e-05      1.406      0.160     -5.29e-05     0.000
==============================================================================
0.476711003795
0.0
In [158]:
# Retirement savings
label = 'RETIREMENT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:             RETIREMENT   No. Observations:                 5236
Model:                          Logit   Df Residuals:                     5233
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.02590
Time:                        13:38:05   Log-Likelihood:                -2836.7
converged:                       True   LL-Null:                       -2912.1
                                        LLR p-value:                 1.746e-33
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.2087      0.131      1.587      0.113        -0.049     0.466
PATIENCE_MONTH    -0.2561      0.021    -12.126      0.000        -0.298    -0.215
RISK_1K            0.0001      0.000      1.073      0.283        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:             RETIREMENT   No. Observations:                 5236
Model:                          Logit   Df Residuals:                     5234
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.02570
Time:                        13:38:05   Log-Likelihood:                -2837.2
converged:                       True   LL-Null:                       -2912.1
                                        LLR p-value:                 2.013e-34
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.2712      0.118      2.300      0.021         0.040     0.502
PATIENCE_MONTH    -0.2535      0.021    -12.089      0.000        -0.295    -0.212
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:             RETIREMENT   No. Observations:                 5236
Model:                          Logit   Df Residuals:                     5234
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               3.288e-05
Time:                        13:38:05   Log-Likelihood:                -2912.0
converged:                       True   LL-Null:                       -2912.1
                                        LLR p-value:                    0.6617
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -1.0975      0.077    -14.180      0.000        -1.249    -0.946
RISK_1K    -5.072e-05      0.000     -0.438      0.662        -0.000     0.000
==============================================================================
0.249159399695
0.0
In [159]:
# Theft (> $50)
label = 'STEAL_LARGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
# Make theft a binary variable
d.replace({label: {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_LARGE   No. Observations:                 5360
Model:                          Logit   Df Residuals:                     5357
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001634
Time:                        13:38:14   Log-Likelihood:                -1053.5
converged:                       True   LL-Null:                       -1055.2
                                        LLR p-value:                    0.1782
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.4080      0.277    -12.281      0.000        -3.952    -2.864
PATIENCE_MONTH     0.0788      0.043      1.815      0.070        -0.006     0.164
RISK_1K        -2.682e-06      0.000     -0.012      0.991        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_LARGE   No. Observations:                 5360
Model:                          Logit   Df Residuals:                     5358
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001634
Time:                        13:38:14   Log-Likelihood:                -1053.5
converged:                       True   LL-Null:                       -1055.2
                                        LLR p-value:                   0.06328
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.4092      0.258    -13.212      0.000        -3.915    -2.903
PATIENCE_MONTH     0.0787      0.043      1.835      0.067        -0.005     0.163
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_LARGE   No. Observations:                 5360
Model:                          Logit   Df Residuals:                     5358
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001634
Time:                        13:38:14   Log-Likelihood:                -1053.5
converged:                       True   LL-Null:                       -1055.2
                                        LLR p-value:                   0.06328
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.4092      0.258    -13.212      0.000        -3.915    -2.903
PATIENCE_MONTH     0.0787      0.043      1.835      0.067        -0.005     0.163
==================================================================================
0.999891063309
0.00072666877884
In [160]:
label = 'STEAL_SMALL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
# Make theft a binary variable
d.replace({label: {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_SMALL   No. Observations:                 5363
Model:                          Logit   Df Residuals:                     5360
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003601
Time:                        13:38:23   Log-Likelihood:                -2655.1
converged:                       True   LL-Null:                       -2664.7
                                        LLR p-value:                 6.805e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.9882      0.140     -7.077      0.000        -1.262    -0.715
PATIENCE_MONTH    -0.0949      0.022     -4.306      0.000        -0.138    -0.052
RISK_1K            0.0002      0.000      1.552      0.121     -5.09e-05     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_SMALL   No. Observations:                 5363
Model:                          Logit   Df Residuals:                     5361
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003148
Time:                        13:38:23   Log-Likelihood:                -2656.3
converged:                       True   LL-Null:                       -2664.7
                                        LLR p-value:                 4.201e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.8954      0.126     -7.097      0.000        -1.143    -0.648
PATIENCE_MONTH    -0.0904      0.022     -4.128      0.000        -0.133    -0.047
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            STEAL_SMALL   No. Observations:                 5363
Model:                          Logit   Df Residuals:                     5361
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003148
Time:                        13:38:23   Log-Likelihood:                -2656.3
converged:                       True   LL-Null:                       -2664.7
                                        LLR p-value:                 4.201e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.8954      0.126     -7.097      0.000        -1.143    -0.648
PATIENCE_MONTH    -0.0904      0.022     -4.128      0.000        -0.133    -0.047
==================================================================================
0.015860698715
0.0
In [161]:
label = 'WORRY_ANX_DEPRESS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:      WORRY_ANX_DEPRESS   No. Observations:                 5634
Model:                          Logit   Df Residuals:                     5631
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0006586
Time:                        13:38:32   Log-Likelihood:                -2118.9
converged:                       True   LL-Null:                       -2120.3
                                        LLR p-value:                    0.2475
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.2239      0.174    -12.813      0.000        -2.564    -1.884
PATIENCE_MONTH     0.0340      0.027      1.251      0.211        -0.019     0.087
RISK_1K            0.0001      0.000      0.913      0.361        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:      WORRY_ANX_DEPRESS   No. Observations:                 5634
Model:                          Logit   Df Residuals:                     5632
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0004620
Time:                        13:38:32   Log-Likelihood:                -2119.3
converged:                       True   LL-Null:                       -2120.3
                                        LLR p-value:                    0.1616
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.1618      0.160    -13.537      0.000        -2.475    -1.849
PATIENCE_MONTH     0.0375      0.027      1.392      0.164        -0.015     0.090
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:      WORRY_ANX_DEPRESS   No. Observations:                 5634
Model:                          Logit   Df Residuals:                     5632
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0004620
Time:                        13:38:32   Log-Likelihood:                -2119.3
converged:                       True   LL-Null:                       -2120.3
                                        LLR p-value:                    0.1616
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.1618      0.160    -13.537      0.000        -2.475    -1.849
PATIENCE_MONTH     0.0375      0.027      1.392      0.164        -0.015     0.090
==================================================================================
0.404463102779
0.113505792525
In [162]:
label = 'TRYING_LOSE_WEIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
#1 Lose weight
#2 Gain weight
#3 Stay about the same
#4 Not trying to do anything
d.replace({label: {4: 0, 3: 0, 2: 0}}, inplace=True)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:     TRYING_LOSE_WEIGHT   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5645
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0003076
Time:                        13:38:41   Log-Likelihood:                -3910.5
converged:                       True   LL-Null:                       -3911.7
                                        LLR p-value:                    0.3002
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.1008      0.112      0.898      0.369        -0.119     0.321
PATIENCE_MONTH    -0.0245      0.018     -1.389      0.165        -0.059     0.010
RISK_1K        -4.736e-05   9.64e-05     -0.491      0.623        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:     TRYING_LOSE_WEIGHT   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0002767
Time:                        13:38:41   Log-Likelihood:                -3910.6
converged:                       True   LL-Null:                       -3911.7
                                        LLR p-value:                    0.1412
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.0786      0.103      0.766      0.444        -0.123     0.280
PATIENCE_MONTH    -0.0257      0.017     -1.471      0.141        -0.060     0.009
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:     TRYING_LOSE_WEIGHT   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0002767
Time:                        13:38:41   Log-Likelihood:                -3910.6
converged:                       True   LL-Null:                       -3911.7
                                        LLR p-value:                    0.1412
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.0786      0.103      0.766      0.444        -0.123     0.280
PATIENCE_MONTH    -0.0257      0.017     -1.471      0.141        -0.060     0.009
==================================================================================
0.809392454285
0.053384059422
In [163]:
label = 'FIGHT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d.replace({label: {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  FIGHT   No. Observations:                 5365
Model:                          Logit   Df Residuals:                     5362
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001985
Time:                        13:38:52   Log-Likelihood:                -3194.3
converged:                       True   LL-Null:                       -3200.6
                                        LLR p-value:                  0.001740
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.3645      0.131    -10.439      0.000        -1.621    -1.108
PATIENCE_MONTH     0.0678      0.021      3.304      0.001         0.028     0.108
RISK_1K         8.554e-05      0.000      0.776      0.438        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  FIGHT   No. Observations:                 5365
Model:                          Logit   Df Residuals:                     5363
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001891
Time:                        13:38:52   Log-Likelihood:                -3194.6
converged:                       True   LL-Null:                       -3200.6
                                        LLR p-value:                 0.0005027
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.3254      0.121    -10.992      0.000        -1.562    -1.089
PATIENCE_MONTH     0.0701      0.020      3.449      0.001         0.030     0.110
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  FIGHT   No. Observations:                 5365
Model:                          Logit   Df Residuals:                     5363
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001891
Time:                        13:38:52   Log-Likelihood:                -3194.6
converged:                       True   LL-Null:                       -3200.6
                                        LLR p-value:                 0.0005027
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.3254      0.121    -10.992      0.000        -1.562    -1.089
PATIENCE_MONTH     0.0701      0.020      3.449      0.001         0.030     0.110
==================================================================================
0.547395450055
0.0
In [164]:
label = 'HEALTH_INSUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           HEALTH_INSUR   No. Observations:                 5645
Model:                          Logit   Df Residuals:                     5642
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01583
Time:                        13:39:15   Log-Likelihood:                -2615.7
converged:                       True   LL-Null:                       -2657.8
                                        LLR p-value:                 5.283e-19
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          2.7916      0.164     16.995      0.000         2.470     3.113
PATIENCE_MONTH    -0.2248      0.026     -8.782      0.000        -0.275    -0.175
RISK_1K         6.215e-05      0.000      0.490      0.624        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           HEALTH_INSUR   No. Observations:                 5645
Model:                          Logit   Df Residuals:                     5643
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01579
Time:                        13:39:15   Log-Likelihood:                -2615.9
converged:                       True   LL-Null:                       -2657.8
                                        LLR p-value:                 5.128e-20
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          2.8182      0.155     18.186      0.000         2.514     3.122
PATIENCE_MONTH    -0.2228      0.025     -8.823      0.000        -0.272    -0.173
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           HEALTH_INSUR   No. Observations:                 5645
Model:                          Logit   Df Residuals:                     5643
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01579
Time:                        13:39:15   Log-Likelihood:                -2615.9
converged:                       True   LL-Null:                       -2657.8
                                        LLR p-value:                 5.128e-20
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          2.8182      0.155     18.186      0.000         2.514     3.122
PATIENCE_MONTH    -0.2228      0.025     -8.823      0.000        -0.272    -0.173
==================================================================================
0.810408214884
0.0
In [165]:
label = 'ENTREPRENEUR'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           ENTREPRENEUR   No. Observations:                 5172
Model:                          Logit   Df Residuals:                     5169
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0009709
Time:                        13:39:22   Log-Likelihood:                -2514.2
converged:                       True   LL-Null:                       -2516.7
                                        LLR p-value:                   0.08686
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.6134      0.151    -10.705      0.000        -1.909    -1.318
PATIENCE_MONTH    -0.0017      0.024     -0.072      0.942        -0.048     0.045
RISK_1K            0.0003      0.000      2.196      0.028      3.05e-05     0.001
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           ENTREPRENEUR   No. Observations:                 5172
Model:                          Logit   Df Residuals:                     5170
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               1.086e-05
Time:                        13:39:22   Log-Likelihood:                -2516.6
converged:                       True   LL-Null:                       -2516.7
                                        LLR p-value:                    0.8151
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.4795      0.138    -10.736      0.000        -1.750    -1.209
PATIENCE_MONTH     0.0055      0.023      0.234      0.815        -0.041     0.051
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:           ENTREPRENEUR   No. Observations:                 5172
Model:                          Logit   Df Residuals:                     5170
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               1.086e-05
Time:                        13:39:22   Log-Likelihood:                -2516.6
converged:                       True   LL-Null:                       -2516.7
                                        LLR p-value:                    0.8151
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.4795      0.138    -10.736      0.000        -1.750    -1.209
PATIENCE_MONTH     0.0055      0.023      0.234      0.815        -0.041     0.051
==================================================================================
1.34939005025e-06
0.995813721969
In [166]:
label = 'EXERCISE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EXERCISE   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5645
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               6.238e-05
Time:                        13:39:31   Log-Likelihood:                -3729.6
converged:                       True   LL-Null:                       -3729.8
                                        LLR p-value:                    0.7924
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4617      0.116     -3.991      0.000        -0.688    -0.235
PATIENCE_MONTH    -0.0123      0.018     -0.677      0.499        -0.048     0.023
RISK_1K         1.793e-05   9.96e-05      0.180      0.857        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EXERCISE   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               5.804e-05
Time:                        13:39:31   Log-Likelihood:                -3729.6
converged:                       True   LL-Null:                       -3729.8
                                        LLR p-value:                    0.5106
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4533      0.106     -4.283      0.000        -0.661    -0.246
PATIENCE_MONTH    -0.0119      0.018     -0.658      0.510        -0.047     0.023
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EXERCISE   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               5.804e-05
Time:                        13:39:31   Log-Likelihood:                -3729.6
converged:                       True   LL-Null:                       -3729.8
                                        LLR p-value:                    0.5106
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4533      0.106     -4.283      0.000        -0.661    -0.246
PATIENCE_MONTH    -0.0119      0.018     -0.658      0.510        -0.047     0.023
==================================================================================
0.974154070705
0.647239283003
In [167]:
label = 'CCMISSPAY'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              CCMISSPAY   No. Observations:                 5408
Model:                          Logit   Df Residuals:                     5405
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006435
Time:                        13:39:43   Log-Likelihood:                -2802.3
converged:                       True   LL-Null:                       -2820.4
                                        LLR p-value:                 1.312e-08
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.1273      0.148    -14.328      0.000        -2.418    -1.836
PATIENCE_MONTH     0.1264      0.023      5.448      0.000         0.081     0.172
RISK_1K            0.0002      0.000      1.485      0.137     -5.73e-05     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              CCMISSPAY   No. Observations:                 5408
Model:                          Logit   Df Residuals:                     5406
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006043
Time:                        13:39:43   Log-Likelihood:                -2803.4
converged:                       True   LL-Null:                       -2820.4
                                        LLR p-value:                 5.262e-09
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.0466      0.138    -14.814      0.000        -2.317    -1.776
PATIENCE_MONTH     0.1315      0.023      5.720      0.000         0.086     0.177
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              CCMISSPAY   No. Observations:                 5408
Model:                          Logit   Df Residuals:                     5406
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006043
Time:                        13:39:43   Log-Likelihood:                -2803.4
converged:                       True   LL-Null:                       -2820.4
                                        LLR p-value:                 5.262e-09
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.0466      0.138    -14.814      0.000        -2.317    -1.776
PATIENCE_MONTH     0.1315      0.023      5.720      0.000         0.086     0.177
==================================================================================
0.0272342495795
0.0
In [168]:
label = 'CCMAX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  CCMAX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5645
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.007045
Time:                        13:39:55   Log-Likelihood:                -1803.9
converged:                       True   LL-Null:                       -1816.7
                                        LLR p-value:                 2.765e-06
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.1729      0.205    -15.472      0.000        -3.575    -2.771
PATIENCE_MONTH     0.1473      0.032      4.646      0.000         0.085     0.209
RISK_1K            0.0002      0.000      1.017      0.309        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  CCMAX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006760
Time:                        13:39:55   Log-Likelihood:                -1804.4
converged:                       True   LL-Null:                       -1816.7
                                        LLR p-value:                 7.202e-07
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.0991      0.192    -16.157      0.000        -3.475    -2.723
PATIENCE_MONTH     0.1522      0.031      4.848      0.000         0.091     0.214
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  CCMAX   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006760
Time:                        13:39:55   Log-Likelihood:                -1804.4
converged:                       True   LL-Null:                       -1816.7
                                        LLR p-value:                 7.202e-07
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -3.0991      0.192    -16.157      0.000        -3.475    -2.723
PATIENCE_MONTH     0.1522      0.031      4.848      0.000         0.091     0.214
==================================================================================
0.299953748119
0.0
In [169]:
label = 'CCDEBT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                 CCDEBT   No. Observations:                 5403
Model:                          Logit   Df Residuals:                     5400
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01428
Time:                        13:40:03   Log-Likelihood:                -3596.3
converged:                       True   LL-Null:                       -3648.4
                                        LLR p-value:                 2.344e-23
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.3704      0.124     11.055      0.000         1.127     1.613
PATIENCE_MONTH    -0.1950      0.020     -9.876      0.000        -0.234    -0.156
RISK_1K            0.0002      0.000      2.014      0.044      5.49e-06     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                 CCDEBT   No. Observations:                 5403
Model:                          Logit   Df Residuals:                     5401
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01373
Time:                        13:40:03   Log-Likelihood:                -3598.3
converged:                       True   LL-Null:                       -3648.4
                                        LLR p-value:                 1.409e-23
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.4608      0.115     12.650      0.000         1.234     1.687
PATIENCE_MONTH    -0.1890      0.019     -9.713      0.000        -0.227    -0.151
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                 CCDEBT   No. Observations:                 5403
Model:                          Logit   Df Residuals:                     5401
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01373
Time:                        13:40:03   Log-Likelihood:                -3598.3
converged:                       True   LL-Null:                       -3648.4
                                        LLR p-value:                 1.409e-23
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.4608      0.115     12.650      0.000         1.234     1.687
PATIENCE_MONTH    -0.1890      0.019     -9.713      0.000        -0.227    -0.151
==================================================================================
4.90626963157e-05
0.0
In [170]:
label = 'DRUG'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   DRUG   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5645
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0005655
Time:                        13:40:17   Log-Likelihood:                -3668.3
converged:                       True   LL-Null:                       -3670.3
                                        LLR p-value:                    0.1255
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.3713      0.117     -3.187      0.001        -0.600    -0.143
PATIENCE_MONTH    -0.0300      0.018     -1.635      0.102        -0.066     0.006
RISK_1K        -9.914e-05      0.000     -0.984      0.325        -0.000  9.83e-05
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   DRUG   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0004336
Time:                        13:40:17   Log-Likelihood:                -3668.7
converged:                       True   LL-Null:                       -3670.3
                                        LLR p-value:                   0.07442
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4178      0.107     -3.923      0.000        -0.627    -0.209
PATIENCE_MONTH    -0.0325      0.018     -1.786      0.074        -0.068     0.003
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                   DRUG   No. Observations:                 5648
Model:                          Logit   Df Residuals:                     5646
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0004336
Time:                        13:40:17   Log-Likelihood:                -3668.7
converged:                       True   LL-Null:                       -3670.3
                                        LLR p-value:                   0.07442
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4178      0.107     -3.923      0.000        -0.627    -0.209
PATIENCE_MONTH    -0.0325      0.018     -1.786      0.074        -0.068     0.003
==================================================================================
0.332847760542
0.00767876806633
In [171]:
label = 'ALCOHOL'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                ALCOHOL   No. Observations:                 5588
Model:                          Logit   Df Residuals:                     5585
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006505
Time:                        13:40:35   Log-Likelihood:                -3837.3
converged:                       True   LL-Null:                       -3862.5
                                        LLR p-value:                 1.227e-11
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.7699      0.115      6.670      0.000         0.544     0.996
PATIENCE_MONTH    -0.1276      0.018     -6.982      0.000        -0.163    -0.092
RISK_1K            0.0001   9.77e-05      1.332      0.183     -6.13e-05     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                ALCOHOL   No. Observations:                 5588
Model:                          Logit   Df Residuals:                     5586
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006275
Time:                        13:40:35   Log-Likelihood:                -3838.2
converged:                       True   LL-Null:                       -3862.5
                                        LLR p-value:                 3.349e-12
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.8297      0.106      7.803      0.000         0.621     1.038
PATIENCE_MONTH    -0.1241      0.018     -6.872      0.000        -0.160    -0.089
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                ALCOHOL   No. Observations:                 5588
Model:                          Logit   Df Residuals:                     5586
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.006275
Time:                        13:40:35   Log-Likelihood:                -3838.2
converged:                       True   LL-Null:                       -3862.5
                                        LLR p-value:                 3.349e-12
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.8297      0.106      7.803      0.000         0.621     1.038
PATIENCE_MONTH    -0.1241      0.018     -6.872      0.000        -0.160    -0.089
==================================================================================
0.0758562958793
0.0
In [172]:
label = 'BINGE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d['BINGE'] = (d['BINGE'] > 0)
d['BINGE'] = d['BINGE'].astype(float)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  BINGE   No. Observations:                 2966
Model:                          Logit   Df Residuals:                     2963
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0009420
Time:                        13:40:43   Log-Likelihood:                -1712.4
converged:                       True   LL-Null:                       -1714.0
                                        LLR p-value:                    0.1990
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.2592      0.174     -7.235      0.000        -1.600    -0.918
PATIENCE_MONTH     0.0489      0.027      1.781      0.075        -0.005     0.103
RISK_1K        -5.661e-05      0.000     -0.371      0.711        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  BINGE   No. Observations:                 2966
Model:                          Logit   Df Residuals:                     2964
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0009019
Time:                        13:40:43   Log-Likelihood:                -1712.4
converged:                       True   LL-Null:                       -1714.0
                                        LLR p-value:                   0.07869
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.2865      0.158     -8.161      0.000        -1.595    -0.978
PATIENCE_MONTH     0.0476      0.027      1.749      0.080        -0.006     0.101
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  BINGE   No. Observations:                 2966
Model:                          Logit   Df Residuals:                     2964
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0009019
Time:                        13:40:44   Log-Likelihood:                -1712.4
converged:                       True   LL-Null:                       -1714.0
                                        LLR p-value:                   0.07869
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.2865      0.158     -8.161      0.000        -1.595    -0.978
PATIENCE_MONTH     0.0476      0.027      1.749      0.080        -0.006     0.101
==================================================================================
0.890518137708
0.00133828472371
In [173]:
label = 'SMOKE_100_CIGS'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         SMOKE_100_CIGS   No. Observations:                 5251
Model:                          Logit   Df Residuals:                     5248
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.001140
Time:                        13:40:50   Log-Likelihood:                -3628.4
converged:                       True   LL-Null:                       -3632.5
                                        LLR p-value:                   0.01593
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.4179      0.117     -3.563      0.000        -0.648    -0.188
PATIENCE_MONTH     0.0348      0.019      1.880      0.060        -0.001     0.071
RISK_1K            0.0002      0.000      1.884      0.060     -7.63e-06     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         SMOKE_100_CIGS   No. Observations:                 5251
Model:                          Logit   Df Residuals:                     5249
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0006505
Time:                        13:40:50   Log-Likelihood:                -3630.1
converged:                       True   LL-Null:                       -3632.5
                                        LLR p-value:                   0.02971
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.3307      0.108     -3.070      0.002        -0.542    -0.120
PATIENCE_MONTH     0.0398      0.018      2.170      0.030         0.004     0.076
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         SMOKE_100_CIGS   No. Observations:                 5251
Model:                          Logit   Df Residuals:                     5249
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0006505
Time:                        13:40:50   Log-Likelihood:                -3630.1
converged:                       True   LL-Null:                       -3632.5
                                        LLR p-value:                   0.02971
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -0.3307      0.108     -3.070      0.002        -0.542    -0.120
PATIENCE_MONTH     0.0398      0.018      2.170      0.030         0.004     0.076
==================================================================================
0.000381563758676
0.000395447517749
In [174]:
label = 'CIGARETTE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.mnlogit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.mnlogit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.mnlogit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                          MNLogit Regression Results                          
==============================================================================
Dep. Variable:              CIGARETTE   No. Observations:                 2894
Model:                        MNLogit   Df Residuals:                     2888
Method:                           MLE   Df Model:                            4
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003541
Time:                        13:41:01   Log-Likelihood:                -2745.2
converged:                       True   LL-Null:                       -2755.0
                                        LLR p-value:                 0.0006239
==================================================================================
   CIGARETTE=2       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.4005      0.283     -4.953      0.000        -1.955    -0.846
PATIENCE_MONTH    -0.0226      0.044     -0.514      0.608        -0.109     0.064
RISK_1K            0.0004      0.000      1.874      0.061     -1.96e-05     0.001
----------------------------------------------------------------------------------
   CIGARETTE=3       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.8131      0.172      4.731      0.000         0.476     1.150
PATIENCE_MONTH    -0.1043      0.027     -3.846      0.000        -0.158    -0.051
RISK_1K            0.0001      0.000      0.787      0.431        -0.000     0.000
==================================================================================
                          MNLogit Regression Results                          
==============================================================================
Dep. Variable:              CIGARETTE   No. Observations:                 2894
Model:                        MNLogit   Df Residuals:                     2890
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.002897
Time:                        13:41:01   Log-Likelihood:                -2747.0
converged:                       True   LL-Null:                       -2755.0
                                        LLR p-value:                 0.0003417
==================================================================================
   CIGARETTE=2       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.2023      0.262     -4.589      0.000        -1.716    -0.689
PATIENCE_MONTH    -0.0108      0.044     -0.247      0.805        -0.096     0.075
----------------------------------------------------------------------------------
   CIGARETTE=3       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.8630      0.159      5.420      0.000         0.551     1.175
PATIENCE_MONTH    -0.1012      0.027     -3.774      0.000        -0.154    -0.049
==================================================================================
                          MNLogit Regression Results                          
==============================================================================
Dep. Variable:              CIGARETTE   No. Observations:                 2894
Model:                        MNLogit   Df Residuals:                     2890
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.002897
Time:                        13:41:01   Log-Likelihood:                -2747.0
converged:                       True   LL-Null:                       -2755.0
                                        LLR p-value:                 0.0003417
==================================================================================
   CIGARETTE=2       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -1.2023      0.262     -4.589      0.000        -1.716    -0.689
PATIENCE_MONTH    -0.0108      0.044     -0.247      0.805        -0.096     0.075
----------------------------------------------------------------------------------
   CIGARETTE=3       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.8630      0.159      5.420      0.000         0.551     1.175
PATIENCE_MONTH    -0.1012      0.027     -3.774      0.000        -0.154    -0.049
==================================================================================
0.00185437506232
0.0
In [175]:
label = 'EMERGENCY_FUND'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         EMERGENCY_FUND   No. Observations:                 5059
Model:                          Logit   Df Residuals:                     5056
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01288
Time:                        13:41:16   Log-Likelihood:                -3315.5
converged:                       True   LL-Null:                       -3358.8
                                        LLR p-value:                 1.632e-19
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.2936      0.123      2.395      0.017         0.053     0.534
PATIENCE_MONTH    -0.1760      0.020     -8.979      0.000        -0.214    -0.138
RISK_1K            0.0003      0.000      3.150      0.002         0.000     0.001
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         EMERGENCY_FUND   No. Observations:                 5059
Model:                          Logit   Df Residuals:                     5057
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01140
Time:                        13:41:16   Log-Likelihood:                -3320.5
converged:                       True   LL-Null:                       -3358.8
                                        LLR p-value:                 2.134e-18
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.4495      0.112      4.013      0.000         0.230     0.669
PATIENCE_MONTH    -0.1672      0.019     -8.645      0.000        -0.205    -0.129
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:         EMERGENCY_FUND   No. Observations:                 5059
Model:                          Logit   Df Residuals:                     5057
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01140
Time:                        13:41:16   Log-Likelihood:                -3320.5
converged:                       True   LL-Null:                       -3358.8
                                        LLR p-value:                 2.134e-18
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.4495      0.112      4.013      0.000         0.230     0.669
PATIENCE_MONTH    -0.1672      0.019     -8.645      0.000        -0.205    -0.129
==================================================================================
0.0
0.0
In [176]:
label = 'FIT_LIT_4'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_4   No. Observations:                 4238
Model:                          Logit   Df Residuals:                     4235
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01088
Time:                        13:43:46   Log-Likelihood:                -2257.3
converged:                       True   LL-Null:                       -2282.1
                                        LLR p-value:                 1.665e-11
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.1810      0.168    -13.013      0.000        -2.510    -1.853
PATIENCE_MONTH     0.1787      0.026      6.830      0.000         0.127     0.230
RISK_1K        -9.491e-05      0.000     -0.707      0.479        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_4   No. Observations:                 4238
Model:                          Logit   Df Residuals:                     4236
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01077
Time:                        13:43:46   Log-Likelihood:                -2257.6
converged:                       True   LL-Null:                       -2282.1
                                        LLR p-value:                 2.388e-12
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         -2.2245      0.156    -14.270      0.000        -2.530    -1.919
PATIENCE_MONTH     0.1762      0.026      6.805      0.000         0.125     0.227
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_4   No. Observations:                 4238
Model:                          Logit   Df Residuals:                     4236
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               7.091e-06
Time:                        13:43:46   Log-Likelihood:                -2282.1
converged:                       True   LL-Null:                       -2282.1
                                        LLR p-value:                    0.8572
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -1.2264      0.088    -13.925      0.000        -1.399    -1.054
RISK_1K     2.378e-05      0.000      0.180      0.857        -0.000     0.000
==============================================================================
0.616730398331
0.0
In [177]:
label = 'FIT_LIT_5'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d[label] = (d[label] == 1)
d[label] = d[label].astype(float)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_5   No. Observations:                 4918
Model:                          Logit   Df Residuals:                     4915
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003910
Time:                        13:43:54   Log-Likelihood:                -2780.2
converged:                       True   LL-Null:                       -2791.1
                                        LLR p-value:                 1.824e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.6647      0.143     11.610      0.000         1.384     1.946
PATIENCE_MONTH    -0.1023      0.023     -4.542      0.000        -0.146    -0.058
RISK_1K        -1.129e-05      0.000     -0.095      0.925        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_5   No. Observations:                 4918
Model:                          Logit   Df Residuals:                     4916
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.003908
Time:                        13:43:54   Log-Likelihood:                -2780.2
converged:                       True   LL-Null:                       -2791.1
                                        LLR p-value:                 3.003e-06
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.6596      0.133     12.503      0.000         1.399     1.920
PATIENCE_MONTH    -0.1026      0.022     -4.605      0.000        -0.146    -0.059
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_5   No. Observations:                 4918
Model:                          Logit   Df Residuals:                     4916
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0001044
Time:                        13:43:54   Log-Likelihood:                -2790.8
converged:                       True   LL-Null:                       -2791.1
                                        LLR p-value:                    0.4453
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      1.1283      0.079     14.225      0.000         0.973     1.284
RISK_1K    -8.997e-05      0.000     -0.763      0.445        -0.000     0.000
==============================================================================
0.992870134511
0.0
In [178]:
label = 'FIT_LIT_6'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d[label] = (d[label] == 3)
d[label] = d[label].astype(float)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_6   No. Observations:                 4855
Model:                          Logit   Df Residuals:                     4852
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01076
Time:                        13:44:01   Log-Likelihood:                -2295.3
converged:                       True   LL-Null:                       -2320.3
                                        LLR p-value:                 1.432e-11
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          2.4746      0.170     14.578      0.000         2.142     2.807
PATIENCE_MONTH    -0.1818      0.027     -6.857      0.000        -0.234    -0.130
RISK_1K            0.0001      0.000      0.766      0.443        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_6   No. Observations:                 4855
Model:                          Logit   Df Residuals:                     4853
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01063
Time:                        13:44:01   Log-Likelihood:                -2295.6
converged:                       True   LL-Null:                       -2320.3
                                        LLR p-value:                 2.141e-12
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          2.5204      0.159     15.862      0.000         2.209     2.832
PATIENCE_MONTH    -0.1788      0.026     -6.829      0.000        -0.230    -0.127
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_6   No. Observations:                 4855
Model:                          Logit   Df Residuals:                     4853
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               1.353e-05
Time:                        13:44:01   Log-Likelihood:                -2320.3
converged:                       True   LL-Null:                       -2320.3
                                        LLR p-value:                    0.8021
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      1.5075      0.089     16.875      0.000         1.332     1.683
RISK_1K    -3.341e-05      0.000     -0.251      0.802        -0.000     0.000
==============================================================================
0.557006627823
0.0
In [179]:
label = 'FIT_LIT_7'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
d[label] = (d[label] == 2)
d[label] = d[label].astype(float)

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_7   No. Observations:                 4013
Model:                          Logit   Df Residuals:                     4010
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.004751
Time:                        13:44:09   Log-Likelihood:                -2620.0
converged:                       True   LL-Null:                       -2632.5
                                        LLR p-value:                 3.702e-06
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.0533      0.137      0.389      0.697        -0.216     0.322
PATIENCE_MONTH    -0.1065      0.022     -4.932      0.000        -0.149    -0.064
RISK_1K        -1.689e-05      0.000     -0.141      0.888        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_7   No. Observations:                 4013
Model:                          Logit   Df Residuals:                     4011
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.004747
Time:                        13:44:09   Log-Likelihood:                -2620.0
converged:                       True   LL-Null:                       -2632.5
                                        LLR p-value:                 5.752e-07
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          0.0452      0.124      0.363      0.716        -0.198     0.289
PATIENCE_MONTH    -0.1068      0.021     -4.986      0.000        -0.149    -0.065
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_7   No. Observations:                 4013
Model:                          Logit   Df Residuals:                     4011
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0001076
Time:                        13:44:09   Log-Likelihood:                -2632.2
converged:                       True   LL-Null:                       -2632.5
                                        LLR p-value:                    0.4517
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -0.5015      0.079     -6.351      0.000        -0.656    -0.347
RISK_1K     -8.94e-05      0.000     -0.753      0.452        -0.000     0.000
==============================================================================
0.984224489883
0.0
In [180]:
label = 'FIT_LIT_8'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_8   No. Observations:                 4864
Model:                          Logit   Df Residuals:                     4861
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01916
Time:                        13:44:16   Log-Likelihood:                -1949.8
converged:                       True   LL-Null:                       -1987.9
                                        LLR p-value:                 2.853e-17
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          3.3130      0.197     16.801      0.000         2.927     3.700
PATIENCE_MONTH    -0.2482      0.030     -8.172      0.000        -0.308    -0.189
RISK_1K           -0.0001      0.000     -0.691      0.490        -0.000     0.000
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_8   No. Observations:                 4864
Model:                          Logit   Df Residuals:                     4862
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01904
Time:                        13:44:16   Log-Likelihood:                -1950.0
converged:                       True   LL-Null:                       -1987.9
                                        LLR p-value:                 3.280e-18
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          3.2674      0.186     17.589      0.000         2.903     3.631
PATIENCE_MONTH    -0.2514      0.030     -8.363      0.000        -0.310    -0.192
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              FIT_LIT_8   No. Observations:                 4864
Model:                          Logit   Df Residuals:                     4862
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0009689
Time:                        13:44:16   Log-Likelihood:                -1986.0
converged:                       True   LL-Null:                       -1987.9
                                        LLR p-value:                   0.04968
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      1.9776      0.101     19.505      0.000         1.779     2.176
RISK_1K       -0.0003      0.000     -1.960      0.050        -0.001 -3.61e-08
==============================================================================
0.633054073454
0.0
In [181]:
label = 'EMPLOYED'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.logit(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.logit(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.logit(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EMPLOYED   No. Observations:                 4777
Model:                          Logit   Df Residuals:                     4774
Method:                           MLE   Df Model:                            2
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                 0.01018
Time:                        13:44:22   Log-Likelihood:                -956.49
converged:                       True   LL-Null:                       -966.33
                                        LLR p-value:                 5.335e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          3.8699      0.308     12.577      0.000         3.267     4.473
PATIENCE_MONTH    -0.2025      0.048     -4.213      0.000        -0.297    -0.108
RISK_1K            0.0004      0.000      1.568      0.117     -9.39e-05     0.001
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EMPLOYED   No. Observations:                 4777
Model:                          Logit   Df Residuals:                     4775
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:                0.008910
Time:                        13:44:22   Log-Likelihood:                -957.72
converged:                       True   LL-Null:                       -966.33
                                        LLR p-value:                 3.330e-05
==================================================================================
                     coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          4.0275      0.290     13.870      0.000         3.458     4.597
PATIENCE_MONTH    -0.1908      0.047     -4.041      0.000        -0.283    -0.098
==================================================================================
                           Logit Regression Results                           
==============================================================================
Dep. Variable:               EMPLOYED   No. Observations:                 4777
Model:                          Logit   Df Residuals:                     4775
Method:                           MLE   Df Model:                            1
Date:                Fri, 15 Jul 2016   Pseudo R-squ.:               0.0004595
Time:                        13:44:22   Log-Likelihood:                -965.89
converged:                       True   LL-Null:                       -966.33
                                        LLR p-value:                    0.3460
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      2.7835      0.155     17.942      0.000         2.479     3.088
RISK_1K        0.0002      0.000      0.942      0.346        -0.000     0.001
==============================================================================
0.013985694841
0.0
In [183]:
label = 'IQ'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                     IQ   R-squared:                       0.098
Model:                            OLS   Adj. R-squared:                  0.098
Method:                 Least Squares   F-statistic:                     295.7
Date:                Fri, 15 Jul 2016   Prob (F-statistic):          1.22e-122
Time:                        13:45:09   Log-Likelihood:                -63287.
No. Observations:                5432   AIC:                         1.266e+05
Df Residuals:                    5429   BIC:                         1.266e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept       7.842e+04   1591.082     49.288      0.000      7.53e+04  8.15e+04
PATIENCE_MONTH -5972.0351    250.337    -23.856      0.000     -6462.796 -5481.275
RISK_1K           -1.9826      1.366     -1.451      0.147        -4.661     0.696
==============================================================================
Omnibus:                      901.981   Durbin-Watson:                   1.354
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              246.391
Skew:                           0.238   Prob(JB):                     3.14e-54
Kurtosis:                       2.071   Cond. No.                     2.86e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.86e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                     IQ   R-squared:                       0.098
Model:                            OLS   Adj. R-squared:                  0.098
Method:                 Least Squares   F-statistic:                     589.3
Date:                Fri, 15 Jul 2016   Prob (F-statistic):          1.15e-123
Time:                        13:45:09   Log-Likelihood:                -63288.
No. Observations:                5432   AIC:                         1.266e+05
Df Residuals:                    5430   BIC:                         1.266e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept       7.749e+04   1456.011     53.220      0.000      7.46e+04  8.03e+04
PATIENCE_MONTH -6021.3037    248.049    -24.275      0.000     -6507.579 -5535.029
==============================================================================
Omnibus:                      908.184   Durbin-Watson:                   1.354
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              247.401
Skew:                           0.239   Prob(JB):                     1.89e-54
Kurtosis:                       2.070   Cond. No.                         23.3
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                     IQ   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     20.26
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           6.91e-06
Time:                        13:45:09   Log-Likelihood:                -63558.
No. Observations:                5432   AIC:                         1.271e+05
Df Residuals:                    5430   BIC:                         1.271e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept   4.726e+04    954.899     49.493      0.000      4.54e+04  4.91e+04
RISK_1K       -6.4036      1.423     -4.501      0.000        -9.193    -3.614
==============================================================================
Omnibus:                     2940.978   Durbin-Watson:                   1.232
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              367.599
Skew:                           0.285   Prob(JB):                     1.50e-80
Kurtosis:                       1.860   Cond. No.                     1.62e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
0.0351857664852
0.0
In [184]:
label = 'INCOME'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 INCOME   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     57.44
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           2.08e-25
Time:                        13:45:22   Log-Likelihood:                -11891.
No. Observations:                5370   AIC:                         2.379e+04
Df Residuals:                    5367   BIC:                         2.381e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         11.4784      0.127     90.213      0.000        11.229    11.728
PATIENCE_MONTH    -0.2135      0.020    -10.674      0.000        -0.253    -0.174
RISK_1K            0.0003      0.000      2.399      0.016       4.8e-05     0.000
==============================================================================
Omnibus:                     3992.949   Durbin-Watson:                   1.862
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            55242.857
Skew:                          -3.590   Prob(JB):                         0.00
Kurtosis:                      16.976   Cond. No.                     2.85e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.85e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 INCOME   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     109.0
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           2.79e-25
Time:                        13:45:22   Log-Likelihood:                -11894.
No. Observations:                5370   AIC:                         2.379e+04
Df Residuals:                    5368   BIC:                         2.381e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         11.6019      0.116     99.666      0.000        11.374    11.830
PATIENCE_MONTH    -0.2071      0.020    -10.442      0.000        -0.246    -0.168
==============================================================================
Omnibus:                     3996.083   Durbin-Watson:                   1.863
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            55366.547
Skew:                          -3.594   Prob(JB):                         0.00
Kurtosis:                      16.993   Cond. No.                         23.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 INCOME   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.9274
Date:                Fri, 15 Jul 2016   Prob (F-statistic):              0.336
Time:                        13:45:22   Log-Likelihood:                -11948.
No. Observations:                5370   AIC:                         2.390e+04
Df Residuals:                    5368   BIC:                         2.391e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     10.3639      0.073    141.071      0.000        10.220    10.508
RISK_1K        0.0001      0.000      0.963      0.336        -0.000     0.000
==============================================================================
Omnibus:                     3993.249   Durbin-Watson:                   1.834
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            54968.841
Skew:                          -3.593   Prob(JB):                         0.00
Kurtosis:                      16.929   Cond. No.                     1.61e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.61e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
8.67817950834e-09
0.0
In [185]:
label = 'DOC_VISIT'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:              DOC_VISIT   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     5.552
Date:                Fri, 15 Jul 2016   Prob (F-statistic):            0.00390
Time:                        13:45:35   Log-Likelihood:                -9328.2
No. Observations:                5630   AIC:                         1.866e+04
Df Residuals:                    5627   BIC:                         1.868e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.9724      0.071     27.689      0.000         1.833     2.112
PATIENCE_MONTH    -0.0328      0.011     -2.923      0.003        -0.055    -0.011
RISK_1K        -7.249e-05   6.12e-05     -1.184      0.236        -0.000  4.75e-05
==============================================================================
Omnibus:                     1524.833   Durbin-Watson:                   1.912
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3121.536
Skew:                           1.657   Prob(JB):                         0.00
Kurtosis:                       4.523   Cond. No.                     2.85e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.85e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:              DOC_VISIT   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     9.701
Date:                Fri, 15 Jul 2016   Prob (F-statistic):            0.00185
Time:                        13:45:35   Log-Likelihood:                -9328.9
No. Observations:                5630   AIC:                         1.866e+04
Df Residuals:                    5628   BIC:                         1.868e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          1.9384      0.065     29.727      0.000         1.811     2.066
PATIENCE_MONTH    -0.0346      0.011     -3.115      0.002        -0.056    -0.013
==============================================================================
Omnibus:                     1525.530   Durbin-Watson:                   1.912
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3124.036
Skew:                           1.658   Prob(JB):                         0.00
Kurtosis:                       4.524   Cond. No.                         23.3
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:              DOC_VISIT   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     2.555
Date:                Fri, 15 Jul 2016   Prob (F-statistic):              0.110
Time:                        13:45:35   Log-Likelihood:                -9332.5
No. Observations:                5630   AIC:                         1.867e+04
Df Residuals:                    5628   BIC:                         1.868e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      1.8015      0.041     44.220      0.000         1.722     1.881
RISK_1K      -9.7e-05   6.07e-05     -1.598      0.110        -0.000   2.2e-05
==============================================================================
Omnibus:                     1523.770   Durbin-Watson:                   1.908
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3115.774
Skew:                           1.658   Prob(JB):                         0.00
Kurtosis:                       4.512   Cond. No.                     1.62e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
0.160645926257
0.0
In [186]:
label = 'BMI'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    BMI   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     20.60
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           1.22e-09
Time:                        13:45:45   Log-Likelihood:                 1144.1
No. Observations:                5486   AIC:                            -2282.
Df Residuals:                    5483   BIC:                            -2262.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          3.2788      0.011    293.772      0.000         3.257     3.301
PATIENCE_MONTH     0.0112      0.002      6.388      0.000         0.008     0.015
RISK_1K        -1.448e-05   9.61e-06     -1.506      0.132     -3.33e-05  4.37e-06
==============================================================================
Omnibus:                      436.150   Durbin-Watson:                   1.855
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              786.590
Skew:                           0.569   Prob(JB):                    1.56e-171
Kurtosis:                       4.466   Cond. No.                     2.84e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.84e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    BMI   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     38.92
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           4.74e-10
Time:                        13:45:45   Log-Likelihood:                 1143.0
No. Observations:                5486   AIC:                            -2282.
Df Residuals:                    5484   BIC:                            -2269.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept          3.2721      0.010    319.988      0.000         3.252     3.292
PATIENCE_MONTH     0.0109      0.002      6.239      0.000         0.007     0.014
==============================================================================
Omnibus:                      438.380   Durbin-Watson:                   1.856
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              793.512
Skew:                           0.570   Prob(JB):                    4.91e-173
Kurtosis:                       4.474   Cond. No.                         23.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    BMI   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.3889
Date:                Fri, 15 Jul 2016   Prob (F-statistic):              0.533
Time:                        13:45:45   Log-Likelihood:                 1123.8
No. Observations:                5486   AIC:                            -2244.
Df Residuals:                    5484   BIC:                            -2230.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      3.3373      0.006    521.233      0.000         3.325     3.350
RISK_1K    -5.959e-06   9.56e-06     -0.624      0.533     -2.47e-05  1.28e-05
==============================================================================
Omnibus:                      430.275   Durbin-Watson:                   1.841
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              768.401
Skew:                           0.566   Prob(JB):                    1.39e-167
Kurtosis:                       4.443   Cond. No.                     1.61e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.61e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
0.0232891473895
0.0
In [188]:
label = 'AGE_START_SMOKE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:        AGE_START_SMOKE   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     2.372
Date:                Fri, 15 Jul 2016   Prob (F-statistic):             0.0935
Time:                        13:46:04   Log-Likelihood:                -6476.6
No. Observations:                2349   AIC:                         1.296e+04
Df Residuals:                    2346   BIC:                         1.298e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         16.3578      0.340     48.076      0.000        15.691    17.025
PATIENCE_MONTH    -0.0312      0.054     -0.582      0.561        -0.136     0.074
RISK_1K            0.0006      0.000      2.161      0.031      5.62e-05     0.001
==============================================================================
Omnibus:                      499.205   Durbin-Watson:                   1.892
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1195.385
Skew:                           1.166   Prob(JB):                    2.66e-260
Kurtosis:                       5.603   Cond. No.                     2.97e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.97e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:        AGE_START_SMOKE   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                   0.07660
Date:                Fri, 15 Jul 2016   Prob (F-statistic):              0.782
Time:                        13:46:04   Log-Likelihood:                -6479.0
No. Observations:                2349   AIC:                         1.296e+04
Df Residuals:                    2347   BIC:                         1.297e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         16.6394      0.315     52.900      0.000        16.023    17.256
PATIENCE_MONTH    -0.0147      0.053     -0.277      0.782        -0.119     0.089
==============================================================================
Omnibus:                      510.030   Durbin-Watson:                   1.890
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1246.398
Skew:                           1.182   Prob(JB):                    2.23e-271
Kurtosis:                       5.673   Cond. No.                         24.3
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:        AGE_START_SMOKE   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     4.407
Date:                Fri, 15 Jul 2016   Prob (F-statistic):             0.0359
Time:                        13:46:04   Log-Likelihood:                -6476.8
No. Observations:                2349   AIC:                         1.296e+04
Df Residuals:                    2347   BIC:                         1.297e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     16.1933      0.189     85.470      0.000        15.822    16.565
RISK_1K        0.0006      0.000      2.099      0.036      3.85e-05     0.001
==============================================================================
Omnibus:                      498.471   Durbin-Watson:                   1.893
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1193.534
Skew:                           1.165   Prob(JB):                    6.72e-260
Kurtosis:                       5.602   Cond. No.                     1.64e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.64e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
3.02084954518e-06
0.734366952108
In [189]:
label = 'AGE_FIRST_SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])

mod1  = smf.ols(formula=label+'~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2  = smf.ols(formula=label+'~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3  = smf.ols(formula=label+'~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-value of chi-squared diff tests
print(1 - stats.chi.cdf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(1 - stats.chi.cdf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
                            OLS Regression Results                            
==============================================================================
Dep. Variable:          AGE_FIRST_SEX   R-squared:                       0.034
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     40.67
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           4.35e-18
Time:                        13:46:14   Log-Likelihood:                -5449.9
No. Observations:                2334   AIC:                         1.091e+04
Df Residuals:                    2331   BIC:                         1.092e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         17.6584      0.222     79.637      0.000        17.224    18.093
PATIENCE_MONTH    -0.2996      0.035     -8.538      0.000        -0.368    -0.231
RISK_1K           -0.0004      0.000     -1.902      0.057        -0.001  1.12e-05
==============================================================================
Omnibus:                      195.528   Durbin-Watson:                   1.699
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              388.842
Skew:                          -0.554   Prob(JB):                     3.66e-85
Kurtosis:                       4.664   Cond. No.                     2.88e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.88e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:          AGE_FIRST_SEX   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     77.64
Date:                Fri, 15 Jul 2016   Prob (F-statistic):           2.37e-18
Time:                        13:46:14   Log-Likelihood:                -5451.7
No. Observations:                2334   AIC:                         1.091e+04
Df Residuals:                    2332   BIC:                         1.092e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------
Intercept         17.4842      0.202     86.539      0.000        17.088    17.880
PATIENCE_MONTH    -0.3073      0.035     -8.811      0.000        -0.376    -0.239
==============================================================================
Omnibus:                      192.950   Durbin-Watson:                   1.696
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              379.548
Skew:                          -0.551   Prob(JB):                     3.82e-83
Kurtosis:                       4.639   Cond. No.                         23.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                            OLS Regression Results                            
==============================================================================
Dep. Variable:          AGE_FIRST_SEX   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     8.195
Date:                Fri, 15 Jul 2016   Prob (F-statistic):            0.00424
Time:                        13:46:14   Log-Likelihood:                -5485.8
No. Observations:                2334   AIC:                         1.098e+04
Df Residuals:                    2332   BIC:                         1.099e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     16.0935      0.127    127.037      0.000        15.845    16.342
RISK_1K       -0.0005      0.000     -2.863      0.004        -0.001    -0.000
==============================================================================
Omnibus:                      201.602   Durbin-Watson:                   1.652
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              396.856
Skew:                          -0.572   Prob(JB):                     6.66e-87
Kurtosis:                       4.665   Cond. No.                     1.61e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.61e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
0.000297393204427
0.0