import pandas as pd
import numpy as np
import sys
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn import preprocessing
import scipy.stats as stats
import corrstats # allows for comparison of 2 correlation coefficients
import a12 # provides Vargha and Delaney's A (non-parametric common language effect size)
import geometric
import seaborn as sns
# Use seaborn's whitegrid theme for all figures produced below
sns.set_style("whitegrid")
def cohen_d(x, y):
    """Return Cohen's d effect size for two independent samples.

    d = (mean(x) - mean(y)) / pooled SD, where the pooled SD is the root
    mean square of the two sample standard deviations (ddof=1).

    Fix: the original called math.sqrt, but ``math`` is never imported in
    this script, so every call raised NameError; np.sqrt is used instead.
    """
    pooled_sd = np.sqrt((np.std(x, ddof=1) ** 2. + np.std(y, ddof=1) ** 2.) / 2.)
    return (np.mean(x) - np.mean(y)) / pooled_sd
# Load the survey extract. Codes -5..-1 mark "non-response" in the source
# data and are read in as NaN.
path = 'C:\\Users\\cadlab\\Desktop\\xian\\research\\projects\\risk-delay\\'
df = pd.read_csv(path+'6-17-15.csv', na_values=[-5,-4,-3,-2,-1])
# Rename (some) columns from the survey's opaque question codes to
# readable variable names used throughout the rest of the script.
df.rename(columns={'T0961700' : 'PATIENCE_MONTH',\
'T0962000' : 'PATIENCE_YEAR',\
'T0960500' : 'RISK_10K',\
'T0961100' : 'RISK_1K',\
'R4395800' : 'RISK_INCOME_MID_RISK',\
'R4395900' : 'RISK_INCOME_MORE_RISK',\
'R4396000' : 'RISK_INCOME_LESS_RISK',\
'T3094800' : 'RISK',\
'T3094900' : 'DRIVING',\
'T3094901' : 'FINANCIAL',\
'T3094902' : 'OCCUPATION',\
'T3094903' : 'HEALTH',\
'T3094904' : 'FAITH',\
'T3094905' : 'ROMANCE',\
'T3094906' : 'MAJOR_LIFE',\
'T3095000' : 'BETS',\
'T0911100' : 'ALCOHOL',\
'T0911400' : 'BINGE',\
'T2074900' : 'CIGARETTE',\
'R3914300' : 'AGE_START_SMOKE',\
'R3914200' : 'SMOKE_100_CIGS',\
'H0020800' : 'EVER_DEPRESSED',\
'H0021000' : 'DEPRESSED_IN_LAST_YEAR',\
'H0008600' : 'WORRY_ANX_DEPRESS',\
'R6430800' : 'COCAINE',\
'R6431200' : 'CRACK',\
'R6432000' : 'SEDATIVES',\
'R6432100' : 'TRANQUILIZERS',\
'R6432200' : 'STIMULANTS',\
'R6432300' : 'PAINKILLERS',\
'R6432400' : 'INHALANTS',\
'R6432500' : 'HALLUCINOGENS',\
'R6432600' : 'HEROIN',\
'R6432700' : 'STEROIDS',\
'R6432800' : 'INJECTIONS',\
'R6432900' : 'MDMA',\
'R6433000' : 'METH',\
'R0006500' : 'SES',\
'T0988600' : 'EDU',\
'T0989000' : 'AGE',\
'T0990700' : 'NJOBS',\
'T0988200' : 'EMPLOYED',\
'T0910800' : 'MEMORY',\
'T0899200' : 'TRYING_LOSE_WEIGHT',\
'T0897300' : 'WEIGHT',\
'T0897400' : 'HEIGHT_FT',\
'T0897500' : 'HEIGHT_IN',\
'T0897600' : 'EXERCISE_VIG_FREQ',\
'T0897700' : 'EXERCISE_VIG_UNIT',\
'T0898100' : 'EXERCISE_MOD_FREQ',\
'T0898200' : 'EXERCISE_MOD_UNIT',\
'T0898800' : 'DOC_VISIT',\
'R0988000' : 'AGE_FIRST_SEX',\
'T0899900' : 'HEALTH_INSUR',\
'T2176700' : 'CCDEBT',\
'T2181800' : 'CCMISSPAY',\
'R8417600' : 'CCMAX04',\
'T2181900' : 'CCMAX08',\
'R6945200' : 'RETIREMENT',\
'R8046500' : 'INVEST_STOCK',\
'R8046501' : 'INVEST_PRIV_BOND',\
'R8046502' : 'INVEST_GOV_BOND',\
'T4100000' : 'EMERGENCY_FUND',\
'T4100300' : 'FIT_LIT_4',\
'T4100400' : 'FIT_LIT_5',\
'T4100500' : 'FIT_LIT_6',\
'T4100600' : 'FIT_LIT_7',\
'T4100700' : 'FIT_LIT_8',\
'T2463100' : 'ENTREPRENEUR',\
'R0214800' : 'SEX',\
'R0214700' : 'RACE',\
'T0987900' : 'POVERTY',\
'T0987800' : 'INCOME',\
'R0618301' : 'IQ',\
'R0305100' : 'SHOPLIFT',\
'R0305200' : 'STEAL_SMALL',\
'R0305300' : 'STEAL_LARGE',\
'R0307800' : 'CONVICT',\
'R0305000' : 'FIGHT'\
}, inplace=True)
# Frequency of 0 ("won't answer"/degenerate) responses to various
# combinations of the risk items, alone and crossed with the delay items.
zero_risk_queries = [
    '(RISK_10K > 0) & (RISK_1K > 0)',
    '(RISK_10K == 0) | (RISK_1K == 0)',
    '(RISK_10K == 0) & (RISK_1K == 0)',
    '(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_MONTH == 0)',
    '(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_YEAR == 0)',
    '(RISK_1K >= 0) & (PATIENCE_MONTH >= 0)',
    '(RISK_1K == 0) | (PATIENCE_MONTH == 0)',
    '(RISK_1K == 0) & (PATIENCE_MONTH == 0)',
    '(RISK_1K > 0) & (PATIENCE_MONTH > 0)',
    '(RISK_10K == 0) | (PATIENCE_YEAR == 0)',
    '(RISK_10K > 0) & (PATIENCE_YEAR > 0)',
]
for query in zero_risk_queries:
    print(len(df.query(query)))
# Frequency of 0 responses to various combinations of the delay items
zero_delay_queries = [
    '(PATIENCE_YEAR > 0) & (PATIENCE_MONTH > 0)',
    '(PATIENCE_MONTH == 0)',
    '(PATIENCE_YEAR == 0)',
    '(PATIENCE_YEAR == 0) & (PATIENCE_MONTH == 0)',
    '(RISK_10K == 0) & (RISK_1K == 0) & (PATIENCE_YEAR == 0) & (PATIENCE_MONTH == 0)',
    '(PATIENCE_YEAR > 0) | (PATIENCE_MONTH > 0)',
    '(RISK_10K > 0) | (RISK_1K > 0)',
]
for query in zero_delay_queries:
    print(len(df.query(query)))
# Frequency of maximal responses to combinations of the beh. econ. items
maximal_queries = [
    '(RISK_10K == 10000) & (RISK_1K == 1000)',
    '(RISK_10K == 10000) | (RISK_1K == 1000)',
    '(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_MONTH == 0)',
    '(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_YEAR == 0)',
    '(RISK_10K == 10000) & (RISK_1K == 1000) & (PATIENCE_MONTH == 0) & (PATIENCE_YEAR == 0)',
]
for query in maximal_queries:
    print(len(df.query(query)))
# Correlation of the raw (untransformed) responses
print(df['PATIENCE_MONTH'].corr(df['PATIENCE_YEAR']))
print(df['RISK_1K'].corr(df['RISK_10K']))
# Work on a copy so the raw df stays available for comparison
data = df.copy()
# A 0 response on any of the beh. econ. items is a non-answer: set to NaN
data.replace({'RISK_1K': {0: np.nan},
              'RISK_10K': {0: np.nan},
              'PATIENCE_MONTH': {0: np.nan},
              'PATIENCE_YEAR': {0: np.nan}}, inplace=True)
# Recode DOC_VISIT so larger values mean a less recent doctor visit
# (monotonic with recency)
data.replace({'DOC_VISIT': {0: 6}}, inplace=True)
# Collapse EMPLOYED to employed (1) vs. not (0); other codes become missing
data.replace({'EMPLOYED': {2: 0, 3: np.nan, 4: np.nan}}, inplace=True)
# The patience items are dollar amounts; work with them on a log scale
data['PATIENCE_MONTH'] = np.log(data['PATIENCE_MONTH'])
data['PATIENCE_YEAR'] = np.log(data['PATIENCE_YEAR'])
# Log-transform income; some respondents report $0, so shift by $1 first
print(len(data.query('(INCOME == 0)')))
data['INCOME'] = np.log(data['INCOME'] + 1)
# Composite drug-use flag: 1.0 if the respondent reported any use of the
# substances below, 0.0 otherwise (NaN comparisons count as "no use").
# Note: SEDATIVES, TRANQUILIZERS, STEROIDS and INJECTIONS are not part of
# this composite.
drug_items = ['COCAINE', 'CRACK', 'STIMULANTS', 'PAINKILLERS', 'INHALANTS',
              'HALLUCINOGENS', 'HEROIN', 'MDMA', 'METH']
data['DRUG'] = data[drug_items].gt(0).any(axis=1).astype(float)
# Binary flag for ever having maxed out a credit card (based on CCMAX04)
data['CCMAX'] = (data['CCMAX04'] > 0).astype(float)
# BMI from self-reported weight (lb) and height (ft + in);
# 703.06958 converts lb/in^2 to kg/m^2. Stored on a log scale.
total_inches = (data['HEIGHT_FT'] * 12) + data['HEIGHT_IN']
data['BMI'] = np.log(703.06958 * data['WEIGHT'] / total_inches ** 2)
p = plt.hist(data['BMI'].dropna().values, 100)
# Derive a vigorous-exercise frequency variable, then reduce it to a
# binary exercises/doesn't flag on `data`.
exerData = data.copy()
exerData['EXERCISE'] = exerData['EXERCISE_VIG_FREQ']
# NOTE(review): the four assignments below use a boolean mask on the WHOLE
# DataFrame (exerData[mask] = ...), which overwrites every column of the
# matching rows, not just EXERCISE. The intent was presumably
# exerData.loc[mask, 'EXERCISE'] = ... with per-unit (week/month/year)
# conversion to a per-day rate -- TODO confirm against the original
# codebook before reusing anything besides the binary flag built below.
exerData[exerData['EXERCISE_VIG_FREQ'] == 5] = 0.
exerData[exerData['EXERCISE_VIG_FREQ'] == 2] = exerData[exerData['EXERCISE_VIG_UNIT'] == 2] / 7.
exerData[exerData['EXERCISE_VIG_FREQ'] == 3] = exerData[exerData['EXERCISE_VIG_UNIT'] == 3] / 30.
exerData[exerData['EXERCISE_VIG_FREQ'] == 4] = exerData[exerData['EXERCISE_VIG_UNIT'] == 4] / 365.
exerData['EXERCISE'] = exerData['EXERCISE'].astype(float)
labels = ['EXERCISE']
# NOTE(review): subset=[labels] passes a nested list; accepted by old
# pandas, rejected by current versions (should be subset=labels).
p = plt.hist(exerData.dropna(subset = [labels])[labels].values, 10, range=[0,10])
# Counts of non-exercisers vs. exercisers
print(len(exerData[exerData['EXERCISE'] == 0]))
print(len(exerData[exerData['EXERCISE'] > 0]))
# convert exercise to a binary variable (any exercise at all -> 1.0)
data['EXERCISE'] = (exerData['EXERCISE']> 0)
data['EXERCISE'] = data['EXERCISE'].astype(float)
labels = ['EXERCISE']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 2, range=[0,1])
# How many respondents accepted (1) vs. declined (0) each hypothetical job
# offer: the initial (mid-risk), the worse (more-risk), and the better
# (less-risk) offer.
for offer in ['RISK_INCOME_MID_RISK', 'RISK_INCOME_MORE_RISK', 'RISK_INCOME_LESS_RISK']:
    print(len(data[data[offer] == 0]))
    print(len(data[data[offer] == 1]))
# Collapse the branching income-risk items into a single ordinal score.
# Respondents only see LESS_RISK or MORE_RISK depending on their first
# answer, so the four (item, answer) cells partition the sample.
bin_rules = [
    ('RISK_INCOME_LESS_RISK', 0, -1.5),
    ('RISK_INCOME_LESS_RISK', 1, -0.5),
    ('RISK_INCOME_MORE_RISK', 0, 0.5),
    ('RISK_INCOME_MORE_RISK', 1, 1.5),
]
for item, answer, score in bin_rules:
    data.loc[data[item] == answer, 'RISK_INCOME'] = score
labels = ['RISK_INCOME']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 4, range=[-2,2])
# How the ordinal income-risk score relates to the beh. econ. items
for other in ['RISK_1K', 'RISK_10K', 'PATIENCE_MONTH', 'PATIENCE_YEAR']:
    print(data['RISK_INCOME'].corr(data[other]))
g = sns.PairGrid(data, y_vars=['RISK_1K', 'RISK_10K', 'PATIENCE_MONTH', 'PATIENCE_YEAR'], x_vars='RISK_INCOME')
g.map(sns.pointplot)
# Distribution of binge-drinking responses
labels = ['BINGE']
# NOTE(review): subset=[labels] / subset=[['X']] nested-list forms below
# rely on old pandas; current pandas requires a flat list of column names.
p = plt.hist(data.dropna(subset = [labels])[labels].values, 10, range=[0,10])
# Distributions of age at first sex and age started smoking
p = plt.hist(data.dropna(subset = ['AGE_FIRST_SEX'])['AGE_FIRST_SEX'].values, 25)
p = plt.hist(data.dropna(subset = ['AGE_START_SMOKE'])['AGE_START_SMOKE'].values, 35)
# 0 presumably codes "never smoked"; treat as missing -- TODO confirm
data.replace({'AGE_START_SMOKE': {0: np.nan}}, inplace=True)
# Side-by-side violin plots of the two risk items
fig, axes = plt.subplots(nrows=1, ncols=2)
p = sns.violinplot(data.dropna(subset = [['RISK_1K']])[['RISK_1K']], ax=axes[0])
p.axes.set_ylim(0,1000)
p = sns.violinplot(data.dropna(subset = [['RISK_10K']])[['RISK_10K']], ax=axes[1])
p.axes.set_ylim(0,10000)
# Overlaid density estimates of the two (log-transformed) patience items
labels = ['PATIENCE_MONTH', 'PATIENCE_YEAR']
plt.figure()
for i in range(len(labels)):
    p = sns.distplot(data.dropna(subset = [[labels[i]]])[[labels[i]]], hist=False, label=labels[i])
plt.xlim(0, 16)
plt.legend()
# RISK_1K as related to income risk: compare respondents who took a risky
# job offer (RISK_INCOME > 0) with those who avoided risk (RISK_INCOME < 0).
# NOTE(review): the subset=[['X']] nested-list dropna form throughout this
# section relies on old pandas behavior.
fig, axes = plt.subplots(nrows=1, ncols=2)
takers = data[data['RISK_INCOME'] > 0]
nontakers = data[data['RISK_INCOME'] < 0]
print(np.mean(takers['RISK_1K']))
p = sns.violinplot(takers.dropna(subset=[['RISK_1K']])[['RISK_1K']], ax=axes[0])
p.axes.set_ylim(0,1000)
print(np.mean(nontakers['RISK_1K']))
p = sns.violinplot(nontakers.dropna(subset=[['RISK_1K']])[['RISK_1K']], ax=axes[1])
p.axes.set_ylim(0,1000)
# Welch's t-test (unequal variances) comparing the groups
print(stats.ttest_ind(takers.dropna(subset=[['RISK_1K']])['RISK_1K'], \
    nontakers.dropna(subset=[['RISK_1K']])['RISK_1K'], equal_var=False))
# RISK_10K as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['RISK_10K']))
p = sns.violinplot(takers.dropna(subset=[['RISK_10K']])[['RISK_10K']], ax=axes[0])
p.axes.set_ylim(0,10000)
print(np.mean(nontakers['RISK_10K']))
p = sns.violinplot(nontakers.dropna(subset=[['RISK_10K']])[['RISK_10K']], ax=axes[1])
p.axes.set_ylim(0,10000)
print(stats.ttest_ind(takers.dropna(subset=[['RISK_10K']])['RISK_10K'], \
    nontakers.dropna(subset=[['RISK_10K']])['RISK_10K'], equal_var=False))
# PATIENCE_MONTH as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['PATIENCE_MONTH']))
p = sns.violinplot(takers.dropna(subset=[['PATIENCE_MONTH']])[['PATIENCE_MONTH']], ax=axes[0])
p.axes.set_ylim(0,14)
print(np.mean(nontakers['PATIENCE_MONTH']))
p = sns.violinplot(nontakers.dropna(subset=[['PATIENCE_MONTH']])[['PATIENCE_MONTH']], ax=axes[1])
p.axes.set_ylim(0,14)
print(stats.ttest_ind(takers.dropna(subset=[['PATIENCE_MONTH']])['PATIENCE_MONTH'], \
    nontakers.dropna(subset=[['PATIENCE_MONTH']])['PATIENCE_MONTH'], equal_var=False))
# PATIENCE_YEAR as related to income risk
fig, axes = plt.subplots(nrows=1, ncols=2)
print(np.mean(takers['PATIENCE_YEAR']))
p = sns.violinplot(takers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']], ax=axes[0])
p.axes.set_ylim(0,14)
print(np.mean(nontakers['PATIENCE_YEAR']))
p = sns.violinplot(nontakers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']], ax=axes[1])
p.axes.set_ylim(0,14)
# NOTE(review): the [['PATIENCE_YEAR']]['PATIENCE_YEAR'] double selection
# below is redundant (DataFrame then Series); the parallel t-tests above
# select the Series directly. Result is identical.
print(stats.ttest_ind(takers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']]['PATIENCE_YEAR'], \
    nontakers.dropna(subset=[['PATIENCE_YEAR']])[['PATIENCE_YEAR']]['PATIENCE_YEAR'], equal_var=False))
# Distribution of IQ scores
# NOTE(review): the y-limit suggests values run to ~120000, presumably a
# scaled score rather than a conventional IQ -- confirm against codebook.
labels = ['IQ']
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,120000)
# Distribution of the raw maxed-credit-card item
labels = ['CCMAX04']
p = plt.hist(data.dropna(subset = [labels])[labels].values, 10, range=[0,10])
# Distribution of (log) income
labels = ['INCOME']
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,15)
# Distributions of the self-reported risk-attitude items
labels = ['RISK', 'DRIVING', 'FINANCIAL', 'OCCUPATION', 'HEALTH', 'FAITH', 'ROMANCE', 'MAJOR_LIFE', 'BETS']
plt.figure(figsize=(12, 5))
p = sns.violinplot(data.dropna(subset = [labels])[labels])
p.axes.set_ylim(0,10)
# Correlation matrix over the variables of interest.
# Some of these are categorical/binary -- beware of interpreting the
# Pearson correlations in those cases.
labels = ['RISK_1K', 'RISK_10K', 'RISK_INCOME', 'PATIENCE_MONTH', 'PATIENCE_YEAR', 'AGE', 'SEX', 'IQ',\
'INCOME', 'EXERCISE', 'DOC_VISIT', 'HEALTH_INSUR', 'INVEST_STOCK', 'INVEST_PRIV_BOND', 'INVEST_GOV_BOND',\
'CCMAX', 'DRUG', 'ALCOHOL', 'BINGE', 'CONVICT', 'CIGARETTE',\
'RISK', 'DRIVING', 'FINANCIAL', 'OCCUPATION', 'HEALTH', 'FAITH', 'ROMANCE', 'MAJOR_LIFE', 'BETS']
justcorrs = data[labels]
justcorrs = justcorrs.corr()
fig, ax = plt.subplots()
fig.set_size_inches(12,8)
ax.pcolor(justcorrs, cmap='RdBu', edgecolors='k', vmin=-1., vmax=1.)
ax.invert_yaxis()
# Hide the tick marks and put the x labels on top.
# Fix: tick_params takes booleans; the 'on'/'off' strings used originally
# were deprecated in matplotlib 2.2 and removed in 3.x. Booleans work on
# all matplotlib versions.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False,
    labeltop=True)     # labels go along the top edge
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,        # ticks along the left edge are off
    right=False,       # ticks along the right edge are off
    labelright=False,
    labelleft=True)    # labels go along the left edge
ytick = plt.yticks(np.arange(0.5, len(labels), 1), labels)
xtick = plt.xticks(np.arange(0.5, len(labels), 1), labels, rotation=90)
# Where are the large correlations? Threshold |r| and slam each cell to
# -1 / 0 / +1 so only the sign of the "large" correlations remains.
fig, ax = plt.subplots()
fig.set_size_inches(12,8)
threshold = .1
slammedcorrs = justcorrs.copy()
slammedcorrs[slammedcorrs[:] < -threshold] = -1
slammedcorrs[slammedcorrs[:] > threshold] = 1
slammedcorrs[(slammedcorrs[:] < threshold) & (slammedcorrs[:] > -threshold)] = 0
ax.pcolor(slammedcorrs, cmap='RdBu', edgecolors='k', vmin=-1., vmax=1.)
ax.invert_yaxis()
# Fix: booleans instead of the 'on'/'off' strings, which were deprecated
# in matplotlib 2.2 and removed in 3.x. Booleans work on all versions.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False,
    labeltop=True)     # labels go along the top edge
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,        # ticks along the left edge are off
    right=False,       # ticks along the right edge are off
    labelright=False,
    labelleft=True)    # labels go along the left edge
ytick = plt.yticks(np.arange(0.5, len(labels), 1), labels)
xtick = plt.xticks(np.arange(0.5, len(labels), 1), labels, rotation=90)
# Scatter + regression of (log) patience on risk for each pairing of delay
# horizon (month/year) and lottery magnitude ($1K/$10K).
# NOTE(review): sns.lmplot('x', 'y', data) is the old positional seaborn
# API; seaborn >= 0.12 requires keyword arguments (x=..., y=..., data=...).
print(data[['PATIENCE_MONTH', 'RISK_1K']].corr())
p = sns.lmplot('RISK_1K', 'PATIENCE_MONTH', data)
p.axes[0,0].set_xlim(0,1000)
p.axes[0,0].set_ylim(0,3)
# Waiting a year
print(data[['PATIENCE_YEAR', 'RISK_1K']].corr())
p = sns.lmplot('RISK_1K', 'PATIENCE_YEAR', data)
p.axes[0,0].set_xlim(0,1000)
p.axes[0,0].set_ylim(0,)
# $10,000 lottery ticket
print(data[['PATIENCE_MONTH', 'RISK_10K']].corr())
p = sns.lmplot('RISK_10K', 'PATIENCE_MONTH', data)
p.axes[0,0].set_xlim(0,10000)
# Waiting a year and $10,000 lottery ticket
print(data[['PATIENCE_YEAR', 'RISK_10K']].corr())
p = sns.lmplot('RISK_10K', 'PATIENCE_YEAR', data)
p.axes[0,0].set_xlim(0,10000)
p.axes[0,0].set_ylim(0,)
# Self-reported risk-attitude items available for the comparison below:
#RISK
#DRIVING
#FINANCIAL
#OCCUPATION
#HEALTH
#FAITH
#ROMANCE
#MAJOR_LIFE
#BETS
# Eliminate subjects not reporting risk
# NOTE(review): despite the comment above (kept from the original), no
# filtering actually happens here -- r just selects which item to use.
r = 'RISK'
# beh econ risk vs. self-reported risk; jitter makes the discrete
# self-report responses visible in the scatter
print(data[r].corr(data['RISK_1K']))
p = sns.lmplot(r, 'RISK_1K', data, x_jitter=.3, y_jitter=50)
p.set(ylim=(0, 1000), xlim=(0, 10))
# beh econ patience vs. self-reported risk
print(data[r].corr(data['PATIENCE_MONTH']))
p = sns.lmplot(r, 'PATIENCE_MONTH', data, x_jitter=.3)
p.set(ylim=(0, 10), xlim=(0, 10))
# --- Financial-literacy item 4 as a moderator -------------------------------
# Split on FIT_LIT_4 (highGroup: response == 1, lowGroup: response == 0),
# compare patience/risk between the groups, then test whether the
# risk-patience correlation differs between them.
finlit4Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_4'])
highGroup = finlit4Data[finlit4Data['FIT_LIT_4'] == 1]
lowGroup = finlit4Data[finlit4Data['FIT_LIT_4'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit4Data, hue='FIT_LIT_4')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (standardize, then test the interaction)
finlit4Data['RISK_1K'] = preprocessing.scale(finlit4Data['RISK_1K'].values)
finlit4Data['PATIENCE_MONTH'] = preprocessing.scale(finlit4Data['PATIENCE_MONTH'].values)
# Fix: the original used Python-2 `print mod.summary()` statements, which
# are a SyntaxError on Python 3 and inconsistent with the call-form prints
# used everywhere else in this file. (disp=False is ignored by OLS.fit.)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_4', data=finlit4Data).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_4', data=finlit4Data).fit(disp=False)
print(mod.summary())
# --- Financial-literacy item 5 as a moderator -------------------------------
# Split on FIT_LIT_5 (highGroup: response > 1, lowGroup: response == 1),
# compare patience/risk between the groups, then test whether the
# risk-patience correlation differs between them.
finlit5Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_5'])
highGroup = finlit5Data[finlit5Data['FIT_LIT_5'] > 1]
lowGroup = finlit5Data[finlit5Data['FIT_LIT_5'] == 1]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# Collapse responses 2 and 3 to 0 before the regression (binary moderator)
finlit5Data.replace({'FIT_LIT_5': {2:0, 3:0}}, inplace=True)
# traditional moderation analyses (standardize, then test the interaction)
finlit5Data['RISK_1K'] = preprocessing.scale(finlit5Data['RISK_1K'].values)
finlit5Data['PATIENCE_MONTH'] = preprocessing.scale(finlit5Data['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_5', data=finlit5Data).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_5', data=finlit5Data).fit(disp=False)
print(mod.summary())
# --- Financial-literacy item 6 as a moderator -------------------------------
# Split on FIT_LIT_6 (highGroup: response < 3, lowGroup: response == 3).
# NOTE(review): the later binary recode treats response 3 as CORRECT (-> 1),
# so "highGroup" here holds the incorrect responders -- the group labels
# look inverted relative to the other stanzas; confirm intent.
finlit6Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_6'])
highGroup = finlit6Data[finlit6Data['FIT_LIT_6'] < 3]
lowGroup = finlit6Data[finlit6Data['FIT_LIT_6'] == 3]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
# Fisher-z test for a difference between the two independent correlations.
# Fix: the original printed this comparison twice (an exact 5-line
# copy-paste duplicate); the duplicate has been removed.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit6Data, hue='FIT_LIT_6')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Make responses binary (correct vs. incorrect): 3 -> 1, 1/2 -> 0.
# The two-step mapping via 5/6 avoids clobbering already-recoded values.
finlit6Data.replace({'FIT_LIT_6': {3:5, 1:6, 2:6}}, inplace=True)
finlit6Data.replace({'FIT_LIT_6': {5:1, 6:0}}, inplace=True)
# traditional moderation analyses (standardize, then test the interaction)
finlit6Data['RISK_1K'] = preprocessing.scale(finlit6Data['RISK_1K'].values)
finlit6Data['PATIENCE_MONTH'] = preprocessing.scale(finlit6Data['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_6', data=finlit6Data).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_6', data=finlit6Data).fit(disp=False)
print(mod.summary())
# --- Financial-literacy item 7 as a moderator -------------------------------
# Split on FIT_LIT_7 (highGroup: response == 2 [correct per the binary
# recode below], lowGroup: any other response).
finlit7Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_7'])
highGroup = finlit7Data[finlit7Data['FIT_LIT_7'] == 2]
lowGroup = finlit7Data[(finlit7Data['FIT_LIT_7'] < 2) | (finlit7Data['FIT_LIT_7'] > 2)]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit7Data, hue='FIT_LIT_7')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Make responses binary (correct vs. incorrect): 2 -> 1, 1/3/4 -> 0.
# The two-step mapping via 5/6 avoids clobbering already-recoded values.
finlit7Data.replace({'FIT_LIT_7': {2:5, 1:6, 3:6, 4:6}}, inplace=True)
finlit7Data.replace({'FIT_LIT_7': {5:1, 6:0}}, inplace=True)
# traditional moderation analyses (standardize, then test the interaction)
finlit7Data['RISK_1K'] = preprocessing.scale(finlit7Data['RISK_1K'].values)
finlit7Data['PATIENCE_MONTH'] = preprocessing.scale(finlit7Data['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_7', data=finlit7Data).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_7', data=finlit7Data).fit(disp=False)
print(mod.summary())
# --- Financial-literacy item 8 as a moderator -------------------------------
# Split on FIT_LIT_8 (highGroup: response == 1, lowGroup: response == 0),
# compare patience/risk between the groups, then test whether the
# risk-patience correlation differs between them.
finlit8Data = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIT_LIT_8'])
highGroup = finlit8Data[finlit8Data['FIT_LIT_8'] == 1]
lowGroup = finlit8Data[finlit8Data['FIT_LIT_8'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', finlit8Data, hue='FIT_LIT_8')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (standardize, then test the interaction)
finlit8Data['RISK_1K'] = preprocessing.scale(finlit8Data['RISK_1K'].values)
finlit8Data['PATIENCE_MONTH'] = preprocessing.scale(finlit8Data['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIT_LIT_8', data=finlit8Data).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIT_LIT_8', data=finlit8Data).fit(disp=False)
print(mod.summary())
# --- Emergency fund as a moderator ------------------------------------------
# Split on EMERGENCY_FUND (highGroup: has one == 1, lowGroup: == 0),
# compare patience/risk between the groups, then test whether the
# risk-patience correlation differs between them.
emergData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EMERGENCY_FUND'])
highGroup = emergData[emergData['EMERGENCY_FUND'] == 1]
lowGroup = emergData[emergData['EMERGENCY_FUND'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', emergData, hue='EMERGENCY_FUND')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (standardize, then test the interaction)
emergData['RISK_1K'] = preprocessing.scale(emergData['RISK_1K'].values)
emergData['PATIENCE_MONTH'] = preprocessing.scale(emergData['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EMERGENCY_FUND', data=emergData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EMERGENCY_FUND', data=emergData).fit(disp=False)
print(mod.summary())
# --- Age at first sex as a (continuous) moderator ---------------------------
firstsexData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'AGE_FIRST_SEX'])
len(firstsexData)
# Compare the two overlapping correlations (both involve AGE_FIRST_SEX)
# with a test for dependent correlations.
sexdelayCorr = firstsexData['PATIENCE_MONTH'].corr(firstsexData['AGE_FIRST_SEX'])
sexriskCorr = firstsexData['RISK_1K'].corr(firstsexData['AGE_FIRST_SEX'])
riskdelayCorr = firstsexData['PATIENCE_MONTH'].corr(firstsexData['RISK_1K'])
z,p = corrstats.dependent_corr(sexdelayCorr, sexriskCorr, riskdelayCorr, len(firstsexData))
print([sexdelayCorr, sexriskCorr])
print([z, p])
# Bin AGE_FIRST_SEX at its 1/6 ... 5/6 quantiles and look at the
# risk-patience correlation within each bin.
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(firstsexData['AGE_FIRST_SEX'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)
binned = np.digitize(firstsexData['AGE_FIRST_SEX'], bins)
firstsexData['bin'] = binned
# NOTE(review): digitize yields bins 0..nCat (nCat+1 categories), so the
# loop below never prints the top bin (== nCat) -- confirm intent.
for i in range(nCat):
    print(firstsexData[firstsexData['bin'] == i]['PATIENCE_MONTH'].corr(firstsexData[firstsexData['bin'] == i]['RISK_1K']))
g = sns.PairGrid(firstsexData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', firstsexData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
# traditional moderation analyses (standardize, then test the interaction)
firstsexData['RISK_1K'] = preprocessing.scale(firstsexData['RISK_1K'].values)
firstsexData['PATIENCE_MONTH'] = preprocessing.scale(firstsexData['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*AGE_FIRST_SEX', data=firstsexData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*AGE_FIRST_SEX', data=firstsexData).fit(disp=False)
print(mod.summary())
# --- Retirement account as a moderator --------------------------------------
# Split on RETIREMENT (highGroup: == 1, lowGroup: == 0), compare
# patience/risk between the groups, then test whether the risk-patience
# correlation differs between them.
retireData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'RETIREMENT'])
highGroup = retireData[retireData['RETIREMENT'] == 1]
lowGroup = retireData[retireData['RETIREMENT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ranksums(highGroup['RISK_1K'], lowGroup['RISK_1K']))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', retireData, hue='RETIREMENT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (standardize, then test the interaction)
retireData['RISK_1K'] = preprocessing.scale(retireData['RISK_1K'].values)
retireData['PATIENCE_MONTH'] = preprocessing.scale(retireData['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*RETIREMENT', data=retireData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*RETIREMENT', data=retireData).fit(disp=False)
print(mod.summary())
# --- Large theft as a moderator ---------------------------------------------
lgTheftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'STEAL_LARGE'])
# Make theft a binary variable (any response > 1 collapses to 1)
lgTheftData.replace({'STEAL_LARGE': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
highGroup = lgTheftData[lgTheftData['STEAL_LARGE'] == 1]
lowGroup = lgTheftData[lgTheftData['STEAL_LARGE'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Group means; geometric SDs are computed on the de-logged patience values
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Overlaid RISK_INCOME histograms for the two groups.
# NOTE(review): `normed=True` was removed in matplotlib 3 (use
# density=True there); kept as-is for the old matplotlib this script runs on.
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Thieves')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
# Fisher-z test for a difference between the two independent correlations
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', lgTheftData, hue='STEAL_LARGE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (standardize, then test the interaction)
lgTheftData['RISK_1K'] = preprocessing.scale(lgTheftData['RISK_1K'].values)
lgTheftData['PATIENCE_MONTH'] = preprocessing.scale(lgTheftData['PATIENCE_MONTH'].values)
# Fix: Python-2 `print mod.summary()` statements converted to call form
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*STEAL_LARGE', data=lgTheftData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*STEAL_LARGE', data=lgTheftData).fit(disp=False)
print(mod.summary())
# --- Petty theft (STEAL_SMALL): group comparison and moderation analyses. ---
# Mirrors the STEAL_LARGE section above: compare thieves vs. non-thieves on
# patience and risk, then test whether theft moderates the patience-risk link.
theftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'STEAL_SMALL'])
# Make theft a binary variable (collapse any non-zero frequency code to 1)
theftData.replace({'STEAL_SMALL': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
highGroup = theftData[theftData['STEAL_SMALL'] == 1]
lowGroup = theftData[theftData['STEAL_SMALL'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — PATIENCE_MONTH apparently
# log-scaled; TODO confirm), Welch t-test.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk lottery measure: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Thieves')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# BUG FIX: was `theftBinData`, a name that is never defined anywhere in the
# file (NameError at runtime); the binarized frame here is `theftData`.
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', theftData, hue='STEAL_SMALL')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (variables standardized first)
theftData['RISK_1K'] = preprocessing.scale(theftData['RISK_1K'].values)
theftData['PATIENCE_MONTH'] = preprocessing.scale(theftData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*STEAL_SMALL', data=theftData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*STEAL_SMALL', data=theftData).fit(disp=False)
print(mod.summary())
# --- Worry/anxiety/depression flag: group comparison and moderation. ---
# WORRY_ANX_DEPRESS is already 0/1, so no recoding step here.
anxData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'WORRY_ANX_DEPRESS'])
highGroup = anxData[anxData['WORRY_ANX_DEPRESS'] == 1]
lowGroup = anxData[anxData['WORRY_ANX_DEPRESS'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Anxious')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# traditional moderation analyses (variables standardized first)
anxData['RISK_1K'] = preprocessing.scale(anxData['RISK_1K'].values)
anxData['PATIENCE_MONTH'] = preprocessing.scale(anxData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*WORRY_ANX_DEPRESS', data=anxData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*WORRY_ANX_DEPRESS', data=anxData).fit(disp=False)
print mod.summary()
# --- Ever-depressed flag: group comparison and moderation (no histograms). ---
depressData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EVER_DEPRESSED'])
highGroup = depressData[depressData['EVER_DEPRESSED'] == 1]
lowGroup = depressData[depressData['EVER_DEPRESSED'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# traditional moderation analyses (variables standardized first)
depressData['RISK_1K'] = preprocessing.scale(depressData['RISK_1K'].values)
depressData['PATIENCE_MONTH'] = preprocessing.scale(depressData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EVER_DEPRESSED', data=depressData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EVER_DEPRESSED', data=depressData).fit(disp=False)
print mod.summary()
# --- Weight-management intent: group comparison and moderation. ---
# TRYING_LOSE_WEIGHT codes:
#1 Lose weight
#2 Gain weight
#3 Stay about the same
#4 Not trying to do anything
weightData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'TRYING_LOSE_WEIGHT'])
# highGroup = codes 2-4 (NOT trying to lose); lowGroup = code 1 (trying to lose)
highGroup = weightData[weightData['TRYING_LOSE_WEIGHT'] > 1]
lowGroup = weightData[weightData['TRYING_LOSE_WEIGHT'] == 1]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# traditional moderation analyses (variables standardized first; note the
# moderator here keeps its 1-4 coding rather than a 0/1 recode)
weightData['RISK_1K'] = preprocessing.scale(weightData['RISK_1K'].values)
weightData['PATIENCE_MONTH'] = preprocessing.scale(weightData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*TRYING_LOSE_WEIGHT', data=weightData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*TRYING_LOSE_WEIGHT', data=weightData).fit(disp=False)
print mod.summary()
# --- Fighting (FIGHT): group comparison and moderation analyses. ---
fightData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'FIGHT'])
# Make fighting a binary variable (collapse any non-zero frequency code to 1)
# (comment previously said "shoplifting" — copy-paste from the shoplift section)
fightData.replace({'FIGHT': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
highGroup = fightData[fightData['FIGHT'] == 1]
lowGroup = fightData[fightData['FIGHT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# BUG FIX: the high-group histogram was labeled 'Shoplifters' — another
# copy-paste from the shoplift section; this section is about fighting.
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Fighters')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values,\
    4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', fightData, hue='FIGHT', ci=0, palette=sns.cubehelix_palette(7, rot=-.5))
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (variables standardized first)
fightData['RISK_1K'] = preprocessing.scale(fightData['RISK_1K'].values)
fightData['PATIENCE_MONTH'] = preprocessing.scale(fightData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*FIGHT', data=fightData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*FIGHT', data=fightData).fit(disp=False)
print(mod.summary())
# --- Shoplifting (SHOPLIFT): group comparison and moderation analyses. ---
shopliftData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SHOPLIFT'])
# Groups split on raw frequency codes (>0 vs. 0); the 0/1 recode happens
# later, just before the OLS moderation models.
highGroup = shopliftData[shopliftData['SHOPLIFT'] > 0]
lowGroup = shopliftData[shopliftData['SHOPLIFT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
p = plt.hist(highGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values, 4, range=[-2,2], normed=True, alpha=.5, label='Shoplifters')
p = plt.hist(lowGroup.dropna(subset = ['RISK_INCOME'])['RISK_INCOME'].values, 4, range=[-2,2], normed=True, alpha=.5, label='Not')
plt.legend()
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# lmplot drawn before the recode, so hue shows the raw frequency codes.
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', shopliftData, hue='SHOPLIFT', ci=0, palette=sns.cubehelix_palette(7, rot=-.5))
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Make shoplifting a binary variable
shopliftData.replace({'SHOPLIFT': {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
# traditional moderation analyses (variables standardized first)
shopliftData['RISK_1K'] = preprocessing.scale(shopliftData['RISK_1K'].values)
shopliftData['PATIENCE_MONTH'] = preprocessing.scale(shopliftData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SHOPLIFT', data=shopliftData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SHOPLIFT', data=shopliftData).fit(disp=False)
print mod.summary()
# --- Health insurance (HEALTH_INSUR, 0/1): group comparison and moderation. ---
healthinsurData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'HEALTH_INSUR'])
highGroup = healthinsurData[healthinsurData['HEALTH_INSUR'] == 1]
lowGroup = healthinsurData[healthinsurData['HEALTH_INSUR'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk items: rank-sum tests (this section also checks the
# mid-risk variant of the income-risk question).
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
print(np.mean(highGroup['RISK_INCOME_MID_RISK']))
print(np.mean(lowGroup['RISK_INCOME_MID_RISK']))
print(stats.ranksums(highGroup['RISK_INCOME_MID_RISK'], lowGroup['RISK_INCOME_MID_RISK']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# NOTE(review): plots the full `data` frame, not `healthinsurData` as the
# sibling sections do — verify this is intentional.
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', data, hue='HEALTH_INSUR')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# tradtiional moderation analyses
healthinsurData['RISK_1K'] = preprocessing.scale(healthinsurData['RISK_1K'].values)
healthinsurData['PATIENCE_MONTH'] = preprocessing.scale(healthinsurData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*HEALTH_INSUR', data=healthinsurData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*HEALTH_INSUR', data=healthinsurData).fit(disp=False)
print mod.summary()
# --- Doctor visits (DOC_VISIT, continuous): correlational comparison. ---
docData = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', 'DOC_VISIT'])
len(docData)
#sns.factorplot('DOC_VISIT', 'RISK_INCOME', data=data.dropna(subset=['DOC_VISIT', 'RISK_INCOME']))
#sns.factorplot('DOC_VISIT', 'PATIENCE_MONTH', data=data.dropna(subset=['DOC_VISIT', 'PATIENCE_MONTH']))
g = sns.PairGrid(docData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='DOC_VISIT')
g.map(sns.pointplot)
# Do patience and risk correlate differently with doctor visits? Since both
# correlations share the DOC_VISIT variable, use the dependent-correlation test.
delaydocCorr = docData['PATIENCE_MONTH'].corr(docData['DOC_VISIT'])
riskdocCorr = docData['RISK_1K'].corr(docData['DOC_VISIT'])
delayriskCorr = docData['PATIENCE_MONTH'].corr(docData['RISK_1K'])
z,p = corrstats.dependent_corr(delaydocCorr, riskdocCorr, delayriskCorr, len(docData))
print([delaydocCorr, riskdocCorr])
print([z, p])
# tradtiional moderation analyses
docData['RISK_1K'] = preprocessing.scale(docData['RISK_1K'].values)
docData['PATIENCE_MONTH'] = preprocessing.scale(docData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*DOC_VISIT', data=docData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*DOC_VISIT', data=docData).fit(disp=False)
print mod.summary()
# --- Investing: stock-holders-without-government-bonds vs. everyone else. ---
print(np.mean(data['INVEST_STOCK']))
print(np.mean(data['INVEST_PRIV_BOND']))
print(np.mean(data['INVEST_GOV_BOND']))
investData = data.dropna(subset=['PATIENCE_MONTH', 'RISK_1K', 'INVEST_STOCK', 'INVEST_GOV_BOND'])
# "Risky investor" flag: holds stock but no government bonds.
investData['bin'] = (investData['INVEST_STOCK'] == 1) & (investData['INVEST_GOV_BOND'] == 0)
highGroup = investData[investData['bin'] == 1]
lowGroup = investData[investData['bin'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: group means and Welch t-test.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: group means and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
# FIX: previously printed [highCorr, lowCorr, p] and discarded z; report the
# correlations and the full (z, p) result like every other section does.
print([highCorr, lowCorr])
print([z, p])
# traditional moderation analyses (variables standardized first)
investData['RISK_1K'] = preprocessing.scale(investData['RISK_1K'].values)
investData['PATIENCE_MONTH'] = preprocessing.scale(investData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*bin', data=investData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*bin', data=investData).fit(disp=False)
print(mod.summary())
# --- Entrepreneurship (ENTREPRENEUR, 0/1): group comparison and moderation. ---
enetreData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'ENTREPRENEUR'])
highGroup = enetreData[enetreData['ENTREPRENEUR'] == 1]
lowGroup = enetreData[enetreData['ENTREPRENEUR'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Overlaid RISK_1K and RISK_10K distributions for the two groups.
p = plt.hist(highGroup['RISK_1K'].values, normed=True, bins=10, alpha = .5, label=['Entrepreneurs'])
p = plt.hist(lowGroup['RISK_1K'].values, normed=True, bins=10, alpha = .5, label=['Not'])
p = plt.legend()
p = plt.hist(highGroup.dropna(subset = ['RISK_10K'])['RISK_10K'].values, normed=True, alpha = .5, label=['Entrepreneurs'])
p = plt.hist(lowGroup.dropna(subset = ['RISK_10K'])['RISK_10K'].values, normed=True, alpha = .5, label=['Not'])
p = plt.legend()
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# traditional moderation analyses (variables standardized first)
enetreData['RISK_1K'] = preprocessing.scale(enetreData['RISK_1K'].values)
enetreData['PATIENCE_MONTH'] = preprocessing.scale(enetreData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*ENTREPRENEUR', data=enetreData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*ENTREPRENEUR', data=enetreData).fit(disp=False)
print mod.summary()
# --- Exercise (EXERCISE frequency): any-exercise vs. none comparison. ---
exerciseData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EXERCISE'])
highGroup = exerciseData[exerciseData['EXERCISE'] > 0]
lowGroup = exerciseData[exerciseData['EXERCISE'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
# NOTE(review): plots the full `data` frame (raw EXERCISE codes as hue), not
# `exerciseData` — verify this is intentional.
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', data, hue='EXERCISE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,16)
# traditional moderation analyses (variables standardized first)
exerciseData['RISK_1K'] = preprocessing.scale(exerciseData['RISK_1K'].values)
exerciseData['PATIENCE_MONTH'] = preprocessing.scale(exerciseData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EXERCISE', data=exerciseData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EXERCISE', data=exerciseData).fit(disp=False)
print mod.summary()
# --- Sex differences (SEX: 1 = male, 2 = female per the splits below). ---
sexData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SEX'])
maleData = sexData[sexData['SEX'] == 1]
femaleData = sexData[sexData['SEX'] == 2]
nMales = len(maleData)
nFemales = len(femaleData)
print([nMales, nFemales])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(maleData['PATIENCE_MONTH']))
print(np.mean(femaleData['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(maleData['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(femaleData['PATIENCE_MONTH'])))
print(stats.ttest_ind(maleData['PATIENCE_MONTH'], femaleData['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(maleData['RISK_1K']))
print(np.mean(femaleData['RISK_1K']))
print(np.std(maleData['RISK_1K']))
print(np.std(femaleData['RISK_1K']))
print(stats.ttest_ind(maleData['RISK_1K'], femaleData['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(maleData['RISK_INCOME']))
print(np.mean(femaleData['RISK_INCOME']))
print(stats.ranksums(maleData['RISK_INCOME'], femaleData['RISK_INCOME']))
# Compare the patience-risk correlation across sexes (Fisher z).
maleCorr = maleData['PATIENCE_MONTH'].corr(maleData['RISK_1K'])
femaleCorr = femaleData['PATIENCE_MONTH'].corr(femaleData['RISK_1K'])
print([maleCorr, femaleCorr])
z,p = corrstats.independent_corr(maleCorr, femaleCorr, nMales, nFemales)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', sexData, hue='SEX')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# tradtiional moderation analyses
sexData['RISK_1K'] = preprocessing.scale(sexData['RISK_1K'].values)
sexData['PATIENCE_MONTH'] = preprocessing.scale(sexData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SEX', data=sexData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SEX', data=sexData).fit(disp=False)
print mod.summary()
# --- BMI (continuous moderator): dependent-correlation test, quantile
# binning, and moderation models. ---
bmiData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'BMI'])
nBMI = len(bmiData)
pCorr = bmiData['PATIENCE_MONTH'].corr(bmiData['BMI'])
rCorr = bmiData['RISK_1K'].corr(bmiData['BMI'])
print(nBMI)
print(pCorr)
print(rCorr)
# Both correlations share BMI, so use the dependent-correlation comparison.
prCorr = bmiData['RISK_1K'].corr(bmiData['PATIENCE_MONTH'])
z,p = corrstats.dependent_corr(pCorr, rCorr, prCorr, nBMI)
print([z, p])
# Quantile-bin BMI: nCat edges at quantiles 1/(nCat+1) .. nCat/(nCat+1).
# NOTE(review): np.digitize with nCat edges yields nCat+1 bin labels (0..nCat),
# but the corrs loop below only covers 0..nCat-1 — confirm the top bin is
# meant to be excluded.
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(bmiData['BMI'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)
binned = np.digitize(bmiData['BMI'], bins)
bmiData['bin'] = binned
g = sns.PairGrid(bmiData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', bmiData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
# Patience-risk correlation within each BMI bin.
corrs = []
for i in range(nCat):
    corrs.append(bmiData[bmiData['bin'] == i]['PATIENCE_MONTH'].corr(bmiData[bmiData['bin'] == i]['RISK_1K']))
p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
# Traditional moderation analyses (all three variables standardized)
bmiData['RISK_1K'] = preprocessing.scale(bmiData['RISK_1K'].values)
bmiData['PATIENCE_MONTH'] = preprocessing.scale(bmiData['PATIENCE_MONTH'].values)
bmiData['BMI'] = preprocessing.scale(bmiData['BMI'].values)
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*BMI', data=bmiData).fit()
print mod.summary()
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*BMI', data=bmiData).fit()
print mod.summary()
# --- Income (continuous moderator): dependent-correlation test, quantile
# binning, and moderation models. Mirrors the BMI section above. ---
incomeData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'INCOME'])
print(len(incomeData))
delayincomeCorr = incomeData['PATIENCE_MONTH'].corr(incomeData['INCOME'])
riskincomeCorr = incomeData['RISK_1K'].corr(incomeData['INCOME'])
delayriskCorr = incomeData['PATIENCE_MONTH'].corr(incomeData['RISK_1K'])
# Both correlations share INCOME, so use the dependent-correlation comparison.
z,p = corrstats.dependent_corr(delayincomeCorr, riskincomeCorr, delayriskCorr, len(incomeData))
print([delayincomeCorr, riskincomeCorr])
print([z, p])
# Quantile-bin income; see NOTE in the BMI section about digitize producing
# nCat+1 labels while the corrs loop covers only 0..nCat-1.
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(incomeData['INCOME'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)
binned = np.digitize(incomeData['INCOME'], bins)
incomeData['bin'] = binned
g = sns.PairGrid(incomeData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', incomeData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
# Patience-risk correlation within each income bin.
corrs = []
for i in range(nCat):
    corrs.append(incomeData[incomeData['bin'] == i]['PATIENCE_MONTH'].corr(incomeData[incomeData['bin'] == i]['RISK_1K']))
p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
# Traditional moderation analyses (all three variables standardized)
incomeData['RISK_1K'] = preprocessing.scale(incomeData['RISK_1K'].values)
incomeData['PATIENCE_MONTH'] = preprocessing.scale(incomeData['PATIENCE_MONTH'].values)
incomeData['INCOME'] = preprocessing.scale(incomeData['INCOME'].values)
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*INCOME', data=incomeData).fit()
print mod.summary()
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*INCOME', data=incomeData).fit()
print mod.summary()
# --- IQ (continuous moderator): dependent-correlation test, quantile
# binning, and moderation models. Mirrors the BMI/income sections. ---
iqData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'IQ'])
nIQ = len(iqData)
pCorr = iqData['PATIENCE_MONTH'].corr(iqData['IQ'])
rCorr = iqData['RISK_1K'].corr(iqData['IQ'])
# BUG FIX: was a bare `print()`, which printed nothing; the parallel BMI
# section prints the sample size here, and nIQ was otherwise unused.
print(nIQ)
print(pCorr)
print(rCorr)
# Both correlations share IQ, so use the dependent-correlation comparison.
prCorr = iqData['RISK_1K'].corr(iqData['PATIENCE_MONTH'])
z,p = corrstats.dependent_corr(pCorr, rCorr, prCorr, nIQ)
print([z, p])
# Quantile-bin IQ: nCat edges at quantiles 1/(nCat+1) .. nCat/(nCat+1).
# NOTE(review): digitize yields nCat+1 bin labels (0..nCat) but the loops
# below cover only 0..nCat-1 — confirm the top bin is meant to be excluded.
nCat = 5
bins = []
for i in range(nCat):
    i = float(i)
    bins.append(iqData['IQ'].quantile((i+1)/(nCat+1)))
bins = np.array(bins)
binned = np.digitize(iqData['IQ'], bins)
iqData['bin'] = binned
# Per-bin patience-risk correlations, printed then re-collected for plotting.
for i in range(nCat):
    print(iqData[iqData['bin'] == i]['PATIENCE_MONTH'].corr(iqData[iqData['bin'] == i]['RISK_1K']))
g = sns.PairGrid(iqData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
sns.lmplot('RISK_1K', 'PATIENCE_MONTH', iqData, col='bin', palette="PuBuGn_d", size=3, x_jitter=50)
corrs = []
for i in range(nCat):
    corrs.append(iqData[iqData['bin'] == i]['PATIENCE_MONTH'].corr(iqData[iqData['bin'] == i]['RISK_1K']))
p = sns.pointplot('bin', 'correlation', data=pd.DataFrame({'bin' : range(len(corrs)), 'correlation' : corrs}))
# traditional moderation analyses (all three variables standardized; IQ cast
# to float first so preprocessing.scale doesn't warn/truncate on int data)
iqData['RISK_1K'] = preprocessing.scale(iqData['RISK_1K'].values)
iqData['PATIENCE_MONTH'] = preprocessing.scale(iqData['PATIENCE_MONTH'].values)
iqData['IQ'] = iqData['IQ'].astype(float)
iqData['IQ'] = preprocessing.scale(iqData['IQ'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*IQ', data=iqData).fit(disp=False)
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*IQ', data=iqData).fit(disp=False)
print(mod.summary())
# --- Criminal conviction (CONVICT, 0/1): group comparison and moderation. ---
conData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CONVICT'])
highGroup = conData[conData['CONVICT'] == 1]
lowGroup = conData[conData['CONVICT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', conData, hue='CONVICT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (variables standardized first)
conData['RISK_1K'] = preprocessing.scale(conData['RISK_1K'].values)
conData['PATIENCE_MONTH'] = preprocessing.scale(conData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CONVICT', data=conData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CONVICT', data=conData).fit(disp=False)
print mod.summary()
# --- Missed credit-card payments (CCMISSPAY, 0/1): comparison + moderation. ---
misssedpayData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCMISSPAY'])
highGroup = misssedpayData[misssedpayData['CCMISSPAY'] == 1]
lowGroup = misssedpayData[misssedpayData['CCMISSPAY'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', misssedpayData, hue='CCMISSPAY')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (variables standardized first)
misssedpayData['RISK_1K'] = preprocessing.scale(misssedpayData['RISK_1K'].values)
misssedpayData['PATIENCE_MONTH'] = preprocessing.scale(misssedpayData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCMISSPAY', data=misssedpayData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCMISSPAY', data=misssedpayData).fit(disp=False)
print mod.summary()
# --- Credit-card debt (CCDEBT, 0/1): group comparison and moderation. ---
ccdebtData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCDEBT'])
highGroup = ccdebtData[ccdebtData['CCDEBT'] == 1]
lowGroup = ccdebtData[ccdebtData['CCDEBT'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience: means, geometric SDs (exp first — presumably log-scaled), Welch t.
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk: means, SDs, Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Ordinal income-risk item: rank-sum test.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Compare the patience-risk correlation across groups (Fisher z).
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', ccdebtData, hue='CCDEBT')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# traditional moderation analyses (variables standardized first)
ccdebtData['RISK_1K'] = preprocessing.scale(ccdebtData['RISK_1K'].values)
ccdebtData['PATIENCE_MONTH'] = preprocessing.scale(ccdebtData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCDEBT', data=ccdebtData).fit(disp=False)
print mod.summary()
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCDEBT', data=ccdebtData).fit(disp=False)
print mod.summary()
# Maxed-out credit cards: binarize CCMAX04 (> 0 means at least one maxed
# card) and compare groups as in the other sections.
ccData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CCMAX04'])
# Construct a binary maxed-out credit card variable on the full frame
# (later effect-size code reads data['CCMAX']).
data['CCMAX'] = (data['CCMAX04'] > 0).astype(float)
# BUG FIX: dropna() above returned a *copy*, so CCMAX added to `data`
# never reached ccData and ccData['CCMAX'] raised KeyError. Build the
# column on ccData directly as well.
ccData['CCMAX'] = (ccData['CCMAX04'] > 0).astype(float)
highGroup = ccData[ccData['CCMAX'] == 1]
lowGroup = ccData[ccData['CCMAX'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', ccData, hue='CCMAX')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
ccData['RISK_1K'] = preprocessing.scale(ccData['RISK_1K'].values)
ccData['PATIENCE_MONTH'] = preprocessing.scale(ccData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*CCMAX', data=ccData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*CCMAX', data=ccData).fit(disp=False)
print(mod.summary())
# Race/ethnicity (RACE: 1 = hispanic, 2 = black, 3 = non-black/non-hispanic
# per the group labels below): three-way group comparisons of patience,
# risk, and the delay/risk correlation.
raceData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'RACE'])
hispanicGroup = raceData[raceData['RACE'] == 1]
blackGroup = raceData[raceData['RACE'] == 2]
whiteGroup = raceData[raceData['RACE'] == 3]
nhispanicGroup = len(hispanicGroup)
nblackhGroup = len(blackGroup)
nwhiteGroup = len(whiteGroup)
print([nhispanicGroup, nblackhGroup, nwhiteGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(hispanicGroup['PATIENCE_MONTH']))
print(np.mean(blackGroup['PATIENCE_MONTH']))
print(np.mean(whiteGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(hispanicGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(blackGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(whiteGroup['PATIENCE_MONTH'])))
# Pairwise Welch t-tests on patience.
print(stats.ttest_ind(hispanicGroup['PATIENCE_MONTH'], blackGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(blackGroup['PATIENCE_MONTH'], whiteGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(hispanicGroup['PATIENCE_MONTH'], whiteGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and pairwise Welch t-tests.
print(np.mean(hispanicGroup['RISK_1K']))
print(np.mean(blackGroup['RISK_1K']))
print(np.mean(whiteGroup['RISK_1K']))
print(np.std(hispanicGroup['RISK_1K']))
print(np.std(blackGroup['RISK_1K']))
print(np.std(whiteGroup['RISK_1K']))
print(stats.ttest_ind(hispanicGroup['RISK_1K'], blackGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(blackGroup['RISK_1K'], whiteGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(hispanicGroup['RISK_1K'], whiteGroup['RISK_1K'], equal_var=False))
# Pairwise rank-sum tests for the ordinal income-risk item.
print(np.mean(hispanicGroup['RISK_INCOME']))
print(np.mean(blackGroup['RISK_INCOME']))
print(np.mean(whiteGroup['RISK_INCOME']))
print(stats.ranksums(hispanicGroup['RISK_INCOME'], blackGroup['RISK_INCOME']))
print(stats.ranksums(blackGroup['RISK_INCOME'], whiteGroup['RISK_INCOME']))
print(stats.ranksums(hispanicGroup['RISK_INCOME'], whiteGroup['RISK_INCOME']))
# Delay/risk correlation per group; all pairwise Fisher z comparisons.
hispanicCorr = hispanicGroup['PATIENCE_MONTH'].corr(hispanicGroup['RISK_1K'])
blackCorr = blackGroup['PATIENCE_MONTH'].corr(blackGroup['RISK_1K'])
whiteCorr = whiteGroup['PATIENCE_MONTH'].corr(whiteGroup['RISK_1K'])
print([hispanicCorr, blackCorr, whiteCorr])
z,p = corrstats.independent_corr(hispanicCorr, blackCorr, nhispanicGroup, nblackhGroup)
print([z, p])
z,p = corrstats.independent_corr(blackCorr, whiteCorr, nblackhGroup, nwhiteGroup)
print([z, p])
z,p = corrstats.independent_corr(hispanicCorr, whiteCorr, nhispanicGroup, nwhiteGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', raceData, hue='RACE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
raceData['RISK_1K'] = preprocessing.scale(raceData['RISK_1K'].values)
raceData['PATIENCE_MONTH'] = preprocessing.scale(raceData['PATIENCE_MONTH'].values)
# Make non-black/non-hispanic the pivot: C() treatment-codes against the
# lowest level, and negating RACE makes -3 (that group) the lowest.
raceData['RACE'] = raceData['RACE'] * -1
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*C(RACE)', data=raceData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*C(RACE)', data=raceData).fit(disp=False)
print(mod.summary())
# Illegal drug use (DRUG = 1 vs 0): same group-comparison battery as the
# other sections.
drugData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'DRUG'])
highGroup = drugData[drugData['DRUG'] == 1]
lowGroup = drugData[drugData['DRUG'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', drugData, hue='DRUG')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
drugData['RISK_1K'] = preprocessing.scale(drugData['RISK_1K'].values)
drugData['PATIENCE_MONTH'] = preprocessing.scale(drugData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*DRUG', data=drugData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*DRUG', data=drugData).fit(disp=False)
print(mod.summary())
# Alcohol use (ALCOHOL = 1 vs 0): same group-comparison battery.
alcData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'ALCOHOL'])
highGroup = alcData[alcData['ALCOHOL'] == 1]
lowGroup = alcData[alcData['ALCOHOL'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', alcData, hue='ALCOHOL')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
alcData['RISK_1K'] = preprocessing.scale(alcData['RISK_1K'].values)
alcData['PATIENCE_MONTH'] = preprocessing.scale(alcData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*ALCOHOL', data=alcData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*ALCOHOL', data=alcData).fit(disp=False)
print(mod.summary())
# Binge drinking: BINGE counts binge episodes, so discretize to any (> 0)
# vs none, then run the usual group-comparison battery.
bingData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'BINGE'])
# Discretize this variable (any binge episodes -> 1.0, none -> 0.0).
bingData['BINGE'] = (bingData['BINGE'] > 0).astype(float)
highGroup = bingData[bingData['BINGE'] == 1]
lowGroup = bingData[bingData['BINGE'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print([highCorr, lowCorr])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', bingData, hue='BINGE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
bingData['RISK_1K'] = preprocessing.scale(bingData['RISK_1K'].values)
bingData['PATIENCE_MONTH'] = preprocessing.scale(bingData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*BINGE', data=bingData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*BINGE', data=bingData).fit(disp=False)
print(mod.summary())
# BUG FIX: this line was a bare markdown-cell leftover (a SyntaxError in a
# .py file); it is now a comment.
# Whether subjects report having smoked > 100 cigarettes in their lifetimes.
eversmokData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'SMOKE_100_CIGS'])
highGroup = eversmokData[eversmokData['SMOKE_100_CIGS'] == 1]
lowGroup = eversmokData[eversmokData['SMOKE_100_CIGS'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', eversmokData, hue='SMOKE_100_CIGS')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
eversmokData['RISK_1K'] = preprocessing.scale(eversmokData['RISK_1K'].values)
eversmokData['PATIENCE_MONTH'] = preprocessing.scale(eversmokData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*SMOKE_100_CIGS', data=eversmokData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*SMOKE_100_CIGS', data=eversmokData).fit(disp=False)
print(mod.summary())
# Age at which subjects started smoking, treated continuously and also
# quantile-binned to inspect the delay/risk correlation along the range.
smokageData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'AGE_START_SMOKE'])
len(smokageData)  # notebook leftover: a no-op in a script
nCat = 5
# Bin edges at quantiles 1/6 ... 5/6 of the starting-age distribution.
# The explicit floats preserve the original true division under Python 2.
bins = np.array([smokageData['AGE_START_SMOKE'].quantile((i + 1.0) / (nCat + 1))
                 for i in range(nCat)])
smokageData['bin'] = np.digitize(smokageData['AGE_START_SMOKE'], bins)
# Delay/risk correlation within each bin. NOTE(review): digitize yields
# bins 0..nCat, but only 0..nCat-1 are printed here — confirm the top bin
# is intentionally omitted.
for i in range(nCat):
    binGroup = smokageData[smokageData['bin'] == i]
    print(binGroup['PATIENCE_MONTH'].corr(binGroup['RISK_1K']))
g = sns.PairGrid(smokageData, y_vars=['PATIENCE_MONTH', 'RISK_1K', 'RISK_INCOME'], x_vars='bin')
g.map(sns.pointplot)
# Do delay and risk correlate differently with starting age? Compare the
# two dependent correlations (they share the smoking-age sample).
delaySAcorr = smokageData['PATIENCE_MONTH'].corr(smokageData['AGE_START_SMOKE'])
riskSAcorr = smokageData['RISK_1K'].corr(smokageData['AGE_START_SMOKE'])
delayriskcorr = smokageData['RISK_1K'].corr(smokageData['PATIENCE_MONTH'])
print([delaySAcorr, riskSAcorr])
z,p = corrstats.dependent_corr(delaySAcorr, riskSAcorr, delayriskcorr, len(smokageData))
print([z, p])
# Traditional moderation analyses on z-scored measures.
smokageData['RISK_1K'] = preprocessing.scale(smokageData['RISK_1K'].values)
smokageData['PATIENCE_MONTH'] = preprocessing.scale(smokageData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*AGE_START_SMOKE', data=smokageData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*AGE_START_SMOKE', data=smokageData).fit(disp=False)
print(mod.summary())
# BUG FIX: this line was a bare markdown-cell leftover (a SyntaxError in a
# .py file); it is now a comment.
# Note that this is among subjects reporting that they have smoked > 100
# cigarettes in their lifetimes.
# Current smoking (CIGARETTE codes: 1/2/3 map to high/mid/low smoking per
# the grouping below — confirm against the codebook). A superseded
# threshold-based binning (upperThresh/lowerThresh + np.digitize) was
# removed; groups are taken directly from the raw codes.
smokData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'CIGARETTE'])
lowGroup = smokData[smokData['CIGARETTE'] == 3]
midGroup = smokData[smokData['CIGARETTE'] == 2]
highGroup = smokData[smokData['CIGARETTE'] == 1]
nHighGroup = len(highGroup)
nMidGroup = len(midGroup)
nLowGroup = len(lowGroup)
print([nHighGroup, nMidGroup, nLowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(midGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(midGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
# Pairwise Welch t-tests on patience.
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], midGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(midGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and pairwise Welch t-tests.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(midGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(midGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], midGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(midGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(midGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
# BUG FIX: these rank-sum tests ran on RISK_1K, duplicating the t-tests
# above; every sibling section pairs the RISK_INCOME means with RISK_INCOME
# rank-sum tests, so this was a copy-paste slip.
print(stats.ranksums(highGroup['RISK_INCOME'], midGroup['RISK_INCOME']))
print(stats.ranksums(midGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group; all pairwise Fisher z comparisons.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
print(highCorr)
midCorr = midGroup['PATIENCE_MONTH'].corr(midGroup['RISK_1K'])
print(midCorr)
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
print(lowCorr)
print('')
z,p = corrstats.independent_corr(highCorr, midCorr, nHighGroup, nMidGroup)
print([z, p])
z,p = corrstats.independent_corr(midCorr, lowCorr, nMidGroup, nLowGroup)
print([z, p])
z,p = corrstats.independent_corr(highCorr, lowCorr, nHighGroup, nLowGroup)
print([z, p])
# BUG FIX: hue was 'bin', a column whose construction was commented out, so
# lmplot raised KeyError; color by the raw CIGARETTE code instead.
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', smokData, hue='CIGARETTE')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
smokData['RISK_1K'] = preprocessing.scale(smokData['RISK_1K'].values)
smokData['PATIENCE_MONTH'] = preprocessing.scale(smokData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*C(CIGARETTE)', data=smokData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*C(CIGARETTE)', data=smokData).fit(disp=False)
print(mod.summary())
# Employment status (EMPLOYED = 1 vs 0): same group-comparison battery.
employmentData = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', 'EMPLOYED'])
highGroup = employmentData[employmentData['EMPLOYED'] == 1]
lowGroup = employmentData[employmentData['EMPLOYED'] == 0]
nhighGroup = len(highGroup)
nlowGroup = len(lowGroup)
print([nhighGroup, nlowGroup])
# Patience descriptives (np.exp suggests a log scale — TODO confirm).
print(np.mean(highGroup['PATIENCE_MONTH']))
print(np.mean(lowGroup['PATIENCE_MONTH']))
print(geometric.geometric_std(np.exp(highGroup['PATIENCE_MONTH'])))
print(geometric.geometric_std(np.exp(lowGroup['PATIENCE_MONTH'])))
print(stats.ttest_ind(highGroup['PATIENCE_MONTH'], lowGroup['PATIENCE_MONTH'], equal_var=False))
# Risk descriptives and Welch t-test.
print(np.mean(highGroup['RISK_1K']))
print(np.mean(lowGroup['RISK_1K']))
print(np.std(highGroup['RISK_1K']))
print(np.std(lowGroup['RISK_1K']))
print(stats.ttest_ind(highGroup['RISK_1K'], lowGroup['RISK_1K'], equal_var=False))
# Non-parametric test for the ordinal income-risk item.
print(np.mean(highGroup['RISK_INCOME']))
print(np.mean(lowGroup['RISK_INCOME']))
print(stats.ranksums(highGroup['RISK_INCOME'], lowGroup['RISK_INCOME']))
# Delay/risk correlation per group, compared via Fisher z.
highCorr = highGroup['PATIENCE_MONTH'].corr(highGroup['RISK_1K'])
lowCorr = lowGroup['PATIENCE_MONTH'].corr(lowGroup['RISK_1K'])
z,p = corrstats.independent_corr(highCorr, lowCorr, nhighGroup, nlowGroup)
print([highCorr, lowCorr])
print([z, p])
p = sns.lmplot('PATIENCE_MONTH', 'RISK_1K', employmentData, hue='EMPLOYED')
p.axes[0,0].set_ylim(0,1000)
p.axes[0,0].set_xlim(0,15)
# Traditional moderation analyses on z-scored measures.
employmentData['RISK_1K'] = preprocessing.scale(employmentData['RISK_1K'].values)
employmentData['PATIENCE_MONTH'] = preprocessing.scale(employmentData['PATIENCE_MONTH'].values)
mod = smf.ols(formula='RISK_1K~PATIENCE_MONTH*EMPLOYED', data=employmentData).fit(disp=False)
# BUG FIX: converted Python 2 print statements to print() calls.
print(mod.summary())
mod = smf.ols(formula='PATIENCE_MONTH~RISK_1K*EMPLOYED', data=employmentData).fit(disp=False)
print(mod.summary())
from effect_sizes import cohen_d
from scipy.stats import pearsonr
# Cohen's d for PATIENCE_MONTH and RISK_1K across a battery of group
# splits. The original block repeated the same four lines per outcome;
# each spec is (description, column, in_group_a, in_group_b) where the
# last two are predicates on the column that define the two groups.
# Output (two printed d values per spec, in this order) is unchanged.
d_specs = [
    ('Gender', 'SEX', lambda s: s == 1, lambda s: s == 2),
    ('Retirement savings', 'RETIREMENT', lambda s: s == 1, lambda s: s == 0),
    ('Theft (> $50)', 'STEAL_LARGE', lambda s: s == 1, lambda s: s == 0),
    ('Theft (< $50)', 'STEAL_SMALL', lambda s: s == 1, lambda s: s == 0),
    ('Worry/Anxiety/Depression', 'WORRY_ANX_DEPRESS', lambda s: s == 1, lambda s: s == 0),
    ('Weight loss', 'TRYING_LOSE_WEIGHT', lambda s: s > 1, lambda s: s == 1),
    ('Fighting', 'FIGHT', lambda s: s > 0, lambda s: s == 0),
    ('Health Insurance', 'HEALTH_INSUR', lambda s: s == 1, lambda s: s == 0),
    ('Entrepreneurs', 'ENTREPRENEUR', lambda s: s == 1, lambda s: s == 0),
    ('Exercise', 'EXERCISE', lambda s: s > 0, lambda s: s == 0),
    ('Credit Card Missed Payment', 'CCMISSPAY', lambda s: s == 1, lambda s: s == 0),
    ('Credit Cards Maxed', 'CCMAX', lambda s: s == 1, lambda s: s == 0),
    ('Credit Card Debt', 'CCDEBT', lambda s: s == 1, lambda s: s == 0),
    ('Drug use', 'DRUG', lambda s: s == 1, lambda s: s == 0),
    ('Alcohol', 'ALCOHOL', lambda s: s == 1, lambda s: s == 0),
    ('Binge drinking', 'BINGE', lambda s: s > 0, lambda s: s == 0),
    ('Smoking (ever)', 'SMOKE_100_CIGS', lambda s: s == 1, lambda s: s == 0),
    ('Smoking (current)', 'CIGARETTE', lambda s: s > 1, lambda s: s == 1),
    ('Emergency fund', 'EMERGENCY_FUND', lambda s: s == 0, lambda s: s == 1),
    # Financial literacy items: the original split on the (presumably
    # correct) answer code per question — confirm against the codebook.
    ('Financial lit, #4', 'FIT_LIT_4', lambda s: s == 0, lambda s: s == 1),
    ('Financial lit, #5', 'FIT_LIT_5', lambda s: s == 1, lambda s: s > 1),
    ('Financial lit, #6', 'FIT_LIT_6', lambda s: s == 3, lambda s: s < 3),
    # != 2 is equivalent to the original (s < 2) | (s > 2): NaNs were
    # already removed by dropna.
    ('Financial lit, #7', 'FIT_LIT_7', lambda s: s == 2, lambda s: s != 2),
    ('Financial lit, #8', 'FIT_LIT_8', lambda s: s == 1, lambda s: s == 0),
    ('Employment', 'EMPLOYED', lambda s: s == 1, lambda s: s == 0),
]
for desc, label, in_a, in_b in d_specs:
    d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
    print(cohen_d(d[in_a(d[label])]['PATIENCE_MONTH'], d[in_b(d[label])]['PATIENCE_MONTH']))
    print(cohen_d(d[in_a(d[label])]['RISK_1K'], d[in_b(d[label])]['RISK_1K']))
# Pearson correlations of the two measures with continuous covariates.
# NOTE(review): the original printed r*r (shared variance) for BMI only,
# plain r for everything else — preserved here; confirm that asymmetry is
# intended.
r_specs = [
    ('IQ', False),
    ('INCOME', False),
    ('DOC_VISIT', False),
    ('BMI', True),
    ('AGE_START_SMOKE', False),
    ('AGE_FIRST_SEX', False),
]
for label, squared in r_specs:
    d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
    for measure in ('PATIENCE_MONTH', 'RISK_1K'):
        corr,p = pearsonr(d[measure], d[label])
        print(corr * corr if squared else corr)
from partial_corr import partial_corr
# Partial correlations: for each covariate, print the 3x3 zero-order
# correlation matrix of (PATIENCE_MONTH, RISK_1K, covariate), a blank
# line, then the partial-correlation matrix from the project helper.
# The original repeated the same six lines per covariate; this loop
# produces the identical output in the identical order.
pc_labels = [
    'SEX',                  # gender
    'RETIREMENT',           # retirement savings
    'STEAL_LARGE',          # theft > $50
    'STEAL_SMALL',          # theft < $50
    'WORRY_ANX_DEPRESS',
    'TRYING_LOSE_WEIGHT',
    'FIGHT',
    'HEALTH_INSUR',
    # NOTE(review): HEALTH_INSUR appeared twice in the original; the
    # duplicate is kept so the printed output is unchanged — confirm
    # whether it was meant to be a different covariate.
    'HEALTH_INSUR',
    'ENTREPRENEUR',
    'EXERCISE',
    'CCMISSPAY',
    'CCMAX',
    'CCDEBT',
    'DRUG',
    'ALCOHOL',
    'BINGE',
    'SMOKE_100_CIGS',
    'CIGARETTE',
    'EMERGENCY_FUND',
    'FIT_LIT_4',
    'FIT_LIT_5',
    'FIT_LIT_6',
    'FIT_LIT_7',
    'FIT_LIT_8',
    'EMPLOYED',
    'IQ',
    'INCOME',
    'DOC_VISIT',
    'BMI',
]
for label in pc_labels:
    d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
    # partial_corr expects an (n_samples, n_vars) array.
    asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
    print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
    print('')
    print(partial_corr(asdf))
label = 'AGE_START_SMOKE'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
label = 'AGE_FIRST_SEX'
d = data.dropna(subset = ['RISK_1K', 'PATIENCE_MONTH', label])
asdf = np.vstack([d['PATIENCE_MONTH'], d['RISK_1K'], d[label]]).T
print(d[['PATIENCE_MONTH', 'RISK_1K', label]].corr())
print('')
print(partial_corr(asdf))
# Gender: logits of SEX on patience and/or risk tolerance, then
# likelihood-ratio tests of each one-predictor model against the full model.
label = 'SEX'
# .copy() so the recode below mutates an independent frame (avoids pandas
# chained-assignment warnings / copy-on-write surprises).
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
d[label] = d[label] - 1  # recode {1, 2} -> {0, 1} so logit sees a 0/1 outcome
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: the LR statistic is chi-squared distributed, so this must use
# stats.chi2, not stats.chi (the chi distribution).  chi2.sf(x, df) is the
# numerically stable equivalent of 1 - chi2.cdf(x, df).
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Retirement savings: logits of RETIREMENT on patience and/or risk tolerance,
# then likelihood-ratio tests of each one-predictor model vs the full model.
label = 'RETIREMENT'
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Theft outcomes (> $50 and <= $50): collapse frequency codes 2-6 into 1 so
# each becomes a binary ever/never indicator, then fit the three logits and
# likelihood-ratio tests.  The two stanzas were identical, so a loop replaces
# the duplication; printed output and order are unchanged.
for label in ['STEAL_LARGE', 'STEAL_SMALL']:
    # .copy() so the in-place recode mutates an independent frame.
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
    # Make theft a binary variable (any nonzero frequency code -> 1).
    d.replace({label: {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
    mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
    print(mod1.summary())
    mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
    print(mod2.summary())
    mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Worry/anxiety/depression indicator: three logits plus likelihood-ratio
# tests of each one-predictor model against the full model.
label = 'WORRY_ANX_DEPRESS'
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Weight-goal item, recoded to "trying to lose weight" (1) vs everything
# else (0), then the three logits and likelihood-ratio tests.
# Original response codes:
#   1 Lose weight
#   2 Gain weight
#   3 Stay about the same
#   4 Not trying to do anything
label = 'TRYING_LOSE_WEIGHT'
# .copy() so the in-place recode mutates an independent frame.
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
d.replace({label: {4: 0, 3: 0, 2: 0}}, inplace=True)
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Fighting: collapse frequency codes 2-6 into 1 (binary ever/never), then
# the three logits and likelihood-ratio tests.
label = 'FIGHT'
# .copy() so the in-place recode mutates an independent frame.
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
d.replace({label: {2: 1, 3: 1, 4: 1, 5: 1, 6: 1}}, inplace=True)
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Binary outcomes needing no recoding: fit the same three logits and
# likelihood-ratio tests per outcome.  The original repeated an identical
# stanza for each label; a loop removes the duplication while preserving the
# printed output and its order exactly.
for label in ['HEALTH_INSUR', 'ENTREPRENEUR', 'EXERCISE', 'CCMISSPAY',
              'CCMAX', 'CCDEBT', 'DRUG', 'ALCOHOL']:
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
    mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
    print(mod1.summary())
    mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
    print(mod2.summary())
    mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Binge drinking: binarize (any binge days -> 1), then the three logits and
# likelihood-ratio tests.
label = 'BINGE'
# .copy() so the recode below mutates an independent frame.
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
d[label] = (d[label] > 0).astype(float)  # nonzero count -> 1.0, zero -> 0.0
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Ever smoked 100+ cigarettes: three logits plus likelihood-ratio tests.
label = 'SMOKE_100_CIGS'
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Cigarette use is multi-category, so use multinomial logit (mnlogit) instead
# of binary logit; same model-comparison structure as the other outcomes.
label = 'CIGARETTE'
d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
mod1 = smf.mnlogit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
print(mod1.summary())
mod2 = smf.mnlogit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
print(mod2.summary())
mod3 = smf.mnlogit(formula=label + '~RISK_1K', data=d).fit(disp=False)
print(mod3.summary())
# p-values of the likelihood-ratio (chi-squared) difference tests.
# BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
# NOTE(review): for mnlogit, df_model counts parameters across all outcome
# categories, so the df difference scales with the number of categories --
# confirm this is the intended df for the LR test.
print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# EMERGENCY_FUND and FIT_LIT_4 are already-binary outcomes; the two original
# stanzas were identical, so a loop removes the duplication (printed output
# and order unchanged).
for label in ['EMERGENCY_FUND', 'FIT_LIT_4']:
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
    mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
    print(mod1.summary())
    mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
    print(mod2.summary())
    mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# FIT_LIT_5/6/7: each item is recoded to an indicator for one specific
# response code (5 -> code 1, 6 -> code 3, 7 -> code 2; presumably the
# "correct" response for each literacy item -- confirm against the codebook),
# then the three logits and likelihood-ratio tests.  The three original
# stanzas differed only in that code, so a loop removes the duplication.
for label, target_code in [('FIT_LIT_5', 1), ('FIT_LIT_6', 3), ('FIT_LIT_7', 2)]:
    # .copy() so the recode below mutates an independent frame.
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label]).copy()
    d[label] = (d[label] == target_code).astype(float)
    mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
    print(mod1.summary())
    mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
    print(mod2.summary())
    mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# FIT_LIT_8 and EMPLOYED are already-binary outcomes; the two original
# stanzas were identical, so a loop removes the duplication (printed output
# and order unchanged).
for label in ['FIT_LIT_8', 'EMPLOYED']:
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
    mod1 = smf.logit(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit(disp=False)
    print(mod1.summary())
    mod2 = smf.logit(formula=label + '~PATIENCE_MONTH', data=d).fit(disp=False)
    print(mod2.summary())
    mod3 = smf.logit(formula=label + '~RISK_1K', data=d).fit(disp=False)
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))
# Continuous outcomes: OLS regressions on patience and/or risk tolerance,
# with likelihood-ratio comparisons of each one-predictor model against the
# full model.  The six original stanzas were identical, so a loop removes the
# duplication (printed output and order unchanged).  The original passed
# disp=False to OLS.fit(), which OLS does not use (it is an optimizer flag
# for the MLE models); dropped for clarity.
for label in ['IQ', 'INCOME', 'DOC_VISIT', 'BMI',
              'AGE_START_SMOKE', 'AGE_FIRST_SEX']:
    d = data.dropna(subset=['RISK_1K', 'PATIENCE_MONTH', label])
    mod1 = smf.ols(formula=label + '~PATIENCE_MONTH+RISK_1K', data=d).fit()
    print(mod1.summary())
    mod2 = smf.ols(formula=label + '~PATIENCE_MONTH', data=d).fit()
    print(mod2.summary())
    mod3 = smf.ols(formula=label + '~RISK_1K', data=d).fit()
    print(mod3.summary())
    # p-values of the likelihood-ratio (chi-squared) difference tests.
    # BUG FIX: use stats.chi2 (chi-squared), not stats.chi; .sf(x, df) = 1 - cdf.
    # NOTE(review): for nested OLS models an exact F-test (compare_f_test) is
    # the conventional alternative; the LR chi-squared test is asymptotic.
    print(stats.chi2.sf(-2 * (mod2.llf - mod1.llf), mod1.df_model - mod2.df_model))
    print(stats.chi2.sf(-2 * (mod3.llf - mod1.llf), mod1.df_model - mod3.df_model))