Slide 29
Slide 29 text
def multivariate_normal_auc(d, rho=0, num_dims=2, num_samples=None):
    """Generate multivariate normal samples and classify them.

    Draws one group centered at the origin and one centered at
    (d, ..., d), both with unit variances and the same correlation `rho`
    between every pair of dimensions. A logistic regression is fitted to
    separate the groups and is scored on the same rows it was fitted on,
    so the result is an in-sample AUC.

    d: Cohen's effect size along each dimension
    rho: correlation between each pair of dimensions
    num_dims: number of dimensions
    num_samples: number of rows drawn per group; when None, the
        module-level `n` is used

    returns: AUC
    """
    if num_samples is None:
        # Backward compatible: earlier callers relied on the global `n`.
        num_samples = n

    mu1 = [0] * num_dims
    mu2 = [d] * num_dims
    # Ones on the diagonal, rho everywhere else.
    sigma = [
        [1 if row == col else rho for col in range(num_dims)]
        for row in range(num_dims)
    ]

    # generate the samples
    # rvs drops length-1 axes (one draw or one dimension), so restore the
    # (rows, dims) shape before building the frames.
    shape = (num_samples, num_dims)
    sample1 = multivariate_normal(mu1, sigma).rvs(num_samples).reshape(shape)
    sample2 = multivariate_normal(mu2, sigma).rvs(num_samples).reshape(shape)

    # label the samples and extract the features and labels
    df1 = pd.DataFrame(sample1)
    df1['label'] = 1
    df2 = pd.DataFrame(sample2)
    df2['label'] = 2
    df = pd.concat([df1, df2], ignore_index=True)
    X = df.drop(columns='label')
    y = df['label']

    # run the model
    model = LogisticRegression(solver='lbfgs').fit(X, y)
    # Column 1 holds the probability of the larger label (2), which is
    # also the class roc_auc_score treats as positive.
    y_pred_prob = model.predict_proba(X)[:, 1]

    # compute AUC
    return roc_auc_score(y, y_pred_prob)