# Python Code: Test a basic linear regression

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf

# Basic linear regression of alcohol consumption on employment rate using
# the gapminder data set: load the CSV, coerce both variables to numeric,
# center the explanatory variable, draw a scatterplot with a fitted line,
# and print the OLS summary.

# Bug fix for display formats to avoid run time errors: pandas renders
# floats with two decimal places.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Call in data set. The raw string keeps the Windows backslashes literal
# instead of having them parsed as (invalid) escape sequences.
data = pd.read_csv(r'C:\Python scripts\W_coursera\Refdata\gapminder.csv')

# Convert variables to numeric format using pd.to_numeric; errors='coerce'
# turns values that cannot be parsed into NaN instead of raising.
data['employrate'] = pd.to_numeric(data['employrate'], errors='coerce')
data['alcconsumption'] = pd.to_numeric(data['alcconsumption'], errors='coerce')

# Work on a copy holding only the two variables of interest, and center
# the explanatory variable by subtracting its mean, so that a value of 0
# corresponds to the average employment rate.
data2 = data[['employrate', 'alcconsumption']].copy()
data2['employrate'] = data2['employrate'] - data2['employrate'].mean()

# Sanity check for the centering step: the printed mean should be ~0.
print("Print the mean value of explanatory variable")
print(data2['employrate'].mean())

################################################################
# BASIC LINEAR REGRESSION
################################################################
# Scatterplot with a fitted regression line. seaborn is imported as `sns`,
# so the function must be reached through that alias.
scat1 = sns.regplot(x="employrate", y="alcconsumption", scatter=True, data=data2)
plt.xlabel('Employrate')
plt.ylabel('Alcohol consumption')
plt.title('Scatterplot for the Association Between Employrate and Alcohol consumption')
print(scat1)

# Fit alcconsumption as a linear function of the centered employrate and
# print the full regression summary table.
print("OLS regression model for the association between Employrate and Alcohol consumptione")
reg1 = smf.ols('alcconsumption ~ employrate', data=data2).fit()
print(reg1.summary())