add new info to the file
This commit is contained in:
parent
dedb8492ab
commit
1a7270020b
91
practica/math/statistic/statistic_tests/ANOVA.py
Normal file
91
practica/math/statistic/statistic_tests/ANOVA.py
Normal file
@ -0,0 +1,91 @@
|
||||
# One-way analysis of variance (ANOVA).
# A researcher recruits 30 students for a study. Each student is randomly
# assigned one of three study methods to use over the next three weeks while
# preparing for an exam. After the three weeks all students take the same
# test. The steps below run a one-way ANOVA to decide whether the mean scores
# of the three groups are equal.

#1
from scipy.stats import f_oneway

group1 = [85, 86, 88, 75, 78, 94, 98, 79, 71, 80]
group2 = [91, 92, 93, 85, 87, 84, 82, 88, 95, 96]
group3 = [79, 78, 88, 94, 92, 85, 83, 85, 82, 81]

# Conventional significance level for the test.
ALPHA = 0.05


def anova_verdict(p_value, alpha=ALPHA):
    """Return the message stating whether the null hypothesis can be rejected.

    Args:
        p_value: p-value produced by the ANOVA F test.
        alpha: significance level; the null hypothesis (all group means are
            equal) is rejected only when ``p_value`` is strictly below it.

    Returns:
        The "can reject" message when ``p_value < alpha``, otherwise the
        "cannot reject" message.
    """
    if p_value < alpha:
        return 'Можем отвергнуть нулевую теорию'
    return 'Не можем отвергнуть нулевую теорию'


# Run the test once and reuse the result for both the raw output and verdict.
anova_result = f_oneway(group1, group2, group3)
print(anova_result)  # expected: statistic = 2.3575 (F test), pvalue = 0.1138
print(anova_verdict(anova_result.pvalue))
|
||||
|
||||
#2 A different dataset: daily bike rental counts (day.csv)

import os

import pandas
import statsmodels.api as sm
from statsmodels.formula.api import ols

# NOTE(review): both locations below are absolute, machine-specific paths;
# adjust them before running this section on another machine.
mydata = pandas.read_csv('c:/Users/admin/Documents/prog/hello/practica/DataBase/day.csv')

# Changing the current working directory
os.chdir("D:/Ediwsor_Project - Bike_Rental_Count")
BIKE = pandas.read_csv("day.csv")

# Columns that hold category codes rather than measurements. They are cast to
# strings so the formula interface treats them as categorical factors instead
# of numeric regressors.
categorical_col = ['season', 'yr', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']
for column in categorical_col:
    BIKE[column] = BIKE[column].astype(str)

BIKE['dteday'] = pandas.to_datetime(BIKE['dteday'])
print(BIKE.dtypes)

# One-way ANOVA of the rental count ('cnt') against each categorical column.
for x in categorical_col:
    model = ols('cnt' + '~' + x, data=BIKE).fit()  # ordinary least squares fit
    result_anova = sm.stats.anova_lm(model)  # ANOVA table for the fitted model
    print(result_anova)
|
||||
|
||||
|
||||
#3 A different dataset: four treatments (columns A-D), one-way ANOVA

import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols

# load data file
df = pd.read_csv("https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt", sep="\t")

# reshape the dataframe into long format, as needed by the statsmodels formula
df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=['A', 'B', 'C', 'D'])
# replace column names
df_melt.columns = ['index', 'treatments', 'value']

# generate a boxplot to see the data distribution by treatments. Using boxplot,
# we can easily detect the differences between different treatments
ax = sns.boxplot(x='treatments', y='value', data=df_melt, color='#99c2a2')
ax = sns.swarmplot(x="treatments", y="value", data=df_melt, color='#7d0013')
plt.show()

# stats.f_oneway takes the groups as input and returns the ANOVA F and p value
fvalue, pvalue = stats.f_oneway(df['A'], df['B'], df['C'], df['D'])
print(fvalue, pvalue)
# 17.492810457516338 2.639241146210922e-05

# get ANOVA table as R like output
# Ordinary Least Squares (OLS) model
model = ols('value ~ C(treatments)', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
# A bare expression only displays in a notebook; print it so the table is
# also shown when this file runs as a script.
print(anova_table)
# output (ANOVA F and p value)
#                 sum_sq    df         F    PR(>F)
# C(treatments)  3010.95   3.0  17.49281  0.000026
# Residual        918.00  16.0       NaN       NaN

# ANOVA table using bioinfokit v1.0.3 or later (it uses wrapper script for anova_lm)
# Imported here rather than at the top of the section so the results above are
# still produced when bioinfokit is not installed.
from bioinfokit.analys import stat

res = stat()
res.anova_stat(df=df_melt, res_var='value', anova_model='value ~ C(treatments)')
print(res.anova_summary)
# output (ANOVA F and p value)
#                  df   sum_sq   mean_sq         F    PR(>F)
# C(treatments)   3.0  3010.95  1003.650  17.49281  0.000026
# Residual       16.0   918.00    57.375       NaN       NaN
|
Loading…
Reference in New Issue
Block a user