add new info to the file

2023-10-12 21:28:48 +03:00 · 2023-10-12 21:28:48 +03:00 · 1a7270020b
commit 1a7270020b
parent dedb8492ab
1 changed files with 91 additions and 0 deletions
--- a/practica/math/statistic/statistic_tests/ANOVA.py
+++ b/practica/math/statistic/statistic_tests/ANOVA.py
@@ -0,0 +1,91 @@
+# однофакторный дисперсионный анализ
+# Исследователь набирает 30 студентов для участия в исследовании. Студентам случайным образом назначают использовать один из трех
+# методов обучения в течение следующих трех недель для подготовки к экзамену. По истечении трех недель все студенты сдают одинаковый 
+# тест. Используйте следующие шаги, чтобы выполнить однофакторный дисперсионный анализ, чтобы определить, одинаковы ли средние баллы
+# для всех трех групп.
+
+#1 One-way ANOVA on the exam scores of the three groups.
+group1 = [85, 86, 88, 75, 78, 94, 98, 79, 71, 80]
+group2 = [91, 92, 93, 85, 87, 84, 82, 88, 95, 96]
+group3 = [79, 78, 88, 94, 92, 85, 83, 85, 82, 81]
+from scipy.stats import f_oneway
+anova_result = f_oneway(group1, group2, group3)  # run the test once; reused for the printout and the decision
+print(anova_result)  # output: statistic = 2.3575 (F test), pvalue = 0.1138
+# The null hypothesis (equal group means) is rejected only when the p-value is below alpha.
+alpha = 0.05
+print('Можем отвергнуть нулевую теорию' if anova_result.pvalue < alpha else 'Не можем отвергнуть нулевую теорию')
+
+#2 Another dataset: bike rental counts per day (day.csv)
+
+import os
+import pandas
+# pandas has to be imported before this first read_csv call; calling it through an
+# alias that is not bound yet raises NameError. mydata is not used again in this section.
+mydata = pandas.read_csv('c:/Users/admin/Documents/prog/hello/practica/DataBase/day.csv')
+#Changing the current working directory so the relative "day.csv" path below resolves there
+os.chdir("D:/Ediwsor_Project - Bike_Rental_Count")
+BIKE = pandas.read_csv("day.csv")
+# Columns treated as categorical factors: cast to str here, then each is tested against cnt below.
+categorical_col = ['holiday', 'weekday', 'workingday', 'weathersit', 'season', 'yr', 'mnth']
+for col in categorical_col:
+    BIKE[col] = BIKE[col].astype(str)
+BIKE['dteday'] = pandas.to_datetime(BIKE['dteday'])
+print(BIKE.dtypes)
+
+import statsmodels.api as sm
+from statsmodels.formula.api import ols
+
+# Fit cnt against one factor at a time and print the ANOVA table for each fit.
+# The formula string passed to ols has the form 'cnt~<column name>'.
+for x in categorical_col:
+    model = ols('cnt' + '~' + x, data = BIKE).fit() # ordinary least squares fit
+    result_anova = sm.stats.anova_lm(model) # ANOVA table for the fitted model
+    print(result_anova)
+
+
+#3 Another dataset
+
+import pandas as pd
+# load the tab-separated data file (columns A, B, C, D are used below)
+df = pd.read_csv("https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt", sep="\t")
+# reshape the dataframe into long format (one row per index/column pair) for statsmodels
+df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=['A', 'B', 'C', 'D'])
+# replace column names
+df_melt.columns = ['index', 'treatments', 'value']
+
+# generate a boxplot to see the data distribution by treatments; the swarmplot
+# draws the individual observations on top of the same axes
+import matplotlib.pyplot as plt
+import seaborn as sns
+ax = sns.boxplot(x='treatments', y='value', data=df_melt, color='#99c2a2')
+ax = sns.swarmplot(x="treatments", y="value", data=df_melt, color='#7d0013')
+plt.show()
+
+import scipy.stats as stats
+# stats.f_oneway takes the groups as input and returns the ANOVA F and p value
+fvalue, pvalue = stats.f_oneway(df['A'], df['B'], df['C'], df['D'])
+print(fvalue, pvalue)
+# 17.492810457516338 2.639241146210922e-05
+
+# get ANOVA table as R like output
+import statsmodels.api as sm
+from statsmodels.formula.api import ols
+
+# Ordinary Least Squares (OLS) model with treatments as a categorical factor
+model = ols('value ~ C(treatments)', data=df_melt).fit()
+anova_table = sm.stats.anova_lm(model, typ=2)
+print(anova_table)  # printed explicitly: a bare expression shows nothing when run as a script
+# output (ANOVA F and p value)
+#                 sum_sq    df         F    PR(>F)
+# C(treatments)  3010.95   3.0  17.49281  0.000026
+# Residual        918.00  16.0       NaN       NaN
+
+# ANOVA table using bioinfokit v1.0.3 or later (it uses wrapper script for anova_lm)
+from bioinfokit.analys import stat
+res = stat()
+res.anova_stat(df=df_melt, res_var='value', anova_model='value ~ C(treatments)')
+print(res.anova_summary)  # printed explicitly for the same reason as anova_table
+# output (ANOVA F and p value)
+#                  df   sum_sq   mean_sq         F    PR(>F)
+# C(treatments)   3.0  3010.95  1003.650  17.49281  0.000026
+# Residual       16.0   918.00    57.375       NaN       NaN