2. Software implementation
The program is divided into two parts:
- a generic class for distribution analysis
- the main program
2.1. Distribution class
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
#-------------------------------------------------------
class distribution():
    """Descriptive statistics, t-test and plots for a CSV-backed data set.

    The data is held as a pandas DataFrame in ``self.database``, which stays
    ``None`` until ``loadCSVFile`` has been called.
    """
    #-------------------------------------------------------
    def __init__(self):
        """Create an analysis object with no data loaded yet."""
        self.database = None
    #-------------------------------------------------------
    def loadCSVFile(self, csv_file):
        """Read ``csv_file`` with ``pandas.read_csv`` into ``self.database``."""
        self.database = pd.read_csv(csv_file)
    #-------------------------------------------------------
    def display(self, nb_head):
        """Print the first ``nb_head`` rows of the data set."""
        print(self.database.head(nb_head))
    #-------------------------------------------------------
    def displayStatistic(self):
        """Print the summary produced by ``DataFrame.describe``."""
        print(self.database.describe())
    #-------------------------------------------------------
    def getNames(self):
        """Return the column names of the data set as a list."""
        return list(self.database.columns)
    #-------------------------------------------------------
    def median(self, name=None):
        """Return ``{column: median}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.median, name)
    #-------------------------------------------------------
    def mean(self, name=None):
        """Return ``{column: mean}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.mean, name)
    #-------------------------------------------------------
    def max(self, name=None):
        """Return ``{column: maximum}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.max, name)
    #-------------------------------------------------------
    def min(self, name=None):
        """Return ``{column: minimum}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.min, name)
    #-------------------------------------------------------
    def std(self, name=None):
        """Return ``{column: standard deviation}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.std, name)
    #-------------------------------------------------------
    def quantile(self, q, name=None):
        """Return ``{column: q-quantile}`` for ``name``, or for every column."""
        return self.statistic_function(self.database.quantile, name, q)
    #-------------------------------------------------------
    def statistic_function(self, fonction, name, arg=None):
        """Evaluate a statistic and return it as a ``{column: value}`` dict.

        Args:
            fonction: callable whose result can be indexed by column name
                (the methods above pass a bound DataFrame method).
            name: column to report; a falsy value selects every column.
            arg: optional single positional argument forwarded to
                ``fonction``; ``None`` means "call without argument".
        """
        # Compare against None rather than truthiness: a falsy argument such
        # as q=0 is legitimate and must still be forwarded.
        # The statistic is evaluated once and then indexed per column.
        result = fonction() if arg is None else fonction(arg)
        selected = [name] if name else self.getNames()
        return {col_name: result[col_name] for col_name in selected}
    #-------------------------------------------------------
    def ttest(self, dependant):
        """Run a t-test between the ``Congruent`` and ``Incongruent`` columns.

        Args:
            dependant: truthy selects ``stats.ttest_rel``, otherwise
                ``stats.ttest_ind`` is used.

        Returns:
            tuple of (t-statistic, p-value)
        """
        test = stats.ttest_rel if dependant else stats.ttest_ind
        value = test(self.database.Congruent, self.database.Incongruent)
        return (value[0], value[1])
    #-------------------------------------------------------
    def repartitionPlot(self, with_file=True):
        """Draw a histogram and a box plot of the data set.

        Args:
            with_file: when truthy, each figure is also saved as a PNG whose
                name is derived from the plot title.
        """
        bin_val = 24
        title = 'Distribution repartition(bin:{})'.format(bin_val)
        # Use bin_val for the plot too, so the drawn histogram matches the
        # bin count announced in its title and file name.
        self.database.plot(kind='hist', alpha=0.5, bins=bin_val, title=title)
        plt.xlabel('Time (sec)')
        if with_file:
            self._saveCurrentFigure(title)
        title = 'Distribution repartition'
        self.database.plot(kind='box', title=title)
        plt.ylabel('Time (sec)')
        if with_file:
            self._saveCurrentFigure(title)
    #-------------------------------------------------------
    def _saveCurrentFigure(self, title):
        """Save the current pyplot figure as ``<sanitised title>.png``."""
        filename = title
        # Replace the characters of the titles that are awkward in file names.
        for ch in (" ", "(", ")", ":"):
            filename = filename.replace(ch, "_")
        plt.savefig("{}.png".format(filename))
    #-------------------------------------------------------
    def showPlot(self):
        """Display the figures created so far via ``plt.show``."""
        plt.show()
#-------------------------------------------------------
2.2. Main program
import distribution
#-------------------------------------------------------
if __name__ == '__main__':
    # Load the CSV data set and print its summary statistics.
    analysis = distribution.distribution()
    analysis.loadCSVFile("./DATABASE/stroopdata.csv")
    analysis.displayStatistic()
    # Individual statistics, left disabled:
    # print(analysis.median())
    # print(analysis.mean())
    # print(analysis.min())
    # print(analysis.max())
    # print(analysis.std())
    # print(analysis.quantile(0.25))
    # print(analysis.quantile(0.5))
    # print(analysis.quantile(0.75))
    # Dependent-samples t-test; ttest returns a (t-statistic, p-value) pair.
    t_statistic, p_value = analysis.ttest(dependant=True)
    print("t-statistic: {}, p-value: {}".format(t_statistic, p_value))
    # Build the plots (also saved as PNG by default), then show them.
    analysis.repartitionPlot()
    analysis.showPlot()
#-------------------------------------------------------