2. Software implementation

The program is divided into two parts:

  • Generic class for distribution analysis

  • Main program

2.1. Distribution class (module distribution.py, imported by the main program)

import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

#-------------------------------------------------------

class distribution():

	"""Distribution analysis helper built around a pandas DataFrame.

	The data is read from a CSV file with loadCSVFile() and kept in
	``self.database``; every other method reads from that DataFrame.
	"""

	#-------------------------------------------------------

	def __init__(self):
		"""Create an analyser with no data loaded yet."""

		# DataFrame holding the loaded data; None until loadCSVFile() is called
		self.database = None

	#-------------------------------------------------------

	def loadCSVFile(self,csv_file):
		"""Load csv_file with pandas.read_csv and store it as the database."""

		self.database = pd.read_csv(csv_file)

	#-------------------------------------------------------

	def display(self,nb_head):
		"""Print the first nb_head rows of the database."""

		print(self.database.head(nb_head))

	#-------------------------------------------------------

	def displayStatistic(self):
		"""Print the summary produced by DataFrame.describe()."""

		print(self.database.describe())

	#-------------------------------------------------------

	def getNames(self):
		"""Return the column names of the database as a list."""

		return list(self.database.columns)

	#-------------------------------------------------------

	def median(self,name=None):
		"""Return {column: median} for column name, or for every column."""

		return self.statistic_function(self.database.median,name)

	#-------------------------------------------------------

	def mean(self,name=None):
		"""Return {column: mean} for column name, or for every column."""

		return self.statistic_function(self.database.mean,name)

	#-------------------------------------------------------

	def max(self,name=None):
		"""Return {column: maximum} for column name, or for every column."""

		return self.statistic_function(self.database.max,name)

	#-------------------------------------------------------

	def min(self,name=None):
		"""Return {column: minimum} for column name, or for every column."""

		return self.statistic_function(self.database.min,name)

	#-------------------------------------------------------

	def std(self,name=None):
		"""Return {column: standard deviation} for column name, or for every column."""

		return self.statistic_function(self.database.std,name)

	#-------------------------------------------------------

	def quantile(self,q,name=None):
		"""Return {column: q-quantile} for column name, or for every column.

		q is forwarded unchanged to DataFrame.quantile.
		"""

		return self.statistic_function(self.database.quantile,name,q)

	#-------------------------------------------------------

	def statistic_function(self,fonction,name,arg=None):
		"""
		Evaluate a DataFrame statistic and return it as a dict.

		fonction: bound DataFrame method to call (e.g. self.database.mean)
		name: column to report; a falsy value (None, "") reports every column
		arg: optional single positional argument forwarded to fonction
		     (the quantile level when called from quantile())

		return dict mapping each reported column name to its statistic
		"""

		# Test against None instead of truthiness: 0 is a valid quantile
		# level and must be forwarded rather than silently dropped.
		if arg is not None:
			result=fonction(arg)
		else:
			result=fonction()

		# The statistic is computed once for the whole DataFrame and then
		# indexed per column, instead of being recomputed for each column.
		if name:
			return {name: result[name]}
		return {col_name: result[col_name] for col_name in self.getNames()}

	#-------------------------------------------------------

	def ttest(self,dependant,first='Congruent',second='Incongruent'):
		"""
		Run a t-test between two columns of the database.

		dependant: if true use scipy.stats.ttest_rel (related samples),
		           otherwise scipy.stats.ttest_ind (independent samples)
		first, second: names of the two columns to compare

		return tuple of (t-statistic,p-value)
		"""

		sample_a=self.database[first]
		sample_b=self.database[second]

		if dependant:
			value=stats.ttest_rel(sample_a, sample_b)
		else:
			value=stats.ttest_ind(sample_a, sample_b)

		return (value[0],value[1])

	#-------------------------------------------------------

	def repartitionPlot(self,with_file=True,bin_val=24):
		"""
		Draw a histogram and a box plot of the database.

		with_file: if true, each figure is also saved as a PNG file whose
		           name is derived from the figure title
		bin_val: number of histogram bins (also shown in the histogram title)
		"""

		title='Distribution repartition(bin:{})'.format(bin_val)
		# bins must be bin_val so that the plot matches its own title
		self.database.plot(kind='hist', alpha=0.5, bins=bin_val, title=title)
		plt.xlabel('Time (sec)')

		if with_file:
			self._saveCurrentFigure(title)

		title='Distribution repartition'
		self.database.plot(kind='box', title=title)
		plt.ylabel('Time (sec)')

		if with_file:
			self._saveCurrentFigure(title)

	#-------------------------------------------------------

	def _saveCurrentFigure(self,title):
		"""Save the current matplotlib figure as '<sanitised title>.png'."""

		# Spaces, parentheses and colons are replaced by underscores to
		# turn the title into a file name.
		filename=title
		for ch in (" ","(",")",":"):
			filename=filename.replace(ch,"_")
		plt.savefig("{}.png".format(filename))

	#-------------------------------------------------------

	def showPlot(self):
		"""Display all open matplotlib figures (plt.show())."""

		plt.show()

	#-------------------------------------------------------

2.2. Main program


import distribution

#-------------------------------------------------------

if __name__ == '__main__':

	# Load the Stroop data set and print its summary statistics.
	MyDist = distribution.distribution()
	MyDist.loadCSVFile("./DATABASE/stroopdata.csv")
	MyDist.displayStatistic()

	# Individual statistics can also be printed one by one, e.g.
	# MyDist.median(), MyDist.mean(), MyDist.min(), MyDist.max(),
	# MyDist.std() or MyDist.quantile(0.25).

	# t-test with dependant=True (the class then uses scipy's ttest_rel).
	t_statistic, p_value = MyDist.ttest(dependant=True)
	print("t-statistic: {}, p-value: {}".format(t_statistic, p_value))

	# Draw (and save) the histogram and box plot, then display them.
	MyDist.repartitionPlot()
	MyDist.showPlot()

#-------------------------------------------------------