import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns


# Display settings: width=None lets pandas pick the output width itself and
# max_columns=None removes the cap on how many columns are printed.
pd.set_option('display.width', None)
pd.set_option('display.max_columns', None)

# --- Daily-level tables ---
dailyActivity = pd.read_csv('Fitabase Data 4.12.16-5.12.16/dailyActivity_merged.csv')
dailyCalories = pd.read_csv('Fitabase Data 4.12.16-5.12.16/dailyCalories_merged.csv')
dailySteps = pd.read_csv('Fitabase Data 4.12.16-5.12.16/dailySteps_merged.csv')
sleepDay = pd.read_csv('Fitabase Data 4.12.16-5.12.16/sleepDay_merged.csv')

# --- Hourly-level tables ---
hourlyCalories = pd.read_csv('Fitabase Data 4.12.16-5.12.16/hourlyCalories_merged.csv')
hourlyIntensities = pd.read_csv('Fitabase Data 4.12.16-5.12.16/hourlyIntensities_merged.csv')
hourlySteps = pd.read_csv('Fitabase Data 4.12.16-5.12.16/hourlySteps_merged.csv')
# NOTE(review): stepsDf is loaded from the same file as hourlySteps, so the two
# frames start out with identical content - confirm whether this is intended.
stepsDf = pd.read_csv('Fitabase Data 4.12.16-5.12.16/hourlySteps_merged.csv')

# --- Minute/second-level tables ---
minuteSleep = pd.read_csv('Fitabase Data 4.12.16-5.12.16/minuteSleep_merged.csv')
heartrateSeconds = pd.read_csv('Fitabase Data 4.12.16-5.12.16/heartrate_seconds_merged.csv')

# --- Weight log ---
weightLog = pd.read_csv('Fitabase Data 4.12.16-5.12.16/weightLogInfo_merged.csv')


# Quick visual check of each loaded table: .head() returns the first five rows.
# Each call is a bare expression, so it only renders when it is the last
# statement of a notebook cell; run as a plain script these lines print nothing.

# Hourly step counts
hourlySteps.head()


# Hourly intensity values
hourlyIntensities.head()


# Per-reading heart-rate values
heartrateSeconds.head()


# Daily step totals
dailySteps.head()


# Daily calories
dailyCalories.head()


# Daily sleep summary; sleepDay.shape is (413, 5)
sleepDay.head()


# Minute-level sleep log
minuteSleep.head()


# Daily activity summary
dailyActivity.head()


# stepsDf was read from the same CSV as hourlySteps
stepsDf.head()


# Hourly calories
hourlyCalories.head()


# Weight / BMI log
weightLog.head()


# NOTE(review): the two previews below repeat the dailyActivity preview above.
dailyActivity.head()


dailyActivity.head()


# Parse the activity date strings once and reuse the parsed series below.
activity_dates = pd.to_datetime(dailyActivity['ActivityDate'])
dailyActivity['ActivityDate'] = activity_dates

# Name of the weekday (e.g. 'Monday') for every record.
dailyActivity['day_name'] = activity_dates.dt.day_name()

# dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
is_weekend = activity_dates.dt.dayofweek >= 5
dailyActivity['weekend_weekday'] = np.where(is_weekend, 'Weekend', 'Weekday')


# Column groups summarised below with DataFrame.describe().
activity_minute_cols = [
    'VeryActiveMinutes',
    'FairlyActiveMinutes',
    'LightlyActiveMinutes',
    'SedentaryMinutes',
]
sleep_cols = ['TotalSleepRecords', 'TotalMinutesAsleep', 'TotalTimeInBed']

# Daily step counts
print("Total Steps Summary")
steps = dailyActivity['TotalSteps'].describe()
print(steps)
print('\n')

# Minutes spent at each activity level
minutes = dailyActivity[activity_minute_cols].describe()
print(minutes)
print('\n')

# Calories burned and total distance covered
calories = dailyActivity[['Calories', 'TotalDistance']].describe()
print(calories)
print('\n')

# Sleep records, followed by BMI and weight
sleep_summary = sleepDay[sleep_cols].describe()
print(sleep_summary)
print('\n')
weight_summary = weightLog[['BMI', 'WeightKg']].describe()
print(weight_summary)

Total Steps Summary
count      940.000000
mean      7637.910638
std       5087.150742
min          0.000000
25%       3789.750000
50%       7405.500000
75%      10727.000000
max      36019.000000
Name: TotalSteps, dtype: float64


       VeryActiveMinutes  FairlyActiveMinutes  LightlyActiveMinutes  \
count         940.000000           940.000000            940.000000   
mean           21.164894            13.564894            192.812766   
std            32.844803            19.987404            109.174700   
min             0.000000             0.000000              0.000000   
25%             0.000000             0.000000            127.000000   
50%             4.000000             6.000000            199.000000   
75%            32.000000            19.000000            264.000000   
max           210.000000           143.000000            518.000000   

       SedentaryMinutes  
count        940.000000  
mean         991.210638  
std          301.267437  
min            0.000000  
25%          729.750000  
50%         1057.500000  
75%         1229.500000  
max         1440.000000  


          Calories  TotalDistance
count   940.000000     940.000000
mean   2303.609574       5.489702
std     718.166862       3.924606
min       0.000000       0.000000
25%    1828.500000       2.620000
50%    2134.000000       5.245000
75%    2793.250000       7.712500
max    4900.000000      28.030001


       TotalSleepRecords  TotalMinutesAsleep  TotalTimeInBed
count         413.000000          413.000000      413.000000
mean            1.118644          419.467312      458.639225
std             0.345521          118.344679      127.101607
min             1.000000           58.000000       61.000000
25%             1.000000          361.000000      403.000000
50%             1.000000          433.000000      463.000000
75%             1.000000          490.000000      526.000000
max             3.000000          796.000000      961.000000


             BMI    WeightKg
count  67.000000   67.000000
mean   25.185224   72.035821
std     3.066963   13.923206
min    21.450001   52.599998
25%    23.959999   61.400002
50%    24.389999   62.500000
75%    25.559999   85.049999
max    47.540001  133.500000


# Average minutes at each activity level, one row per weekday name.
# 'mean' is pivot_table's default aggregation; it is spelled out for clarity.
minute_value_cols = [
    'VeryActiveMinutes',
    'FairlyActiveMinutes',
    'LightlyActiveMinutes',
    'SedentaryMinutes',
]
byMinute = pd.pivot_table(
    dailyActivity,
    index='day_name',
    values=minute_value_cols,
    aggfunc='mean',
)

byMinute


# Average distance at each activity level, one row per weekday name.
distance_value_cols = [
    'VeryActiveDistance',
    'ModeratelyActiveDistance',
    'LightActiveDistance',
    'SedentaryActiveDistance',
]
byDistance = pd.pivot_table(
    dailyActivity,
    index='day_name',
    values=distance_value_cols,
    aggfunc='mean',
)

byDistance


# Grouped bar chart: one group per weekday, one bar per activity-minute column.
byMinute.plot.bar(figsize=(17, 7))

<AxesSubplot:xlabel='day_name'>


# Grouped bar chart: one group per weekday, one bar per distance column.
byDistance.plot.bar(figsize=(17, 7))

<AxesSubplot:xlabel='day_name'>


# For each activity-minute column, the weekday with the highest / lowest average.
busiest_day_by_minutes = byMinute.idxmax()
quietest_day_by_minutes = byMinute.idxmin()

print('The Most Active Based on the Type of Activity (Fairly, Lightly, etc)')
print(busiest_day_by_minutes)
print('\n')
print('The Least Active Based on the Type of Activity (Fairly, Lightly, etc)')
print(quietest_day_by_minutes)

The Most Active Based on the Type of Activity (Fairly, Lightly, etc)
FairlyActiveMinutes     Saturday
LightlyActiveMinutes    Saturday
SedentaryMinutes          Monday
VeryActiveMinutes         Monday
dtype: object


The Least Active Based on the Type of Activity (Fairly, Lightly, etc)
FairlyActiveMinutes     Thursday
LightlyActiveMinutes      Sunday
SedentaryMinutes        Thursday
VeryActiveMinutes       Thursday
dtype: object


# For each distance column, the weekday with the highest / lowest average.
busiest_day_by_distance = byDistance.idxmax()
quietest_day_by_distance = byDistance.idxmin()

print('The Most Active Based On Distance')
print(busiest_day_by_distance)
print('\n')
print('The Least Active Based On Distance')
print(quietest_day_by_distance)

The Most Active Based On Distance
LightActiveDistance          Saturday
ModeratelyActiveDistance     Saturday
SedentaryActiveDistance        Monday
VeryActiveDistance          Wednesday
dtype: object


The Least Active Based On Distance
LightActiveDistance         Sunday
ModeratelyActiveDistance    Friday
SedentaryActiveDistance     Sunday
VeryActiveDistance          Friday
dtype: object


# Average calories per weekday name ('mean' is pivot_table's default aggregation).
caloriesGrouped = pd.pivot_table(
    dailyActivity,
    index='day_name',
    values='Calories',
    aggfunc='mean',
)

caloriesGrouped


# Select the seaborn theme BEFORE the figure is created: the style is applied
# to axes at creation time, so setting it after plt.subplots() would leave this
# figure's axes in the previous style.
sns.set_style("darkgrid")

fig_dims = (15, 10)
fig, ax = plt.subplots(figsize=fig_dims)

# Scatter plot with a fitted regression line: daily steps against calories.
reg = sns.regplot(
    data = dailyActivity,
    x = 'TotalSteps',
    y = 'Calories',
    ax = ax,
    color = 'blue'
)

reg.set_title('Relationship Between Total Steps Taken and Calories Burned')

Text(0.5, 1.0, 'Relationship Between Total Steps Taken and Calories Burned')


# Give both frames a datetime 'date' column so they can be joined on it.
sleep_dates = pd.to_datetime(sleepDay['SleepDay'])
sleepDay['SleepDay'] = sleep_dates
sleepDay['date'] = pd.to_datetime(sleep_dates)
dailyActivity['date'] = pd.to_datetime(dailyActivity['ActivityDate'])

# Inner join (the merge default): keep only the (Id, date) pairs that appear
# in both the activity data and the sleep data.
joinedDf = pd.merge(dailyActivity, sleepDay, how='inner', on=['Id', 'date'])


joinedDf.head()


# Calories summary split into Weekday / Weekend records.
calories_by_week_part = dailyActivity.groupby('weekend_weekday')['Calories']
calories_by_week_part.describe()


# Sleep summary split the same way, using the joined activity + sleep frame.
sleep_metric_cols = ['TotalSleepRecords', 'TotalMinutesAsleep', 'TotalTimeInBed']
joinedDf.groupby('weekend_weekday')[sleep_metric_cols].describe()


# One scatter panel per Weekday/Weekend value; points are coloured by the
# number of sleep records logged for that day.
relplot_options = dict(
    x='TotalMinutesAsleep',
    y='TotalTimeInBed',
    col='weekend_weekday',
    hue='TotalSleepRecords',
    height=7,
)
sleep = sns.relplot(data=joinedDf, **relplot_options)

# y > 1 places the overall title above the per-panel titles.
sleep.fig.suptitle('Total Minutes Asleep Vs Total Time in Bed', y=1.03)
sleep.set_titles("Week: {col_name}")
plt.show()


# TotalMinutesAsleep already holds a number of minutes, so it is only cast to
# float here. The previous to_timedelta(...).astype('timedelta64[m]') round trip
# produced the same float minutes on pandas 1.x but raises on pandas 2.x, which
# only supports the 's', 'ms', 'us' and 'ns' timedelta resolutions.
sleepDay['TotalMinutesAsleep'] = sleepDay['TotalMinutesAsleep'].astype('float64')

# Convert to hours, rounded to the nearest hundredth.
sleepDay['TotalHoursAsleep'] = (sleepDay['TotalMinutesAsleep'] / 60).round(2)

# Classify each record: under 7 hours, over 9 hours, or anything in between
# (7 to 9 hours inclusive falls through to the default).
# np.select article by dataquest: https://www.dataquest.io/blog/tutorial-add-column-pandas-dataframe-based-on-if-else-condition/
hours_asleep = sleepDay['TotalHoursAsleep']
sleepDay['SleepStatus'] = np.select(
    [hours_asleep < 7, hours_asleep > 9],
    ['Bad Sleepers', 'Oversleepers'],
    default='Normal Sleepers',
)

# Weekday name and Weekday/Weekend flag (dayofweek counts Monday as 0, so
# values above 4 are Saturday and Sunday).
sleepDay['day_name'] = sleepDay['SleepDay'].dt.day_name()
sleepDay['weekend_weekday'] = np.where(sleepDay['SleepDay'].dt.dayofweek > 4, 'Weekend', 'Weekday')

sleepDay.head()

# Shared text size for the titles, axis labels and tick labels of both charts.
label_font = {'fontsize': 20}

# Overview: number of records per sleep status, split by Weekday/Weekend.
fig_dims = (20, 15)
fig, ax = plt.subplots(figsize=fig_dims)
overview = sns.countplot(data=sleepDay, x='SleepStatus', hue='weekend_weekday', ax=ax)
overview.set_title('Overview Look of Sleep Statuses', **label_font)
overview.set_xlabel('Sleep Status', **label_font)
overview.set_ylabel('Total', **label_font)
plt.xticks(size=20)
plt.yticks(size=20)

plt.show()

# Same count, split by day of the week instead.
fig_dims = (20, 15)
fig, ax = plt.subplots(figsize=fig_dims)
overview2 = sns.countplot(data=sleepDay, x='SleepStatus', hue='day_name', ax=ax)
overview2.set_title('Overview Look of Sleep Statuses by Day of The Week', **label_font)
overview2.set_xlabel('Sleep Status', **label_font)
overview2.set_ylabel('Total', **label_font)
plt.xticks(size=20)
plt.yticks(size=20)

plt.show()


# Show the byMinute table again (bare expression: only renders as the last
# statement of a notebook cell).
byMinute


# There are 2,483,658 rows!
heartrateSeconds.shape
print(heartrateSeconds.head())

# Summary statistics of the heart-rate values, rounded to whole numbers.
summary = heartrateSeconds['Value'].describe().round()
print('\n')
print(summary)

# Number of distinct user Ids in this table (only 14 unique users).
unique = heartrateSeconds['Id'].nunique()
print(f'\nNumber of Users in This Dataset: {unique}')

           Id                  Time  Value
0  2022484408  4/12/2016 7:21:00 AM     97
1  2022484408  4/12/2016 7:21:05 AM    102
2  2022484408  4/12/2016 7:21:10 AM    105
3  2022484408  4/12/2016 7:21:20 AM    103
4  2022484408  4/12/2016 7:21:25 AM    101


count    2483658.0
mean          77.0
std           19.0
min           36.0
25%           63.0
50%           73.0
75%           88.0
max          203.0
Name: Value, dtype: float64

Number of Users in This Dataset: 14


# Parse the 12-hour-clock timestamps (e.g. '4/12/2016 7:21:00 AM').
heartrateSeconds['Time'] = pd.to_datetime(
    heartrateSeconds['Time'],
    format='%m/%d/%Y %I:%M:%S %p',
)

# Mean heart-rate value per weekday name.
weekday_of_reading = heartrateSeconds['Time'].dt.day_name()
mean = heartrateSeconds['Value'].groupby(weekday_of_reading).mean()
mean

Time
Friday       77.520836
Monday       77.454335
Saturday     79.973815
Sunday       75.925004
Thursday     77.035902
Tuesday      77.013723
Wednesday    76.451580
Name: Value, dtype: float64


# Mean heart-rate value for each hour of the day (0-23).
hour_of_reading = heartrateSeconds['Time'].dt.hour
mean = heartrateSeconds['Value'].groupby(hour_of_reading).mean()

fig_dims = (23, 13)
fig, ax = plt.subplots(figsize=fig_dims)

# One-column frame ('Value') indexed by hour, used as the plotting source.
tempDf = mean.to_frame()

tempDf

# No ax is passed, so the bars are drawn on the axes created just above.
t = sns.barplot(data=tempDf, x=tempDf.index, y='Value')

axis_font = {'fontsize': 20}
t.set_xlabel('Time (Hour)', **axis_font)
t.set_ylabel('Average HeartRate', **axis_font)
t.set_title('Average Heart-Rate Per Minute Based on Time (Hour)', **axis_font)

Text(0.5, 1.0, 'Average Heart-Rate Per Minute Based on Time (Hour)')


# Join again because sleepDay has been modified since joinedDf was built.
# Column names present in both frames (other than the join keys) come back
# with pandas' default '_x' / '_y' suffixes.
rejoined = pd.merge(dailyActivity, sleepDay, how='inner', on=['Id', 'date'])
rejoined.head()


# Before bringing sleep status in, look at average total steps per weekday.

meanSteps = dailyActivity['TotalSteps'].groupby(dailyActivity['day_name']).mean()

fig_dims = (23, 13)
fig, ax = plt.subplots(figsize=fig_dims)

# Turn the grouped series into a two-column frame: day_name, TotalSteps.
tempDf = meanSteps.reset_index()

# Sort rows Monday..Sunday instead of alphabetically by casting the day names
# to a categorical whose category order is the calendar order.
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
tempDf['day_name'] = tempDf['day_name'].astype(pd.CategoricalDtype(categories=cats))
tempDf = tempDf.sort_values('day_name')

print(tempDf)

# No ax is passed, so the bars are drawn on the axes created above.
t = sns.barplot(data=tempDf, x='day_name', y='TotalSteps')

axis_font = {'fontsize': 20}
t.set_xlabel('Day of the Week', **axis_font)
t.set_ylabel('Total Steps', **axis_font)
t.set_title('Average Total Steps Taken by the Day of the Week', **axis_font)

    day_name   TotalSteps
1     Monday  7780.866667
5    Tuesday  8125.006579
6  Wednesday  7559.373333
4   Thursday  7405.836735
0     Friday  7448.230159
2   Saturday  8152.975806
3     Sunday  6933.231405

Text(0.5, 1.0, 'Average Total Steps Taken by the Day of the Week')


# Lightly-active minutes together with the three sleep measures.
lightly_active_cols = [
    'LightlyActiveMinutes',
    'TotalSleepRecords',
    'TotalMinutesAsleep',
    'TotalTimeInBed',
]
lightlyActive = joinedDf[lightly_active_cols]

lightlyActive.head()


fig_dims = (15, 10)
fig, ax = plt.subplots(figsize=fig_dims)

sns.set_style("darkgrid")
# Scatter plot with a fitted regression line: lightly-active minutes vs sleep.
reg = sns.regplot(
    x='LightlyActiveMinutes',
    y='TotalMinutesAsleep',
    data=lightlyActive,
    color='blue',
    ax=ax,
)

reg.set_title('Relationship between Total Minutes of Sleep and Users Who are Light Active')

Text(0.5, 1.0, 'Relationship between Total Minutes of Sleep and Users Who are Light Active')


# Sedentary minutes together with the three sleep measures.
sedentary_cols = [
    'SedentaryMinutes',
    'TotalSleepRecords',
    'TotalMinutesAsleep',
    'TotalTimeInBed',
]
sedentary = joinedDf[sedentary_cols]

sedentary.head()


sns.set_style("darkgrid")
fig_dims = (15, 10)
fig, ax = plt.subplots(figsize=fig_dims)

# Scatter plot with a fitted regression line: sedentary minutes vs sleep.
s = sns.regplot(
    x='SedentaryMinutes',
    y='TotalMinutesAsleep',
    data=sedentary,
    color='blue',
    ax=ax,
)

s.set_title('Relationship between Total Minutes of Sleep and Users Who are Sedentary')

Text(0.5, 1.0, 'Relationship between Total Minutes of Sleep and Users Who are Sedentary')


joinedDf.head()


sns.set_style("darkgrid")
fig_dims = (15, 10)

# (x column, plot title, x-axis label) for the fairly-active and very-active
# charts; both use the same layout and the same y variable.
activity_plots = [
    (
        'FairlyActiveMinutes',
        'Relationship between Total Minutes of Sleep and Users Who are Fairly Active',
        'Fairly Active Minutes',
    ),
    (
        'VeryActiveMinutes',
        'Relationship between Total Minutes of Sleep and Users Who are Very Active',
        'Very Active Minutes',
    ),
]

for activity_col, plot_title, x_label in activity_plots:
    fig, ax = plt.subplots(figsize=fig_dims)

    # Scatter plot with a fitted regression line against minutes asleep.
    s = sns.regplot(
        data=joinedDf,
        x=activity_col,
        y='TotalMinutesAsleep',
        ax=ax,
        color='blue',
    )

    s.set_title(plot_title, fontsize=20)
    s.set_xlabel(x_label, fontsize=20)
    s.set_ylabel('Total Minutes Asleep', fontsize=20)
    plt.show()


# Parse the 12-hour-clock timestamps (e.g. '4/12/2016 12:00:00 AM').
hourlyIntensities['ActivityHour'] = pd.to_datetime(
    hourlyIntensities['ActivityHour'],
    format='%m/%d/%Y %I:%M:%S %p',
)


# Average total intensity per time of day; the author expects this to come out
# as 24 rows, one per hour.
time_of_day = hourlyIntensities['ActivityHour'].dt.time
mean = hourlyIntensities['TotalIntensity'].groupby(time_of_day).mean()

sns.set_style("darkgrid")
fig_dims = (23, 13)
fig, ax = plt.subplots(figsize=fig_dims)

# One-column frame ('TotalIntensity') indexed by time, used as plotting source.
tempDf = mean.to_frame()

# No ax is passed, so the bars are drawn on the axes created just above.
t = sns.barplot(data=tempDf, x=tempDf.index, y='TotalIntensity')

axis_font = {'fontsize': 20}
t.set_xlabel('Time', **axis_font)
t.set_ylabel('Average Total Intensity', **axis_font)
t.set_title('Average Total Intensity Per Hour', **axis_font)

Text(0.5, 1.0, 'Average Total Intensity Per Hour')


# Average total intensity per weekday name.
weekday_of_hour = hourlyIntensities['ActivityHour'].dt.day_name()
meanDays = hourlyIntensities['TotalIntensity'].groupby(weekday_of_hour).mean()

fig_dims = (23, 13)
fig, ax = plt.subplots(figsize=fig_dims)

# Two-column frame (Day, TotalIntensity): rename the group index to "Day"
# before moving it into a regular column.
tempDf = meanDays.rename_axis("Day").reset_index()

# Sort rows Monday..Sunday instead of alphabetically by casting the day names
# to a categorical whose category order is the calendar order.
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
tempDf['Day'] = tempDf['Day'].astype(pd.CategoricalDtype(categories=cats))
tempDf = tempDf.sort_values('Day')

# No ax is passed, so the bars are drawn on the axes created above.
t = sns.barplot(data=tempDf, x='Day', y='TotalIntensity')

axis_font = {'fontsize': 20}
t.set_xlabel('Day', **axis_font)
t.set_ylabel('Average Total Intensity', **axis_font)
t.set_title('Average Total Intensity Vs Day', **axis_font)

Text(0.5, 1.0, 'Average Total Intensity Vs Day')

	Id	ActivityHour	StepTotal
0	1503960366	4/12/2016 12:00:00 AM	373
1	1503960366	4/12/2016 1:00:00 AM	160
2	1503960366	4/12/2016 2:00:00 AM	151
3	1503960366	4/12/2016 3:00:00 AM	0
4	1503960366	4/12/2016 4:00:00 AM	0

	Id	ActivityHour	TotalIntensity	AverageIntensity
0	1503960366	4/12/2016 12:00:00 AM	20	0.333333
1	1503960366	4/12/2016 1:00:00 AM	8	0.133333
2	1503960366	4/12/2016 2:00:00 AM	7	0.116667
3	1503960366	4/12/2016 3:00:00 AM	0	0.000000
4	1503960366	4/12/2016 4:00:00 AM	0	0.000000

	Id	ActivityDay	StepTotal
0	1503960366	4/12/2016	13162
1	1503960366	4/13/2016	10735
2	1503960366	4/14/2016	10460
3	1503960366	4/15/2016	9762
4	1503960366	4/16/2016	12669

	Id	ActivityDay	Calories
0	1503960366	4/12/2016	1985
1	1503960366	4/13/2016	1797
2	1503960366	4/14/2016	1776
3	1503960366	4/15/2016	1745
4	1503960366	4/16/2016	1863

	Id	SleepDay	TotalSleepRecords	TotalMinutesAsleep	TotalTimeInBed
0	1503960366	4/12/2016 12:00:00 AM	1	327	346
1	1503960366	4/13/2016 12:00:00 AM	2	384	407
2	1503960366	4/15/2016 12:00:00 AM	1	412	442
3	1503960366	4/16/2016 12:00:00 AM	2	340	367
4	1503960366	4/17/2016 12:00:00 AM	1	700	712

	Id	Time	Value
0	2022484408	4/12/2016 7:21:00 AM	97
1	2022484408	4/12/2016 7:21:05 AM	102
2	2022484408	4/12/2016 7:21:10 AM	105
3	2022484408	4/12/2016 7:21:20 AM	103
4	2022484408	4/12/2016 7:21:25 AM	101

	Id	date	value	logId
0	1503960366	4/12/2016 2:47:30 AM	3	11380564589
1	1503960366	4/12/2016 2:48:30 AM	2	11380564589
2	1503960366	4/12/2016 2:49:30 AM	1	11380564589
3	1503960366	4/12/2016 2:50:30 AM	1	11380564589
4	1503960366	4/12/2016 2:51:30 AM	1	11380564589

	Id	Date	WeightKg	WeightPounds	Fat	BMI	IsManualReport	LogId
0	1503960366	5/2/2016 11:59:59 PM	52.599998	115.963147	22.0	22.650000	True	1462233599000
1	1503960366	5/3/2016 11:59:59 PM	52.599998	115.963147	NaN	22.650000	True	1462319999000
2	1927972279	4/13/2016 1:08:52 AM	133.500000	294.317120	NaN	47.540001	False	1460509732000
3	2873212765	4/21/2016 11:59:59 PM	56.700001	125.002104	NaN	21.450001	True	1461283199000
4	2873212765	5/12/2016 11:59:59 PM	57.299999	126.324875	NaN	21.690001	True	1463097599000

	FairlyActiveMinutes	LightlyActiveMinutes	SedentaryMinutes	VeryActiveMinutes
day_name
Friday	12.111111	204.198413	1000.309524	20.055556
Monday	14.000000	192.058333	1027.941667	23.108333
Saturday	15.201613	207.145161	964.282258	21.919355
Sunday	14.528926	173.975207	990.256198	19.983471
Thursday	11.959184	185.421769	961.993197	19.408163
Tuesday	14.335526	197.342105	1007.361842	22.953947
Wednesday	13.100000	189.853333	989.480000	20.780000

	LightActiveDistance	ModeratelyActiveDistance	SedentaryActiveDistance	VeryActiveDistance
day_name
Friday	3.489127	0.483810	0.001825	1.312937
Monday	3.363083	0.585833	0.002583	1.537333
Saturday	3.617177	0.677339	0.001048	1.514597
Sunday	2.892314	0.618017	0.000661	1.488926
Thursday	3.283129	0.505170	0.002313	1.390476
Tuesday	3.471053	0.593026	0.001447	1.613289
Wednesday	3.256333	0.527067	0.001333	1.633467

	Calories
day_name
Friday	2331.785714
Monday	2324.208333
Saturday	2354.967742
Sunday	2263.000000
Thursday	2199.571429
Tuesday	2356.013158
Wednesday	2302.620000

	Id	ActivityDate	TotalSteps	TotalDistance	TrackerDistance	VeryActiveDistance	ModeratelyActiveDistance	LightActiveDistance	VeryActiveMinutes	FairlyActiveMinutes	LightlyActiveMinutes	SedentaryMinutes	Calories	day_name	weekend_weekday	date	SleepDay	TotalSleepRecords	TotalMinutesAsleep	TotalTimeInBed
0	1503960366	2016-04-12	13162	8.50	8.50	1.88	0.55	6.06	25	13	328	728	1985	Tuesday	Weekday	2016-04-12	2016-04-12	1	327	346
1	1503960366	2016-04-13	10735	6.97	6.97	1.57	0.69	4.71	21	19	217	776	1797	Wednesday	Weekday	2016-04-13	2016-04-13	2	384	407
2	1503960366	2016-04-15	9762	6.28	6.28	2.14	1.26	2.83	29	34	209	726	1745	Friday	Weekday	2016-04-15	2016-04-15	1	412	442
3	1503960366	2016-04-16	12669	8.16	8.16	2.71	0.41	5.04	36	10	221	773	1863	Saturday	Weekend	2016-04-16	2016-04-16	2	340	367
4	1503960366	2016-04-17	9705	6.48	6.48	3.19	0.78	2.51	38	20	164	539	1728	Sunday	Weekend	2016-04-17	2016-04-17	1	700	712

	count	mean	std	min	25%	50%	75%	max
weekend_weekday
Weekday	695.0	2301.516547	704.675507	0.0	1841.0	2159.0	2799.5	4900.0
Weekend	245.0	2309.546939	756.589860	0.0	1792.0	2096.0	2739.0	4552.0

	TotalSleepRecords								TotalMinutesAsleep								TotalTimeInBed
	count	mean	std	min	25%	50%	75%	max	count	mean	std	min	25%	50%	75%	max	count	mean	std	min	25%	50%	75%	max
weekend_weekday
Weekday	300.0	1.093333	0.313501	1.0	1.0	1.0	1.0	3.0	300.0	413.106667	103.459145	59.0	362.5	426.0	472.5	796.0	300.0	449.903333	108.342949	65.0	406.0	458.0	506.0	961.0
Weekend	113.0	1.185841	0.412931	1.0	1.0	1.0	1.0	3.0	113.0	436.353982	150.162288	58.0	361.0	465.0	530.0	775.0	113.0	481.831858	165.356428	61.0	402.0	501.0	575.0	961.0

Case Study: How Can a Wellness Technology Company Play It Smart?¶

Part of the Google Analytics Professional Certificate¶

by Whoong Zi Wei¶

Key Stakeholders:¶

Bellabeat's Key Products:¶

Business Objective¶

Loading Packages¶

Prepare Phase¶

There are a total of 18 datasets available. However, I will only need a few of them for this case study. I will also merge some datasets if necessary as I progress through this case study.

After taking a look and understanding the datasets available, I've decided to use the above 11 datasets for my analysis.¶

How active are these Users?¶

Let's get a brief summary of their daily activities.¶

Insights from this summary:¶

Let's see if the day of the week, or weekends or weekdays, have any influence on how users are active.¶

The most and least active based on the type of activity (Fairly, Lightly, etc)¶

The Most and Least Active Based On Distance¶

However, based on the above visualizations and summary data, I can't really draw a solid conclusion. For now, I hypothesize that users are probably most active on Saturdays.

What factors contribute to the highest calorie burns?¶

Based on the summary data, it would seem that people burn the fewest calories on Thursdays on average, whereas Tuesday has the most calories burned on average.

Let's analyze steps taken vs calories burned¶

How does the data vary from weekends and weekdays?¶

How many calories do they burn on weekdays versus weekends? How about the quality of their sleep?

I will now inner join between the sleepDay and dailyActivity dataset in order to analyze the users' sleeping behaviours.¶

Calories burned between weekdays and weekends¶

Sleep quality between weekdays and weekends¶

Let's see the relationship between total sleep minutes and total time in bed subgrouped by the type of week¶

Both display a positive relationship. I guess it's common sense that the longer you stay in bed, the more likely you will fall into sleep. Let's analyze this in another way.¶

Based on a study here, healthy adults are advised to sleep between 7 and 9 hours. Therefore, I will categorize those who slept fewer than 7 hours as Bad Sleepers, those who slept more than 9 hours as Oversleepers, and those who slept between 7 and 9 hours as Normal Sleepers.

Also it seems that the users underslept the most on Tuesdays. Not sure why this is the case, I should try to investigate this.¶

Let's look back at my summary data based on the users' active status.¶

Let's analyze the heartrate dataset¶

Moving on to the summary:¶

Alright let's start with analyzing whether the heart-rate has anything to do with people undersleeping.¶

Let's look at the average heart-rate per minute for each hour of the day¶

Let's try analyzing daily steps¶

On weekdays, Tuesday has the most steps taken, whereas on weekends Saturday has the most steps taken. I hypothesize that since Tuesday seems to be the busiest day among the users, this may be the very reason why users are mostly undersleeping on Tuesdays.

Is there a relationship between how active they are and their sleep schedule?¶

Since most of these users are lightly active, let's see if there's any relationship between light active users and their sleeping behaviours.¶

Looks like there's no correlation between those who are lightly active and the amount of sleep they get.

What about those who are mostly sedentary?¶

There's a negative correlation! The more idle a person is during the day, the less sleep they seem to get compared with those who are more active!

I recommend bellabeat to take note of this insight and maybe consider sending notifications to users who might have been idle throughout the day to get some light exercises so as to allow them to have a better sleep later in the day.¶

What About those who are fairly active and very active?¶

Although there's a slight negative trend for Fairly active users, it doesn't seem to be a very strong correlation but still worth mentioning. Also, those who are very active don't seem to have a correlation as well.¶

At what hour of the day are users the most active?

With this visualization I can see that:¶

On what day of the week are users the most active during the day?¶

Summary of my findings:¶