Skip to content

Commit

Permalink
Added function docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Henry Hammond committed Aug 3, 2014
1 parent 0bba4f6 commit 5dac664
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 1 deletion.
26 changes: 26 additions & 0 deletions kcbo/statistical_tests/beta_binomial_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,30 @@ def generate_text_description(self, summary_data):
return description

def conversion_test(dataframe, groups=None, groupcol='group', successcol='conversions', totalcol='total', samples=100000, **kwargs):
    """Beta-Binomial model conversion test.

    Expects a dataframe laid out as:

    |Group|Successes|Total|
    |-----|---------|-----|
    |A    |    <int>|<int>|
    |B    |    <int>|<int>|
    ...

    Estimates the true proportion of successes for each group and compares
    the proportions of different groups.

    Inputs:
    dataframe -- Pandas dataframe of the form above
    groups -- (optional) list of groups to look at; when omitted, all groups are used
    groupcol -- name of the dataframe column holding the group labels
    successcol -- name of the dataframe column holding the number of successes
    totalcol -- name of the dataframe column holding the group totals
    samples -- passed straight through to BetaBinomialTest (default 100000);
               presumably the number of samples to draw -- confirm against
               BetaBinomialTest
    **kwargs -- any further keyword arguments are forwarded to BetaBinomialTest

    Returns:
    (description, raw_data)
    description: table describing output data
    raw_data: dictionary of output data
    """
    # Build the test object first, then ask it for its summary.
    test = BetaBinomialTest(
        dataframe, groups, groupcol, successcol, totalcol, samples, **kwargs)
    return test.summary()
26 changes: 26 additions & 0 deletions kcbo/statistical_tests/lognormal_comparison_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,32 @@ def generate_text_description(self, summary_data):


def lognormal_comparison_test(dataframe, groups=None, groupcol='group', valuecol='value', **kwargs):
    """Lognormal Median Comparison

    Given a dataframe of the form:

    |Group  |Observed Value|
    |-------|--------------|
    |<group>|       <float>|
    ...

    Compute estimates of the difference of medians between groups.

    Note: This test assumes that input comes from distributions with the same variance.

    Inputs:
    dataframe -- Pandas dataframe of form above
    groups -- (optional) list of groups to look at. Excluded looks at all groups
    groupcol -- string for indexing dataframe column for groups
    valuecol -- string for indexing dataframe column for values of observations
    **kwargs -- any further keyword arguments are forwarded to
                LognormalMedianComparison

    Returns:
    (description, raw_data)
    description: table describing output data
    raw_data: dictionary of output data
    """
    # Forward the caller's arguments. The previous version passed the literal
    # defaults (groups=None, groupcol='group', valuecol='value'), so any
    # custom group selection or column names were silently ignored.
    results = LognormalMedianComparison(
        dataframe, groups=groups, groupcol=groupcol, valuecol=valuecol, **kwargs)
    return results.summary()
33 changes: 32 additions & 1 deletion kcbo/statistical_tests/t_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def initialize_test(self, dataframe, groups=None, groupcol='group', valuecol='va
self.burns = burns
self.thin = thin

self.progress_bar = kwargs.get('progress_bar',False)
if self.delay_statistic != True:
local_vars = locals()
del local_vars['self']
Expand Down Expand Up @@ -106,7 +107,7 @@ def run_model(self, key, df=None, groups=None, groupcol=None, valuecol=None, poo

# Generate our MCMC object and run sampler
mcmc = pm.MCMC(model)
mcmc.sample(iter=samples, burn=burns, thin=thin, progress_bar=False)
mcmc.sample(iter=samples, burn=burns, thin=thin, progress_bar=self.progress_bar)

self.mcmcs[key] = mcmc
self.complete_key(key)
Expand Down Expand Up @@ -197,6 +198,36 @@ def generate_text_description(self, summary_data):


def t_test(df, groups=None, groupcol='group', valuecol='value', pooling='default', samples=40000, burns=10000, thin=1, *args, **kwargs):
    """Bayesian t-Test.

    Expects a dataframe laid out as:

    |Group  |Observed Value|
    |-------|--------------|
    |<group>|       <float>|
    ...

    Performs pairwise t-Tests on the groups.

    Inputs:
    df -- Pandas dataframe of the form above
    groups -- (optional) list of groups to look at; when omitted, all groups are used
    groupcol -- name of the dataframe column holding the group labels
    valuecol -- name of the dataframe column holding the observed values
    pooling -- strategy for using pooled data in the test
        * 'default' -- uses pairwise pooled data
        * 'all' -- uses pooled data from all groups
    samples -- number of samples to use in MCMC
    burns -- number of burns to use in MCMC
    thin -- thinning to use in MCMC
    progress_bar -- boolean keyword argument, show progress bar of sampler
                    (PyMC progress bar)
    *args, **kwargs -- forwarded unchanged to TTest

    Returns:
    (description, raw_data)
    description: table describing output data
    raw_data: dictionary of output data
    """
    # Construct the test and return its summary in a single expression.
    return TTest(
        df, groups, groupcol, valuecol,
        pooling, samples, burns, thin,
        *args, **kwargs
    ).summary()

0 comments on commit 5dac664

Please sign in to comment.