Fitting LoS distributions for HEP#

1. Imports#

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from fitter import Fitter, get_common_distributions, get_distributions

2. Length-of-stay categories#

  • Primary Hip

  • Revision Hip

  • Primary Knee

  • Revision Knee

  • Unicompartmental Primary Knee

  • Delayed Discharge

3. Read in data#

# Load only the columns this analysis uses: admission/discharge dates,
# spell length of stay, the 'mffd' column, procedure types and surgery site.
HipKnee_1320_mffd = pd.read_csv(
    "Test_data/HipKnee_with_los_delay_.csv",
    usecols=[
        'admission_date',
        'discharge_date',
        'spell_los',
        'mffd',
        'hip_prim_proc',
        'knee_prim_proc',
        'surgery_site',
    ],
)

4. Calculate proportions of surgical types within Hips and Knees#

# Rows for each joint, selected by surgery site
length_hip = HipKnee_1320_mffd[HipKnee_1320_mffd['surgery_site'] == "Site: hip"]
length_knee = HipKnee_1320_mffd[HipKnee_1320_mffd['surgery_site'] == "Site: knee"]

# Hip procedure types, selected from the full table by 'hip_prim_proc'
primary_hip_n = HipKnee_1320_mffd[HipKnee_1320_mffd['hip_prim_proc'] == "Primary THR"]
revision_hip_n = HipKnee_1320_mffd[HipKnee_1320_mffd['hip_prim_proc'] == "Revision of hip"]
hip_resurfacing_n = HipKnee_1320_mffd[HipKnee_1320_mffd['hip_prim_proc'] == "Hip resurfacing"]

# Knee procedure types, selected from the full table by 'knee_prim_proc'
primary_knee_n = HipKnee_1320_mffd[HipKnee_1320_mffd['knee_prim_proc'] == "Primary TKR"]
revision_knee_n = HipKnee_1320_mffd[HipKnee_1320_mffd['knee_prim_proc'] == "Revision of knee"]
unicompart_knee_n = HipKnee_1320_mffd[HipKnee_1320_mffd['knee_prim_proc'] == "Primary UKR"]

# Row count per procedure type (delayed and not delayed together) ...
print(*(len(group) for group in (primary_hip_n, revision_hip_n, hip_resurfacing_n,
                                 primary_knee_n, revision_knee_n, unicompart_knee_n)))
# ... followed by the total across all six types
print(sum(len(group) for group in (primary_hip_n, revision_hip_n, hip_resurfacing_n,
                                   primary_knee_n, revision_knee_n, unicompart_knee_n)))
3057 482 52 2302 392 679
6964
# Share of each hip procedure type relative to the number of hip-site rows.
# NOTE(review): the numerators are filtered on 'hip_prim_proc' /
# 'knee_prim_proc' while the denominators are filtered on 'surgery_site';
# tot_hips and tot_knees only equal 1 if the two columns agree on every row
# - verify against the data.
prop_prim_hip = len(primary_hip_n) / len(length_hip)
prop_rev_hip = len(revision_hip_n) / len(length_hip)
prop_hip_resurfacing = len(hip_resurfacing_n) / len(length_hip)
tot_hips = prop_prim_hip + prop_rev_hip + prop_hip_resurfacing

# Share of each knee procedure type relative to the number of knee-site rows
prop_prim_knee = len(primary_knee_n) / len(length_knee)
prop_rev_knee = len(revision_knee_n) / len(length_knee)
prop_uncompart_knee = len(unicompart_knee_n) / len(length_knee)
tot_knees = prop_prim_knee + prop_rev_knee + prop_uncompart_knee

# Proportions by joint (delayed and not delayed together); the hip
# resurfacing share and the two totals are computed above but not printed
print(prop_prim_hip, prop_rev_hip, prop_prim_knee, prop_rev_knee, prop_uncompart_knee)
0.8616121758737317 0.1358511837655017 0.6738875878220141 0.11475409836065574 0.1987704918032787

5. Calculate proportions of surgical types within classes: Primary and Revision#

# Share of each primary procedure among all primaries
# (primary hip + primary knee + unicompartmental knee)
prop_primary_hips = len(primary_hip_n) / (len(primary_hip_n) + len(primary_knee_n) + len(unicompart_knee_n))
prop_primary_knees = len(primary_knee_n) / (len(primary_hip_n) + len(primary_knee_n) + len(unicompart_knee_n))
prop_unicompart_knees = len(unicompart_knee_n) / (len(primary_hip_n) + len(primary_knee_n) + len(unicompart_knee_n))

# Share of each revision procedure among all revisions (hip + knee)
prop_revision_hips = len(revision_hip_n) / (len(revision_hip_n) + len(revision_knee_n))
prop_revision_knees = len(revision_knee_n) / (len(revision_hip_n) + len(revision_knee_n))

# Proportions by primary/revision class (delayed and not delayed together)
print(prop_primary_hips, prop_primary_knees, prop_unicompart_knees, prop_revision_hips, prop_revision_knees)
0.5062934746604836 0.3812520702219278 0.1124544551175886 0.551487414187643 0.448512585812357

6. Calculate proportion delayed#

  • Overall delay for all surgical types

# Rows with a missing 'mffd' value are the patients who were NOT delayed
delay_n = HipKnee_1320_mffd.loc[HipKnee_1320_mffd['mffd'].isna()]
# Number delayed = all rows minus the not-delayed rows
delay = len(HipKnee_1320_mffd) - len(delay_n)
# Proportion delayed = delayed rows over all rows
delay_prop = delay / len(HipKnee_1320_mffd)
delay
529
# overall proportion of patients delayed (rows with a non-missing 'mffd'
# value divided by all rows); displayed as the cell result
delay_prop
0.07596209075244112
# Delayed patients per surgery type: rows whose 'mffd' value is present
primary_hip_del = primary_hip_n.loc[primary_hip_n['mffd'].notna()]
revise_hip_del = revision_hip_n.loc[revision_hip_n['mffd'].notna()]
primary_knee_del = primary_knee_n.loc[primary_knee_n['mffd'].notna()]
revise_knee_del = revision_knee_n.loc[revision_knee_n['mffd'].notna()]
uni_knee_del = unicompart_knee_n.loc[unicompart_knee_n['mffd'].notna()]

# Not-delayed patients per surgery type: rows whose 'mffd' value is missing
primary_hip = primary_hip_n.loc[primary_hip_n['mffd'].isna()]
revise_hip = revision_hip_n.loc[revision_hip_n['mffd'].isna()]
primary_knee = primary_knee_n.loc[primary_knee_n['mffd'].isna()]
revise_knee = revision_knee_n.loc[revision_knee_n['mffd'].isna()]
uni_knee = unicompart_knee_n.loc[unicompart_knee_n['mffd'].isna()]

# Delayed share within each surgery type, for comparison with the overall
# proportion delayed. The delayed and not-delayed rows partition each type,
# so the denominator is simply that type's full row count.
prop_primary_hip_del = len(primary_hip_del) / len(primary_hip_n)
prop_revison_hip_del = len(revise_hip_del) / len(revision_hip_n)
prop_primary_knee_del = len(primary_knee_del) / len(primary_knee_n)
prop_revision_knee_del = len(revise_knee_del) / len(revision_knee_n)
prop_unicom_knee_del = len(uni_knee_del) / len(unicompart_knee_n)

# Proportion of patients delayed per surgery type
print(prop_primary_hip_del, prop_primary_knee_del, prop_revison_hip_del, prop_revision_knee_del, prop_unicom_knee_del)
0.0775269872423945 0.06602953953084274 0.15767634854771784 0.1326530612244898 0.016200294550810016

7. Vectors for fitting no delay LoS#

  • Save to csv for using empirical distributions in model

# LoS values of the not-delayed patients, one plain list per surgery type
primary_knee_v = primary_knee.loc[:, 'spell_los'].tolist()
uni_knee_v = uni_knee.loc[:, 'spell_los'].tolist()
revise_knee_v = revise_knee.loc[:, 'spell_los'].tolist()
primary_hip_v = primary_hip.loc[:, 'spell_los'].tolist()
revise_hip_v = revise_hip.loc[:, 'spell_los'].tolist()

# One column per surgery type; the lists have different lengths, so each is
# wrapped in a Series and pandas aligns them by position when building the frame
series_dict = {
    'Primary Knee': pd.Series(primary_knee_v),
    'Unicompart Knee': pd.Series(uni_knee_v),
    'Revision Knee': pd.Series(revise_knee_v),
    'Primary Hip': pd.Series(primary_hip_v),
    'Revision Hip': pd.Series(revise_hip_v),
}

# Export 1: zero-day stays are kept but stored as a very small positive number
df = pd.DataFrame(series_dict).replace(to_replace=0, value=0.0001)
df.to_csv('Test_data/los_spells.csv', index=False)

# Export 2: zero-day stays are dropped altogether
a = [los for los in primary_knee_v if los != 0]
b = [los for los in uni_knee_v if los != 0]
c = [los for los in revise_knee_v if los != 0]
d = [los for los in primary_hip_v if los != 0]
e = [los for los in revise_hip_v if los != 0]

nonzero_dict = {
    'Primary Knee': pd.Series(a),
    'Unicompart Knee': pd.Series(b),
    'Revision Knee': pd.Series(c),
    'Primary Hip': pd.Series(d),
    'Revision Hip': pd.Series(e),
}

df2 = pd.DataFrame(nonzero_dict)
df2.to_csv('Test_data/los_spells_no_zero.csv', index=False)

8. LoS vector for delayed discharge (all surgical types)#

# Delayed patients of every surgery type: rows whose 'mffd' value is present
delayed_los = HipKnee_1320_mffd.loc[HipKnee_1320_mffd['mffd'].notna()]
# Their LoS values as a plain list
delayed_los_v = delayed_los.loc[:, 'spell_los'].tolist()

9. Visualise LoS distributions#

# Histogram of not-delayed primary knee LoS (50 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(data=primary_knee_v, bins=50, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe96dbff970>
../../_images/4406d282ee5d024220fb93aaaca8f4907c3b42aa764ec1596fe51fefdf376bcf.png
# Histogram of not-delayed unicompartmental knee LoS (20 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(data=uni_knee_v, bins=20, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe965960910>
../../_images/cf54bdcad1ccd8c5688070901481de2434c61883bb58198b8052912a24b0d119.png
# Histogram of not-delayed revision knee LoS (50 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(data=revise_knee_v, bins=50, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe96dbffa30>
../../_images/7023e8f20f98ff493f4dca7f46d842ad9bbf455c92b4f85b0bddbdcbd8befcf0.png
# Histogram of not-delayed primary hip LoS (50 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(data=primary_hip_v, bins=50, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe9657776a0>
../../_images/20ffa12bb7b78dcdb3c3d7e47f807077e948f7be0a6a8421e7529a74d62bedb6.png
# Histogram of not-delayed revision hip LoS (50 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(revise_hip_v, bins=50, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe96555e040>
../../_images/ca3789c9b23ed6a4205ae03e75fe6366d2acc27a4178e89d783bc89cd49146c9.png
# Histogram of delayed-discharge LoS, all surgery types together (50 bins)
sns.set_style("white")
sns.set_context("paper", font_scale=2)

sns.displot(delayed_los_v, bins=50, kind="hist", aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7fe965960cd0>
../../_images/c0c6714d4e6cc9d874967873ea7c7dc2976ce0726ff6e7b138349e1e091091c5.png
# Wrap each LoS list in a pandas Series so describe() can summarise it
delay_los_pdv = pd.Series(delayed_los_v)
primary_knee_pdv = pd.Series(primary_knee_v)
uni_knee_pdv = pd.Series(uni_knee_v)
revise_knee_pdv = pd.Series(revise_knee_v)
primary_hip_pdv = pd.Series(primary_hip_v)
revise_hip_pdv = pd.Series(revise_hip_v)

# Print the summary statistics of each group, prefixed with its label
print("delay_los", delay_los_pdv.describe())
print("primary_knee", primary_knee_pdv.describe())
print("uni_knee", uni_knee_pdv.describe())
print("revise_knee", revise_knee_pdv.describe())
print("primary_hip", primary_hip_pdv.describe())
print("revise_hip", revise_hip_pdv.describe())
delay_los count    529.000000
mean      16.521739
std       15.153132
min        2.000000
25%        8.000000
50%       13.000000
75%       19.000000
max      156.000000
dtype: float64
primary_knee count    2150.000000
mean        4.651163
std         2.828129
min         0.000000
25%         3.000000
50%         4.000000
75%         6.000000
max        42.000000
dtype: float64
uni_knee count    668.000000
mean       2.914671
std        2.136334
min        0.000000
25%        2.000000
50%        3.000000
75%        4.000000
max       16.000000
dtype: float64
revise_knee count    340.000000
mean       7.194118
std        7.598554
min        0.000000
25%        3.000000
50%        5.000000
75%        9.000000
max       63.000000
dtype: float64
primary_hip count    2820.000000
mean        4.433333
std         2.949526
min         0.000000
25%         3.000000
50%         4.000000
75%         5.000000
max        44.000000
dtype: float64
revise_hip count    406.000000
mean       6.908867
std        6.965812
min        0.000000
25%        3.000000
50%        5.000000
75%        8.000000
max       86.000000
dtype: float64

10. Convert to NumPy arrays for use in Fitter#

# NumPy copies of the LoS lists, used as input to Fitter
delay_los_npv = np.array(delayed_los_v)
primary_hip_npv = np.array(primary_hip_v)
revise_hip_npv = np.array(revise_hip_v)
primary_knee_npv = np.array(primary_knee_v)
revise_knee_npv = np.array(revise_knee_v)
uni_knee_npv = np.array(uni_knee_v)

11. Fit distributions#

# List the distribution names returned by fitter's get_distributions();
# the list is displayed as the cell result
get_distributions()
['_fit',
 'alpha',
 'anglit',
 'arcsine',
 'argus',
 'beta',
 'betaprime',
 'bradford',
 'burr',
 'burr12',
 'cauchy',
 'chi',
 'chi2',
 'cosine',
 'crystalball',
 'dgamma',
 'dweibull',
 'erlang',
 'expon',
 'exponnorm',
 'exponpow',
 'exponweib',
 'f',
 'fatiguelife',
 'fisk',
 'foldcauchy',
 'foldnorm',
 'gamma',
 'gausshyper',
 'genexpon',
 'genextreme',
 'gengamma',
 'genhalflogistic',
 'genhyperbolic',
 'geninvgauss',
 'genlogistic',
 'gennorm',
 'genpareto',
 'gibrat',
 'gilbrat',
 'gompertz',
 'gumbel_l',
 'gumbel_r',
 'halfcauchy',
 'halfgennorm',
 'halflogistic',
 'halfnorm',
 'hypsecant',
 'invgamma',
 'invgauss',
 'invweibull',
 'johnsonsb',
 'johnsonsu',
 'kappa3',
 'kappa4',
 'ksone',
 'kstwo',
 'kstwobign',
 'laplace',
 'laplace_asymmetric',
 'levy',
 'levy_l',
 'levy_stable',
 'loggamma',
 'logistic',
 'loglaplace',
 'lognorm',
 'loguniform',
 'lomax',
 'maxwell',
 'mielke',
 'moyal',
 'nakagami',
 'ncf',
 'nct',
 'ncx2',
 'norm',
 'norminvgauss',
 'pareto',
 'pearson3',
 'powerlaw',
 'powerlognorm',
 'powernorm',
 'rayleigh',
 'rdist',
 'recipinvgauss',
 'reciprocal',
 'rice',
 'rv_continuous',
 'rv_histogram',
 'semicircular',
 'skewcauchy',
 'skewnorm',
 'studentized_range',
 't',
 'trapezoid',
 'trapz',
 'triang',
 'truncexpon',
 'truncnorm',
 'truncweibull_min',
 'tukeylambda',
 'uniform',
 'vonmises',
 'vonmises_line',
 'wald',
 'weibull_max',
 'weibull_min',
 'wrapcauchy']
# Fit the candidate distributions to the delayed-discharge LoS values
f = Fitter(
    delay_los_npv,
    distributions=['gamma', 'lognorm', 'beta', 'expon', 'erlang'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 27.36it/s]
         sumsquare_error          aic          bic  kl_div  ks_statistic  \
lognorm         0.004644  1608.122559 -6140.474568     inf      0.051474   
beta            0.005357  1924.655190 -6058.553177     inf      0.087312   
erlang          0.005441  1895.761508 -6056.655095     inf      0.088753   
expon           0.009436  1599.608397 -5771.632045     inf      0.162747   
gamma           0.035865  6380.344444 -5059.053541     inf      0.921926   

            ks_pvalue  
lognorm  1.170505e-01  
beta     5.866521e-04  
erlang   4.477466e-04  
expon    1.037166e-12  
gamma    0.000000e+00  
{'lognorm': {'s': 0.6977106156387711,
  'loc': 0.46203959259225885,
  'scale': 12.381893465319441}}
../../_images/1a4e17eee7eceb1f9cd3ca44dae60cea77abac15082c477ab9347c3774973fa2.png
# Fit the candidate distributions to the not-delayed primary knee LoS values
f = Fitter(
    primary_knee_npv,
    distributions=['gamma', 'lognorm', 'beta', 'expon', 'erlang'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 21.14it/s]
         sumsquare_error          aic           bic  kl_div  ks_statistic  \
lognorm         0.578484  1860.575987 -17651.201383     inf      0.151704   
expon           0.792283  1214.423450 -16982.682686     inf      0.363244   
beta            0.904699  1153.425425 -16682.064683     inf      0.427036   
erlang          0.957660  4442.059841 -16567.424259     inf      0.992310   
gamma           1.696721  2924.646338 -15337.709648     inf      0.872199   

             ks_pvalue  
lognorm   1.150099e-43  
expon    1.064843e-254  
beta      0.000000e+00  
erlang    0.000000e+00  
gamma     0.000000e+00  

{'lognorm': {'s': 0.4054742453283533,
  'loc': -1.0156970361318063,
  'scale': 5.187968679570998}}
../../_images/fd61eaf8184034e8d3c7497f17c0fe295d74082e38605ebbd353652c78c1f0ae.png
# Fit the candidate distributions to the not-delayed unicompartmental knee
# LoS values. Use the NumPy array, as every other fit in this section does,
# rather than the pandas Series uni_knee_pdv: that name is rebound to the
# clipped data further down, so re-running this cell out of order would
# otherwise fit the wrong data.
f = Fitter(uni_knee_npv,
           distributions=['gamma',
                          'lognorm',
                          'beta',
                          'expon',
                          'erlang'
                          ])
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 31.50it/s]
         sumsquare_error          aic          bic  kl_div  ks_statistic  \
lognorm         6.349747   924.541981 -3090.610424     inf      0.171962   
gamma           6.363627   945.514562 -3089.151847     inf      0.167334   
beta            6.373490   921.072492 -3081.613030     inf      0.168304   
expon           6.603280   766.898464 -3070.961471     inf      0.271955   
erlang          7.357514  3288.577391 -2992.209312     inf      0.919689   

            ks_pvalue  
lognorm  9.648984e-18  
gamma    8.102790e-17  
beta     5.212017e-17  
expon    3.872916e-44  
erlang   0.000000e+00  

{'lognorm': {'s': 0.3803355622865142,
  'loc': -2.1623715729774657,
  'scale': 4.71533144760631}}
../../_images/7a7cd1ca71a53f0ca26aef78e55331b2ade5adf60ce1c74a1994e8094b5016e3.png
# Fit the candidate distributions to the not-delayed revision knee LoS values
f = Fitter(
    revise_knee_npv,
    distributions=['gamma', 'lognorm', 'beta', 'expon', 'erlang'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 25.81it/s]
         sumsquare_error          aic          bic  kl_div  ks_statistic  \
expon           0.068790  1274.368194 -2880.262979     inf      0.123337   
beta            0.100426  1301.742494 -2739.959169     inf      0.310421   
lognorm         0.118113  1229.635547 -2690.633827     inf      0.366558   
erlang          0.170774  5154.728107 -2565.275127     inf      0.895442   
gamma           0.398495  4032.310584 -2277.175334     inf      0.699071   

             ks_pvalue  
expon     5.759763e-05  
beta      1.342060e-29  
lognorm   1.816040e-41  
erlang    0.000000e+00  
gamma    5.534748e-167  
{'expon': {'loc': 0.0, 'scale': 7.194117647058824}}
../../_images/841cba0e2ad89bf57ce6d352b5bece13dc8544138cfc3e70bf36709135f79f25.png
# Fit the candidate distributions to the not-delayed primary hip LoS values
f = Fitter(
    primary_hip_npv,
    distributions=['gamma', 'lognorm', 'beta', 'expon', 'erlang'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 21.66it/s]
         sumsquare_error          aic           bic  kl_div  ks_statistic  \
lognorm         0.509158  1963.042781 -24283.125652     inf      0.166587   
beta            0.516464  2432.283680 -24235.005142     inf      0.168023   
expon           0.654193  1294.311552 -23584.255723     inf      0.334257   
gamma           0.686329  1119.561080 -23441.078834     inf      0.325523   
erlang          0.828594  4614.364362 -22909.864887     inf      0.962388   

             ks_pvalue  
lognorm   7.205752e-69  
beta      4.630348e-70  
expon    2.091397e-281  
gamma    1.536661e-266  
erlang    0.000000e+00  

{'lognorm': {'s': 0.3615166592899655,
  'loc': -2.4363376840086204,
  'scale': 6.4116655364890836}}
../../_images/c820446981a2a6d67b1ca5b6a0200a1ebd25fa07f0fb02f847cf70e230098d17.png
# Fit the candidate distributions to the not-delayed revision hip LoS values
f = Fitter(
    revise_hip_npv,
    distributions=['gamma', 'lognorm', 'beta', 'expon', 'erlang'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 5 distributions: 100%|████████████████████| 5/5 [00:00<00:00, 21.51it/s]
         sumsquare_error          aic          bic  kl_div  ks_statistic  \
lognorm         0.022496  1733.019612 -3961.087365     inf      0.107411   
beta            0.033304  2128.183021 -3795.802654     inf      0.142546   
expon           0.059486  1635.338315 -3572.307506     inf      0.241396   
erlang          0.130983  5711.381127 -3245.831210     inf      0.983865   
gamma           0.138877  3996.315953 -3222.070957     inf      0.938027   

            ks_pvalue  
lognorm  1.559684e-04  
beta     1.162057e-07  
expon    2.614308e-21  
erlang   0.000000e+00  
gamma    0.000000e+00  

{'lognorm': {'s': 0.6152292643165856,
  'loc': -0.934387442319201,
  'scale': 6.350152207311825}}
../../_images/6821ffc17814e1a68bd0e566d571d9f8e6f78dd46efce39e49fc5f000c49abc8.png
# Show the best fit again for the most recent Fitter object `f`
# (the revision hip fit in the cell above)
f.get_best(method = 'sumsquare_error')
{'lognorm': {'s': 0.6152292643165856,
  'loc': -0.934387442319201,
  'scale': 6.350152207311825}}

12. Clip arrays - optional capping of LoS data (values above 30 are set to 30, not removed)#

# Delayed-discharge LoS is left as is (no cap applied)
delay_los_npv_clip = np.asarray(delayed_los_v)

# Per-surgery LoS limited to the range [0, 30]: values above 30 are set to
# 30 rather than dropped, so the array lengths are unchanged
primary_knee_npv_clip = np.asarray(primary_knee_v).clip(0, 30)
uni_knee_npv_clip = np.asarray(uni_knee_v).clip(0, 30)
revise_knee_npv_clip = np.asarray(revise_knee_v).clip(0, 30)
primary_hip_npv_clip = np.asarray(primary_hip_v).clip(0, 30)
revise_hip_npv_clip = np.asarray(revise_hip_v).clip(0, 30)

12.1 Fit clipped data#

# Fit the candidate distributions to the clipped primary knee LoS values
f = Fitter(
    primary_knee_npv_clip,
    distributions=['gamma', 'lognorm', 'beta', 'expon'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 4 distributions: 100%|████████████████████| 4/4 [00:00<00:00, 28.55it/s]
         sumsquare_error          aic           bic  kl_div  ks_statistic  \
lognorm         1.352282  1383.035323 -15825.554283     inf      0.151034   
gamma           1.369003  1633.491636 -15799.132769     inf      0.167515   
beta            1.371934  1592.110062 -15786.860642     inf      0.166926   
expon           1.651798   957.270922 -15403.074367     inf      0.363888   

             ks_pvalue  
lognorm   2.775736e-43  
gamma     3.330412e-53  
beta      7.852058e-53  
expon    1.245667e-255  

{'lognorm': {'s': 0.40225541615533966,
  'loc': -1.0399482363715324,
  'scale': 5.212995574296882}}
../../_images/f1d2ba69acde07c729e174c870a94f164d93ba0a9adfb8cc8e9f33ae6f8d4c7a.png
# Fit the candidate distributions to the clipped unicompartmental knee LoS values
f = Fitter(
    uni_knee_npv_clip,
    distributions=['gamma', 'lognorm', 'beta', 'expon'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 4 distributions: 100%|████████████████████| 4/4 [00:00<00:00, 43.09it/s]
         sumsquare_error         aic          bic  kl_div  ks_statistic  \
lognorm         6.349747  924.541981 -3090.610424     inf      0.171962   
gamma           6.363627  945.514562 -3089.151847     inf      0.167334   
beta            6.373490  921.072492 -3081.613030     inf      0.168304   
expon           6.603280  766.898464 -3070.961471     inf      0.271955   

            ks_pvalue  
lognorm  9.648984e-18  
gamma    8.102790e-17  
beta     5.212017e-17  
expon    3.872916e-44  

{'lognorm': {'s': 0.3803355622865142,
  'loc': -2.1623715729774657,
  'scale': 4.71533144760631}}
../../_images/8a87aa34803e870b4f19de340a3bed470b920c62ab1607827920cdc6e4dab5ab.png
# Fit the candidate distributions to the clipped revision knee LoS values
f = Fitter(
    revise_knee_npv_clip,
    distributions=['gamma', 'lognorm', 'beta', 'expon'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 4 distributions: 100%|████████████████████| 4/4 [00:00<00:00, 29.05it/s]
         sumsquare_error          aic          bic  kl_div  ks_statistic  \
expon           0.553174   824.389317 -2171.491717     inf      0.134110   
beta            0.567029   862.802255 -2151.423185     inf      0.246918   
lognorm         0.614330  1100.527541 -2130.010433     inf      0.399620   
gamma           0.635206   904.170953 -2118.648631     inf      0.369219   

            ks_pvalue  
expon    8.562804e-06  
beta     9.612934e-19  
lognorm  1.649474e-49  
gamma    4.379593e-42  

{'expon': {'loc': 0.0, 'scale': 6.920588235294118}}
../../_images/4dd0b5d9fb1b9064892a337977692707a6152fa7170f3ff91625b656abab1d22.png
# Fit the candidate distributions to the clipped primary hip LoS values
f = Fitter(
    primary_hip_npv_clip,
    distributions=['gamma', 'lognorm', 'beta', 'expon'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 4 distributions: 100%|████████████████████| 4/4 [00:00<00:00, 29.92it/s]
         sumsquare_error          aic           bic  kl_div  ks_statistic  \
lognorm         1.309768  1377.265202 -21618.657207     inf      0.165827   
gamma           1.321269  1546.157021 -21594.003778     inf      0.166356   
beta            1.324767  1505.611887 -21578.602914     inf      0.169617   
expon           1.537796   979.324423 -21173.990919     inf      0.334836   

             ks_pvalue  
lognorm   3.051803e-68  
gamma     1.117626e-68  
beta      2.135810e-71  
expon    2.091822e-282  

{'lognorm': {'s': 0.35741083072060453,
  'loc': -2.489064861474697,
  'scale': 6.466619066862258}}
../../_images/b36f7dcdeaf9117e1938d78b983d50c103174b1459d9df776ec1ea4a4bab73da.png
# Fit the candidate distributions to the clipped revision hip LoS values
f = Fitter(
    revise_hip_npv_clip,
    distributions=['gamma', 'lognorm', 'beta', 'expon'],
)
f.fit()
# Print the fit summary, then show the best fit by sum of squared errors
print(f.summary())
f.get_best(method='sumsquare_error')
Fitting 4 distributions: 100%|████████████████████| 4/4 [00:00<00:00, 44.59it/s]
         sumsquare_error         aic          bic  kl_div  ks_statistic  \
lognorm         0.780455  903.666307 -2521.198708     inf      0.106709   
beta            0.796737  919.212449 -2506.809528     inf      0.132526   
gamma           0.796812  916.728918 -2512.777811     inf      0.132578   
expon           0.885004  833.485595 -2476.164953     inf      0.251605   

            ks_pvalue  
lognorm  1.764127e-04  
beta     1.116592e-06  
gamma    1.104079e-06  
expon    3.889626e-23  

{'lognorm': {'s': 0.5796389432896831,
  'loc': -1.102767801493329,
  'scale': 6.517280834355917}}
../../_images/e8e9e8f26098bc6e3993a41ca2de2dea444170674a2b80810365bf77d7a52a46.png

12.2 Summarise clipped data#

# Wrap each clipped array in a pandas Series for describe().
# This rebinds the *_pdv names used for the unclipped summaries earlier on.
delay_los_pdv = pd.Series(delay_los_npv_clip)
primary_knee_pdv = pd.Series(primary_knee_npv_clip)
uni_knee_pdv = pd.Series(uni_knee_npv_clip)
revise_knee_pdv = pd.Series(revise_knee_npv_clip)
primary_hip_pdv = pd.Series(primary_hip_npv_clip)
revise_hip_pdv = pd.Series(revise_hip_npv_clip)

# Print the summary statistics of each group, prefixed with its label
print("delay_los", delay_los_pdv.describe())
print("primary_knee", primary_knee_pdv.describe())
print("uni_knee", uni_knee_pdv.describe())
print("revise_knee", revise_knee_pdv.describe())
print("primary_hip", primary_hip_pdv.describe())
print("revise_hip", revise_hip_pdv.describe())
delay_los count    529.000000
mean      16.521739
std       15.153132
min        2.000000
25%        8.000000
50%       13.000000
75%       19.000000
max      156.000000
dtype: float64
primary_knee count    2150.000000
mean        4.642326
std         2.731335
min         0.000000
25%         3.000000
50%         4.000000
75%         6.000000
max        30.000000
dtype: float64
uni_knee count    668.000000
mean       2.914671
std        2.136334
min        0.000000
25%        2.000000
50%        3.000000
75%        4.000000
max       16.000000
dtype: float64
revise_knee count    340.000000
mean       6.920588
std        6.291085
min        0.000000
25%        3.000000
50%        5.000000
75%        9.000000
max       30.000000
dtype: float64
primary_hip count    2820.000000
mean        4.425887
std         2.869115
min         0.000000
25%         3.000000
50%         4.000000
75%         5.000000
max        30.000000
dtype: float64
revise_hip count    406.000000
mean       6.665025
std        5.232482
min        0.000000
25%        3.000000
50%        5.000000
75%        8.000000
max       30.000000
dtype: float64