kernal_smoothing.py 5.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. def kernal_smoothing(xaxis_values):
  2. #USING KERNAL SMOOTHING WEIGHT METHOD TO SMOOTH THE CURVE FOR EACH ANSWER CHOICE
  3. global presmoothing_data
  4. global kernal_smoothing_weights_df
  5. global all_weights_for_j
  6. global items_df
  7. pd.options.mode.chained_assignment = None
  8. presmoothing_data = pd.DataFrame()
  9. kernal_smoothing_weights_df = pd.DataFrame()
  10. items_df = pd.DataFrame()
  11. presmoothing_data = alldata_forplot[alldata_forplot['item_id'] == x ]
  12. total_students = len(presmoothing_data.apptnumber.unique())
  13. h = 1.1*(total_students**(-.2)) #bandwidth used in smoothing
  14. items_df[xaxis_values] = [number for number in range(0,total_correct + 1)]
  15. kernal_smoothing_weights_df['Freq'] = presmoothing_data.groupby(xaxis_values).size()
  16. kernal_smoothing_weights_df = kernal_smoothing_weights_df.reset_index()
  17. kernal_smoothing_weights_df = kernal_smoothing_weights_df.merge(pd.pivot_table(presmoothing_data.groupby([xaxis_values,'response']).size().to_frame('freq'), values='freq', index=[xaxis_values], columns=['response'], fill_value= 0).reset_index(),on=xaxis_values)
  18. kernal_smoothing_weights_df['A_pct'] = kernal_smoothing_weights_df['A'] / kernal_smoothing_weights_df['Freq']
  19. kernal_smoothing_weights_df['B_pct'] = kernal_smoothing_weights_df['B'] / kernal_smoothing_weights_df['Freq']
  20. kernal_smoothing_weights_df['C_pct'] = kernal_smoothing_weights_df['C'] / kernal_smoothing_weights_df['Freq']
  21. kernal_smoothing_weights_df['D_pct'] = kernal_smoothing_weights_df['D'] / kernal_smoothing_weights_df['Freq']
  22. if MC_options_number == ["5MC"]:
  23. kernal_smoothing_weights_df['E_pct'] = kernal_smoothing_weights_df['E'] / kernal_smoothing_weights_df['Freq']
  24. kernal_smoothing_weights_df['kernal_smoothing_weight'] = 0
  25. kernal_smoothing_weights_df['smoothed_value_A'] = 0
  26. kernal_smoothing_weights_df['smoothed_value_B'] = 0
  27. kernal_smoothing_weights_df['smoothed_value_C'] = 0
  28. kernal_smoothing_weights_df['smoothed_value_D'] = 0
  29. kernal_smoothing_weights_df['smoothed_value_E'] = 0
  30. kernal_smoothing_weights_df = items_df.merge(kernal_smoothing_weights_df, on=xaxis_values, how='left')
  31. kernal_smoothing_weights_df = kernal_smoothing_weights_df.fillna(0)
  32. kernal_smoothing_weights_df_original = kernal_smoothing_weights_df.copy(deep=True)
  33. for correct in range(0,total_correct + 1):
  34. all_weights_for_j = kernal_smoothing_weights_df_original.copy(deep=True)
  35. kernal_smoothing_weights_df['bandwidth_var'] = (-1/(2*h))
  36. kernal_smoothing_weights_df['var'] = statistics.variance(presmoothing_data[xaxis_values])
  37. all_weights_for_j['kernal_smoothing_weight'] = np.exp(kernal_smoothing_weights_df['bandwidth_var'] * ((kernal_smoothing_weights_df['num_correct']- correct)**2) / kernal_smoothing_weights_df['var']) * (kernal_smoothing_weights_df['Freq'])
  38. kernal_smoothing_weights_df['kernal_smoothing_weight'].iat[correct] = all_weights_for_j['kernal_smoothing_weight'].sum()
  39. kernal_smoothing_weights_df['wgt_new'] = all_weights_for_j['kernal_smoothing_weight']
  40. kernal_smoothing_weights_df = kernal_smoothing_weights_df.rename(columns={'wgt_new': 'wgt_' + str(correct)})
  41. all_weights_for_j['smoothed_value_A'] = ((kernal_smoothing_weights_df["wgt_" + str(correct)] * kernal_smoothing_weights_df['A_pct']) / kernal_smoothing_weights_df['kernal_smoothing_weight']).replace([np.inf, -np.inf], 0)
  42. all_weights_for_j['smoothed_value_B'] = ((kernal_smoothing_weights_df["wgt_" + str(correct)] * kernal_smoothing_weights_df['B_pct']) / kernal_smoothing_weights_df['kernal_smoothing_weight']).replace([np.inf, -np.inf], 0)
  43. all_weights_for_j['smoothed_value_C'] = ((kernal_smoothing_weights_df["wgt_" + str(correct)] * kernal_smoothing_weights_df['C_pct']) / kernal_smoothing_weights_df['kernal_smoothing_weight']).replace([np.inf, -np.inf], 0)
  44. all_weights_for_j['smoothed_value_D'] = ((kernal_smoothing_weights_df["wgt_" + str(correct)] * kernal_smoothing_weights_df['D_pct']) / kernal_smoothing_weights_df['kernal_smoothing_weight']).replace([np.inf, -np.inf], 0)
  45. if MC_options_number == ["5MC"]:
  46. all_weights_for_j['smoothed_value_E'] = ((kernal_smoothing_weights_df["wgt_" + str(correct)] * kernal_smoothing_weights_df['E_pct']) / kernal_smoothing_weights_df['kernal_smoothing_weight']).replace([np.inf, -np.inf], 0)
  47. kernal_smoothing_weights_df['smoothed_value_A'].iat[correct] = all_weights_for_j['smoothed_value_A'].sum()
  48. kernal_smoothing_weights_df['smoothed_value_B'].iat[correct] = all_weights_for_j['smoothed_value_B'].sum()
  49. kernal_smoothing_weights_df['smoothed_value_C'].iat[correct] = all_weights_for_j['smoothed_value_C'].sum()
  50. kernal_smoothing_weights_df['smoothed_value_D'].iat[correct] = all_weights_for_j['smoothed_value_D'].sum()
  51. if MC_options_number == ["5MC"]:
  52. kernal_smoothing_weights_df['smoothed_value_E'][correct] = all_weights_for_j['smoothed_value_E'].sum()