| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
def kernal_smoothing(xaxis_values):
    """Kernel-smooth the share of each answer choice across the score axis.

    For every target score ``j`` in ``0..total_correct`` a Gaussian weight
    ``exp(-(score - j)**2 / (2 * h * var)) * Freq`` is computed for each
    observed score, and the smoothed share of an answer choice at ``j`` is
    the weighted average of that choice's per-score share (``<choice>_pct``).

    Reads the module-level names ``alldata_forplot``, ``x``,
    ``total_correct`` and ``MC_options_number``; they are not defined in
    this block.
    NOTE(review): ``x`` is compared against ``item_id``, so it is presumably
    the id of the item being plotted - confirm against the caller.

    Writes these module-level globals:
      * ``presmoothing_data`` - rows of ``alldata_forplot`` for item ``x``.
      * ``items_df`` - one row per score ``0..total_correct``.
      * ``kernal_smoothing_weights_df`` - one row per score with ``Freq``,
        per-choice counts and ``_pct`` shares, ``kernal_smoothing_weight``
        (total weight for that target score), ``smoothed_value_A..E``,
        ``bandwidth_var``, ``var`` and one ``wgt_<j>`` column per target.
      * ``all_weights_for_j`` - per-row weights and contributions for the
        last target score processed.

    Args:
        xaxis_values: name of the score column in ``alldata_forplot`` used
            as the x axis (for example ``'num_correct'``).

    Returns:
        None. Results are published through the globals listed above.

    Raises:
        statistics.StatisticsError: if the item has fewer than two rows.
        ZeroDivisionError: if the item has no students (bandwidth undefined).
    """
    global presmoothing_data
    global kernal_smoothing_weights_df
    global all_weights_for_j
    global items_df

    # No chained assignment is left in this function; the option is still
    # set because other code in the file may depend on this side effect.
    pd.options.mode.chained_assignment = None

    options = ['A', 'B', 'C', 'D']
    if MC_options_number == ["5MC"]:
        options.append('E')

    presmoothing_data = alldata_forplot[alldata_forplot['item_id'] == x]
    total_students = len(presmoothing_data.apptnumber.unique())
    h = 1.1 * (total_students ** (-.2))  # bandwidth used in smoothing
    bandwidth_var = -1 / (2 * h)
    # Loop-invariant, and statistics.variance is pure Python: compute once.
    score_variance = statistics.variance(presmoothing_data[xaxis_values])

    items_df = pd.DataFrame({xaxis_values: list(range(0, total_correct + 1))})

    # Number of rows per score, and number of rows per (score, response).
    freq = (
        presmoothing_data.groupby(xaxis_values)
        .size()
        .reset_index(name='Freq')
    )
    counts = (
        presmoothing_data.groupby([xaxis_values, 'response'])
        .size()
        .unstack('response', fill_value=0)
        .reset_index()
    )
    weights_df = freq.merge(counts, on=xaxis_values)

    # An option nobody chose has no column in the counts table; treat it as
    # a zero count instead of failing with KeyError below.
    for option in options:
        if option not in weights_df.columns:
            weights_df[option] = 0
    for option in options:
        weights_df[option + '_pct'] = weights_df[option] / weights_df['Freq']

    # Float placeholders so the results stored later keep their precision.
    weights_df['kernal_smoothing_weight'] = 0.0
    for option in 'ABCDE':
        weights_df['smoothed_value_' + option] = 0.0

    # One row per possible score; scores nobody achieved get zeros.
    weights_df = items_df.merge(weights_df, on=xaxis_values, how='left')
    weights_df = weights_df.fillna(0)
    unsmoothed = weights_df.copy(deep=True)

    weights_df['bandwidth_var'] = bandwidth_var
    weights_df['var'] = score_variance

    scores = weights_df[xaxis_values]
    frequencies = weights_df['Freq']
    total_weights = []
    smoothed = {option: [] for option in options}
    weight_columns = {}

    for correct in range(0, total_correct + 1):
        weights = (
            np.exp(bandwidth_var * ((scores - correct) ** 2) / score_variance)
            * frequencies
        )
        total_weight = weights.sum()
        total_weights.append(total_weight)
        weight_columns['wgt_' + str(correct)] = weights

        all_weights_for_j = unsmoothed.copy(deep=True)
        all_weights_for_j['kernal_smoothing_weight'] = weights
        for option in options:
            if total_weight > 0:
                # Normalise by the total weight of THIS target score so the
                # contributions form a weighted average of the shares.
                contribution = (
                    weights * weights_df[option + '_pct'] / total_weight
                )
            else:
                # No usable weight (e.g. zero variance): report 0.
                contribution = weights * 0.0
            all_weights_for_j['smoothed_value_' + option] = contribution
            smoothed[option].append(contribution.sum())

    # Whole-column assignment: no chained indexing, safe under Copy-on-Write.
    weights_df['kernal_smoothing_weight'] = total_weights
    for option in options:
        weights_df['smoothed_value_' + option] = smoothed[option]
    if weight_columns:
        # Append all wgt_<j> columns at once rather than copying per pass.
        weights_df = pd.concat(
            [weights_df, pd.DataFrame(weight_columns)], axis=1
        )

    kernal_smoothing_weights_df = weights_df
|