|
|
@@ -0,0 +1,68 @@
|
|
|
def kernal_smoothing(xaxis_values):
    """Kernel-smooth the answer-choice proportions of one item.

    Inputs are read from module-level names: ``alldata_forplot`` (filtered to
    rows whose ``item_id`` equals ``x``), ``total_correct`` and
    ``MC_options_number``.  Results are published through the module-level
    names ``presmoothing_data``, ``items_df``, ``kernal_smoothing_weights_df``
    and ``all_weights_for_j``; nothing is returned.

    For every grid point ``j`` in ``0..total_correct`` each observed x-axis
    value ``x_i`` receives the weight::

        w_i(j) = Freq_i * exp(-(x_i - j) ** 2 / (2 * h * var))

    where ``h`` is the bandwidth and ``var`` the sample variance of the
    x-axis column.  The smoothed proportion of an answer option at ``j`` is
    the weighted average ``sum_i(w_i(j) * pct_i) / sum_i(w_i(j))``.

    ``kernal_smoothing_weights_df`` ends up with one row per grid point and
    the columns: the x-axis column, ``Freq``, the raw response counts,
    ``<option>_pct``, ``kernal_smoothing_weight`` (total weight at that grid
    point), ``smoothed_value_A`` .. ``smoothed_value_E``, ``bandwidth_var``,
    ``var`` and one ``wgt_<j>`` column per grid point.  ``all_weights_for_j``
    holds the per-row weights and contributions of the last grid point.

    Args:
        xaxis_values: Name of the column of ``alldata_forplot`` used as the
            x axis.  Its values are matched against the integer grid
            ``0..total_correct``.

    Raises:
        ZeroDivisionError: If no row of ``alldata_forplot`` belongs to the
            item (the bandwidth is ``1.1 * 0 ** -0.2``).
        statistics.StatisticsError: If the item has fewer than two rows, so
            no sample variance exists.
    """
    global presmoothing_data
    global kernal_smoothing_weights_df
    global all_weights_for_j
    global items_df

    # No chained assignment is used below any more; the option is still set
    # because it is a process-wide side effect other code may rely on.
    pd.options.mode.chained_assignment = None

    option_letters = ['A', 'B', 'C', 'D']
    if MC_options_number == ["5MC"]:
        option_letters.append('E')

    presmoothing_data = alldata_forplot[alldata_forplot['item_id'] == x]

    total_students = len(presmoothing_data.apptnumber.unique())
    h = 1.1 * (total_students ** (-.2))  # bandwidth used in smoothing

    # Full grid of x-axis values, so values nobody achieved still get a row.
    items_df = pd.DataFrame({xaxis_values: list(range(0, total_correct + 1))})

    # Number of responses per x-axis value.
    freq_table = (
        presmoothing_data.groupby(xaxis_values).size().reset_index(name='Freq')
    )

    # Number of responses per x-axis value and answer option.
    response_counts = (
        presmoothing_data.groupby([xaxis_values, 'response'])
        .size()
        .unstack(fill_value=0)
    )
    # An option nobody picked is absent from the table; treat it as a zero
    # count instead of failing with a KeyError further down.
    for letter in option_letters:
        if letter not in response_counts.columns:
            response_counts[letter] = 0

    table = freq_table.merge(response_counts.reset_index(), on=xaxis_values)

    # Raw proportions are computed before joining the full grid, so Freq is
    # never zero here.
    for letter in option_letters:
        table[letter + '_pct'] = table[letter] / table['Freq']

    # Float accumulators: integer columns could truncate or reject the
    # fractional results written into them below.
    table['kernal_smoothing_weight'] = 0.0
    for letter in ['A', 'B', 'C', 'D', 'E']:
        table['smoothed_value_' + letter] = 0.0

    # The left join keeps the grid order and yields a fresh 0..total_correct
    # index, so row label == row position == grid value.
    kernal_smoothing_weights_df = items_df.merge(
        table, on=xaxis_values, how='left'
    ).fillna(0)

    per_point_template = kernal_smoothing_weights_df.copy(deep=True)

    # Loop invariants, computed once.
    bandwidth_var = -1 / (2 * h)
    xaxis_variance = statistics.variance(presmoothing_data[xaxis_values])
    kernal_smoothing_weights_df['bandwidth_var'] = bandwidth_var
    kernal_smoothing_weights_df['var'] = xaxis_variance

    grid = kernal_smoothing_weights_df[xaxis_values]
    freq = kernal_smoothing_weights_df['Freq']

    for correct in range(0, total_correct + 1):
        all_weights_for_j = per_point_template.copy(deep=True)

        # Gaussian kernel centred on the current grid point, scaled by how
        # many responses each x-axis value carries.
        weights = (
            np.exp(bandwidth_var * ((grid - correct) ** 2) / xaxis_variance)
            * freq
        )
        total_weight = weights.sum()

        all_weights_for_j['kernal_smoothing_weight'] = weights
        kernal_smoothing_weights_df.loc[
            correct, 'kernal_smoothing_weight'
        ] = total_weight
        kernal_smoothing_weights_df['wgt_' + str(correct)] = weights

        for letter in option_letters:
            if total_weight > 0:
                # Normalise by the total weight of THIS grid point.  Dividing
                # by the partly filled 'kernal_smoothing_weight' column
                # instead would silently drop every row above ``correct``.
                contributions = (
                    weights * kernal_smoothing_weights_df[letter + '_pct']
                ) / total_weight
            else:
                # No usable weight at all: report 0 rather than inf/NaN.
                contributions = pd.Series(0.0, index=weights.index)

            all_weights_for_j['smoothed_value_' + letter] = contributions
            kernal_smoothing_weights_df.loc[
                correct, 'smoothed_value_' + letter
            ] = contributions.sum()
|