def kernal_smoothing(xaxis_values):
    """Kernel-smooth the share of each answer option across the score range.

    For the item selected by the module-level ``x``, the proportion of
    students choosing each option (A-D, plus E when ``MC_options_number ==
    ["5MC"]``) is computed at every observed value of ``xaxis_values`` and
    then smoothed over the grid ``0 .. total_correct`` with a Gaussian-shaped
    kernel weighted by the number of students at each score:

        weight(c, i)      = exp(-(score_i - c)**2 / (2 * h * var)) * Freq_i
        smoothed_opt(c)   = sum_i weight(c, i) * opt_pct_i / sum_i weight(c, i)

    where ``h = 1.1 * N**(-0.2)``, ``N`` is the number of distinct
    ``apptnumber`` values for the item and ``var`` is the sample variance of
    ``xaxis_values`` over the item's rows.

    Args:
        xaxis_values: Name of the column in ``alldata_forplot`` holding the
            score used as the x axis. The grid is always the integers
            ``0 .. total_correct``.

    Reads (module level): ``alldata_forplot`` (needs the columns ``item_id``,
    ``apptnumber``, ``response`` and ``xaxis_values``), ``x``,
    ``total_correct``, ``MC_options_number``.

    Writes (module level):
        presmoothing_data: rows of ``alldata_forplot`` for item ``x``.
        items_df: one-column frame holding the score grid.
        kernal_smoothing_weights_df: one row per grid score with ``Freq``,
            the per-response counts, ``<opt>_pct``, ``kernal_smoothing_weight``
            (total kernel weight at that score), ``smoothed_value_A`` ..
            ``smoothed_value_E``, ``bandwidth_var``, ``var`` and one
            ``wgt_<score>`` column per grid score.
        all_weights_for_j: per-row weights and per-row smoothed contributions
            for the last grid score processed.

    Returns:
        None. Results are published through the module-level names above.

    Raises:
        statistics.StatisticsError: if the item has fewer than two rows.
        ZeroDivisionError: if the item has no students at all.

    Changes relative to the previous implementation:
      * results are written with ``DataFrame.at`` instead of chained
        ``df[col].iat[i] = ...`` / ``df[col][i] = ...``, which silently do
        nothing under pandas Copy-on-Write;
      * each smoothed value is normalised by the total weight of the score
        being evaluated; previously it was divided row by row by the partly
        filled totals column, so the option shares did not sum to one;
      * the kernel distance uses ``xaxis_values`` instead of the hard-coded
        ``'num_correct'`` column;
      * an answer option nobody selected is treated as a zero count instead
        of raising ``KeyError``.
    """
    global presmoothing_data
    global kernal_smoothing_weights_df
    global all_weights_for_j
    global items_df

    # Not needed by this function any more (no chained assignment is left),
    # but it is a process-wide setting that other code in this file may rely
    # on, so the side effect is preserved.
    pd.options.mode.chained_assignment = None

    options = ['A', 'B', 'C', 'D']
    if MC_options_number == ["5MC"]:
        options.append('E')

    presmoothing_data = alldata_forplot[alldata_forplot['item_id'] == x]
    total_students = len(presmoothing_data.apptnumber.unique())
    h = 1.1 * (total_students ** (-.2))  # bandwidth used in smoothing

    # Both quantities are the same for every grid score, so compute them once.
    bandwidth_var = -1 / (2 * h)
    score_variance = statistics.variance(presmoothing_data[xaxis_values])

    items_df = pd.DataFrame({xaxis_values: list(range(0, total_correct + 1))})

    # Number of students at each observed score.
    score_counts = (
        presmoothing_data.groupby(xaxis_values).size().reset_index(name='Freq')
    )

    # Number of students per (score, response); absent combinations become 0.
    option_counts = (
        presmoothing_data.groupby([xaxis_values, 'response'])
        .size()
        .unstack(fill_value=0)
    )
    for option in options:
        # An option that was never chosen for this item has no column yet.
        if option not in option_counts.columns:
            option_counts[option] = 0

    table = score_counts.merge(option_counts.reset_index(), on=xaxis_values)
    for option in options:
        table[option + '_pct'] = table[option] / table['Freq']

    # Result columns start as floats so later scalar writes keep full
    # precision. smoothed_value_E is always present, even for 4-option items.
    table['kernal_smoothing_weight'] = 0.0
    for option in ['A', 'B', 'C', 'D', 'E']:
        table['smoothed_value_' + option] = 0.0

    # Expand to the full score grid; scores nobody achieved get zeros. After
    # this merge the row label equals the grid score (0 .. total_correct).
    table = items_df.merge(table, on=xaxis_values, how='left').fillna(0)
    baseline = table.copy(deep=True)

    table['bandwidth_var'] = bandwidth_var
    table['var'] = score_variance

    scores = table[xaxis_values]
    freq = table['Freq']
    weight_columns = {}

    for correct in range(0, total_correct + 1):
        # Kernel weight of every row when evaluating the curve at `correct`.
        weights = (
            np.exp(bandwidth_var * ((scores - correct) ** 2) / score_variance)
            * freq
        )
        total_weight = weights.sum()
        weight_columns['wgt_' + str(correct)] = weights
        table.at[correct, 'kernal_smoothing_weight'] = total_weight

        # Normalise by the total weight of THIS score. With no usable weight
        # the smoothed values stay 0 rather than becoming inf/NaN.
        if total_weight > 0:
            shares = weights / total_weight
        else:
            shares = pd.Series(0.0, index=weights.index)

        # Rebuilt on every pass so the module-level name always describes the
        # most recently processed score, as before.
        all_weights_for_j = baseline.copy(deep=True)
        all_weights_for_j['kernal_smoothing_weight'] = weights
        for option in options:
            contributions = shares * table[option + '_pct']
            all_weights_for_j['smoothed_value_' + option] = contributions
            table.at[correct, 'smoothed_value_' + option] = contributions.sum()

    if weight_columns:
        # Append all wgt_<score> columns at once instead of one per pass.
        table = pd.concat([table, pd.DataFrame(weight_columns)], axis=1)

    kernal_smoothing_weights_df = table