judit
/
python


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
							def kernal_smoothing(xaxis_values):
							    """Kernel-smooth the answer-choice proportions of the current item.

							    For every target score ``j`` in ``0..total_correct`` the smoothed
							    proportion of option ``X`` is the weighted average

							        sum_i(w_ij * X_pct_i) / sum_i(w_ij)

							    with ``w_ij = exp(-(score_i - j)**2 / (2 * h * var)) * Freq_i``, where
							    ``Freq_i`` is the number of rows at score ``i``, ``X_pct_i`` the share
							    of those rows answering ``X``, ``h = 1.1 * n_students ** -0.2`` the
							    bandwidth and ``var`` the sample variance of the ``xaxis_values``
							    column over the item's rows.

							    Reads the module-level names ``alldata_forplot``, ``x`` (item id used
							    to filter ``alldata_forplot['item_id']``), ``total_correct`` and
							    ``MC_options_number``.  Results are published through the globals
							    ``presmoothing_data``, ``items_df``, ``kernal_smoothing_weights_df``
							    (one row per score with ``Freq``, the option counts, ``*_pct``,
							    ``kernal_smoothing_weight``, ``smoothed_value_*``, ``bandwidth_var``,
							    ``var`` and one ``wgt_<j>`` column per target score) and
							    ``all_weights_for_j`` (per-row contributions for the last target).

							    Args:
							        xaxis_values: Name of the score column of ``alldata_forplot`` that
							            is used as the x axis of the smoothed curves.

							    Returns:
							        None.

							    Raises:
							        ZeroDivisionError: If no row matches the item, because the
							            bandwidth raises zero students to a negative power.
							        statistics.StatisticsError: If fewer than two rows match the item,
							            because the sample variance is undefined.
							    """
							    global presmoothing_data
							    global kernal_smoothing_weights_df
							    global all_weights_for_j
							    global items_df

							    # This function no longer performs chained assignment itself; the
							    # option is still set because it is a process-wide side effect of the
							    # original.  NOTE(review): confirm whether other code relies on it.
							    pd.options.mode.chained_assignment = None

							    option_letters = ['A', 'B', 'C', 'D']
							    if MC_options_number == ["5MC"]:
							        option_letters.append('E')

							    presmoothing_data = alldata_forplot[alldata_forplot['item_id'] == x]

							    total_students = len(presmoothing_data.apptnumber.unique())
							    h = 1.1 * (total_students ** (-.2))  # bandwidth used in smoothing

							    # One row per possible score so that scores nobody obtained still get
							    # a smoothed value.
							    items_df = pd.DataFrame({xaxis_values: list(range(0, total_correct + 1))})

							    score_counts = (
							        presmoothing_data.groupby(xaxis_values).size().reset_index(name='Freq')
							    )
							    response_counts = (
							        presmoothing_data.groupby([xaxis_values, 'response'])
							        .size()
							        .unstack('response', fill_value=0)
							    )
							    # An option that no student picked has no column yet; treat it as a
							    # count of zero instead of failing with a KeyError further down.
							    for letter in option_letters:
							        if letter not in response_counts.columns:
							            response_counts[letter] = 0

							    kernal_smoothing_weights_df = score_counts.merge(
							        response_counts.reset_index(), on=xaxis_values
							    )
							    for letter in option_letters:
							        kernal_smoothing_weights_df[letter + '_pct'] = (
							            kernal_smoothing_weights_df[letter] / kernal_smoothing_weights_df['Freq']
							        )

							    # Float zeros: these columns receive float results later, and an
							    # integer column would have to be upcast (or would truncate).
							    kernal_smoothing_weights_df['kernal_smoothing_weight'] = 0.0
							    for letter in 'ABCDE':
							        kernal_smoothing_weights_df['smoothed_value_' + letter] = 0.0

							    kernal_smoothing_weights_df = items_df.merge(
							        kernal_smoothing_weights_df, on=xaxis_values, how='left'
							    ).fillna(0)
							    unsmoothed_df = kernal_smoothing_weights_df.copy(deep=True)

							    # Loop invariants, computed once (statistics.variance is pure Python).
							    bandwidth_var = -1 / (2 * h)
							    score_variance = statistics.variance(presmoothing_data[xaxis_values])
							    kernal_smoothing_weights_df['bandwidth_var'] = bandwidth_var
							    kernal_smoothing_weights_df['var'] = score_variance

							    scores = kernal_smoothing_weights_df[xaxis_values]
							    frequencies = kernal_smoothing_weights_df['Freq']
							    weight_totals = []
							    smoothed = {letter: [] for letter in option_letters}

							    for correct in range(0, total_correct + 1):
							        all_weights_for_j = unsmoothed_df.copy(deep=True)

							        # Weight of every observed score towards the target score.
							        weights = (
							            np.exp(bandwidth_var * ((scores - correct) ** 2) / score_variance)
							            * frequencies
							        )
							        weight_total = weights.sum()
							        weight_totals.append(weight_total)

							        all_weights_for_j['kernal_smoothing_weight'] = weights
							        kernal_smoothing_weights_df['wgt_' + str(correct)] = weights

							        for letter in option_letters:
							            # Normalise by the total weight of THIS target score.  Dividing
							            # by the whole 'kernal_smoothing_weight' column would mix in
							            # the totals of other targets and zeros for targets not yet
							            # processed.
							            if weight_total > 0:
							                contributions = (
							                    weights * kernal_smoothing_weights_df[letter + '_pct']
							                ) / weight_total
							            else:
							                # No weight at all (no observations in range): report 0.
							                contributions = pd.Series(0.0, index=weights.index)
							            all_weights_for_j['smoothed_value_' + letter] = contributions
							            smoothed[letter].append(contributions.sum())

							    # Whole-column assignment instead of cell-by-cell chained writes, which
							    # do not reach the frame when pandas Copy-on-Write is active.
							    kernal_smoothing_weights_df['kernal_smoothing_weight'] = weight_totals
							    for letter in option_letters:
							        kernal_smoothing_weights_df['smoothed_value_' + letter] = smoothed[letter]