关于问卷数据的多选题独热编码处理

avatar
作者
筋斗云
阅读量:0
import pandas as pd

# Path of the Excel workbook that is read as the input table.
file_path = "量表编码.xlsx"

# Column names of the multiple-choice questions to one-hot encode.
multiple_choice_cols = [
    "7、",
    "8、",
    "9、",
    "12、",
    "14、",
    "21、",
    "25、",
]


def process_multiple_choice_columns(data, columns, sep="┋"):
    """One-hot encode multiple-choice columns, keeping their position.

    Each listed column that exists in ``data`` is split on ``sep`` with
    ``Series.str.get_dummies`` and replaced by one indicator column per
    distinct option. The new columns are named ``"<column>_<option>"`` and
    are inserted where the original column used to be.

    Args:
        data: DataFrame holding the answers.
        columns: Iterable of column names to expand. Names that are not
            present in ``data`` are skipped silently.
        sep: Separator between the selected options inside one cell.

    Returns:
        A new DataFrame when at least one column was expanded; otherwise
        the ``data`` object that was passed in.
    """
    for col in columns:
        if col not in data.columns:
            continue
        # Remember the position before dropping, so the indicator columns
        # can take the place of the original column.
        col_index = data.columns.get_loc(col)
        dummies = data[col].str.get_dummies(sep)
        data = data.drop(columns=[col])
        for offset, option in enumerate(dummies.columns):
            data.insert(col_index + offset, f"{col}_{option}", dummies[option])
    return data


def process_non_numeric_columns(data):
    """Replace every object-dtype column with integer category codes.

    The columns are overwritten in place on ``data``. Codes come from
    ``pd.Categorical(...).codes``, so missing values are encoded as ``-1``.

    Args:
        data: DataFrame to encode.

    Returns:
        The same ``data`` object, after modification.
    """
    non_numeric_cols = data.select_dtypes(include=["object"]).columns
    for col in non_numeric_cols:
        data[col] = pd.Categorical(data[col]).codes
    return data


if __name__ == "__main__":
    # Read the Excel file.
    data = pd.read_excel(file_path)

    # One-hot encode the multiple-choice columns.
    data = process_multiple_choice_columns(data, multiple_choice_cols)

    # Encode the remaining non-numeric columns as integer codes.
    data = process_non_numeric_columns(data)

    # NOTE: no scaling is applied here; the name only mirrors the output file.
    data_scaled = pd.DataFrame(data)
    data_scaled.to_excel("data_scaled.xlsx")

广告一刻

为您即时展示最新活动产品广告消息,让您随时掌握产品活动新动态!