dataframe 针对列条件赋值
针对单列条件:
复制代码
# Conventional approach: boolean mask + .loc
import pandas as pd

df = pd.DataFrame({
    'one': ['a', 'a', 'b', 'c'],
    'two': [3, 1, 2, 3],
    'three': ['C', 'B', 'C', 'A'],
})
print(df)

# Set column "one" to 'x' on the rows where column "two" equals 2.
# .loc with (row_mask, column) is the recommended form.
df.loc[df['two'] == 2, 'one'] = 'x'
print(df)

# The chained form `df.one[df.two == 2] = 'x'` is discouraged: it assigns into
# an intermediate object, triggers SettingWithCopyWarning, and does not update
# `df` when pandas Copy-on-Write is enabled. Attribute access may still be used
# to build the mask, but the assignment itself goes through .loc.
df.loc[df.two == 2, 'one'] = 'x'
print(df)
复制代码
复制代码
# Function-based approach
def fun(x):
    """Return 1 when ``x`` is at least 30, otherwise 0."""
    if x >= 30:
        return 1
    return 0
# NOTE(review): `feature` is not defined in this snippet; it is assumed to be
# an existing DataFrame with a numeric 'values' column -- confirm before running.
# Pass `fun` directly; wrapping it in `lambda x: fun(x)` adds nothing.
values = feature['values'].apply(fun)
# To write the change back into the original column of `feature`, assign it.
feature['values'] = values
# Alternatively, transform and assign in a single step. Use one form or the
# other: running both in sequence applies `fun` twice to the same column.
feature['values'] = feature['values'].apply(fun)
复制代码
复制代码
import numpy as np
import pandas as pd
data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2016, 2015, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
# 'debt' is not a key of `data`, so that column is created filled with NaN.
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])
# Using apply: if the "city" field contains the substring 'ing', the
# 'panduan' ("judgement") column is set to 1, otherwise to 0.
frame['panduan'] = frame.city.apply(lambda x: 1 if 'ing' in x else 0)
print(frame)
复制代码
针对多列的条件:
复制代码
# Conventional approach: combine boolean masks, then assign through .loc
import pandas as pd

df = pd.DataFrame({
    'one': ['a', 'a', 'b', 'c'],
    'two': [3, 1, 2, 3],
    'three': ['C', 'B', 'C', 'A'],
})
print(df)

# OR of two conditions; each comparison needs its own parentheses because
# `|` and `&` bind tighter than `==`. .loc is the recommended form.
df.loc[(df['two'] == 2) | (df['three'] == 'A'), 'one'] = 'x'
print(df)

# AND of two conditions, again assigned through .loc.
df.loc[(df['two'] == 2) & (df['three'] == 'C'), 'one'] = 'x'
print(df)
复制代码
复制代码
import numpy as np
import pandas as pd
data = {
    'city': ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Chongqing'],
    'year': [2016, 2016, 2015, 2017, 2016, 2016],
    'population': [2100, 2300, 1000, 700, 500, 500],
}
# 'debt' is not a key of `data`, so that column is created filled with NaN.
frame = pd.DataFrame(data, columns=['year', 'city', 'population', 'debt'])
def function(a, b):
    """Return 1 when ``a`` contains 'ing' and ``b`` equals 2016, otherwise 0.

    Args:
        a: Value supporting the ``in`` operator (a string in the example).
        b: Value compared against 2016.
    """
    if 'ing' in a and b == 2016:
        return 1
    return 0
# axis=1 hands each row to the lambda, so several columns can feed one decision.
frame['test'] = frame.apply(lambda row: function(row.city, row.year), axis=1)
print(frame)
复制代码
复制代码
def win_or_loss(df):
    """Classify a single row as 'win', 'loss' or 'd'.

    Despite the parameter name, ``df`` must be ONE row (for example the Series
    that ``DataFrame.apply(..., axis=1)`` passes in, or a plain mapping): the
    final ``if`` tests need scalar truth values, and a whole DataFrame would
    raise ``ValueError`` there.

    Fields read: 'gli_h', 'gli_g', 'gli_drew', 'sc_h', 'sc_g', 'eur_h', 'eur_g'.

    Returns:
        'win' if any win condition holds, else 'loss' if any loss condition
        holds, else 'd'. The win check runs first, so it takes precedence when
        both kinds of condition are satisfied.
    """
    # Elementary comparisons shared by the win and loss rules.
    gli_h_low = df['gli_h'] < -80
    gli_g_low = df['gli_g'] < -80
    gli_drew_low = df['gli_drew'] < -80
    sc_h_higher = df['sc_h'] > df['sc_g']
    sc_h_lower = df['sc_h'] < df['sc_g']
    eur_h_higher = df['eur_h'] > df['eur_g']
    eur_h_lower = df['eur_h'] < df['eur_g']

    cond_loss = (
        (gli_h_low & sc_h_higher)
        | (gli_g_low & sc_h_lower)
        | (gli_drew_low & eur_h_lower & sc_h_lower)
        | (gli_drew_low & eur_h_higher & sc_h_higher)
    )

    # Each win rule mirrors a loss rule with the sc_h / sc_g comparison flipped.
    cond_win = (
        (gli_h_low & sc_h_lower)
        | (gli_g_low & sc_h_higher)
        | (gli_drew_low & eur_h_lower & sc_h_higher)
        | (gli_drew_low & eur_h_higher & sc_h_lower)
    )

    if cond_win:
        return 'win'
    if cond_loss:
        return 'loss'
    return 'd'
def df_mark_win(df):
    """Filter ``df`` by five conditions and label the surviving rows.

    A row is kept only when ALL of the following hold:
      * every one of price_h / price_drew / price_g is greater than 1.9;
      * at least one of trade_h / trade_drew / trade_g is greater than 300000;
      * at least one of index_h / index_drew / index_g is greater than 80;
      * at least one of gli_h / gli_drew / gli_g is less than -80;
      * at least one of hot_h / hot_drew / hot_g is greater than 80.

    Returns:
        A new DataFrame (a copy; ``df`` itself is not modified) holding the
        kept rows plus a 'result' column computed per row by ``win_or_loss``.
    """
    cond_price = (df['price_h'] > 1.9) & (df['price_drew'] > 1.9) & (df['price_g'] > 1.9)
    cond_trd = (df['trade_h'] > 300000) | (df['trade_drew'] > 300000) | (df['trade_g'] > 300000)
    cond_bfidx = (df['index_h'] > 80) | (df['index_drew'] > 80) | (df['index_g'] > 80)
    cond_gli = (df['gli_h'] < -80) | (df['gli_drew'] < -80) | (df['gli_g'] < -80)
    cond_hot = (df['hot_h'] > 80) | (df['hot_drew'] > 80) | (df['hot_g'] > 80)

    # Work on a copy so the new column is not written into a slice of the
    # caller's frame (avoids SettingWithCopy problems).
    df_rst = df.loc[cond_price & cond_trd & cond_bfidx & cond_gli & cond_hot].copy()

    if df_rst.empty:
        # apply(axis=1) on an empty frame returns an empty DataFrame rather
        # than a Series, which cannot be assigned to a single column; add the
        # empty column explicitly instead.
        df_rst['result'] = []
    else:
        df_rst['result'] = df_rst.apply(win_or_loss, axis=1)
    return df_rst
复制代码