df2 = pd.concat([df, df['domain'].str.split('.', expand=True)], axis=1).drop('domain', axis=1)
print(df2)
# local 0 1
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D ddd None None
如果剩余的列很少,则只能选择与pd.concat()串联(联接)时所需的列。
df3 = pd.concat([df['local'], df['domain'].str.split('.', expand=True)], axis=1)
print(df3)
# local 0 1
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D ddd None None
要重命名特定的列,请使用rename()方法。
df3.rename(columns={0: 'second_LD', 1: 'TLD'}, inplace=True)
print(df3)
# local second_LD TLD
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D ddd None None
df = s_org.str.extract('(.+)@(.+)\.(.+)', expand=True)
print(df)
# 0 1 2
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D NaN NaN NaN
df = s_org.str.extract('(.+)@(.+)\.(.+)', expand=False)
print(df)
# 0 1 2
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D NaN NaN NaN
FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
如果对正则表达式模式使用命名组(?P …),则该名称将按原样是列名。
df_name = s_org.str.extract('(?P<local>.*)@(?P<second_LD>.*)\.(?P<TLD>.*)', expand=True)
print(df_name)
# local second_LD TLD
# A aaa xxx com
# B bbb yyy com
# C ccc zzz com
# D NaN NaN NaN