本文共 1876 字,大约阅读时间需要 6 分钟。
# -*- coding: utf-8 -*-
# @File : pandas_dataframe_add_new_class_demo.py
# @Date : 2020-01-06 17:49
# @Author : admin
"""Demo: derive a ``category`` column from the ``title`` column of the 911 data.

Each title is split on ``": "`` and the text before the first separator is
used as the category (the recorded run shows values such as ``EMS``, ``Fire``
and ``Traffic``).  The script prints the intermediate values and the number
of rows per category.
"""
import numpy as np
import pandas as pd

try:
    # Imported by the original script but never used by it.  Kept (so the name
    # `plt` stays available) but made optional, so the demo still runs where
    # matplotlib is not installed.
    from matplotlib import pyplot as plt  # noqa: F401
except ImportError:
    plt = None

CSV_PATH = "../../data/911.csv"
SAMPLE_ROWS = 10
TITLE_SEPARATOR = ": "


def extract_categories(titles: pd.Series) -> pd.Series:
    """Return the text before the first ``": "`` of every title.

    A title without the separator is returned unchanged.  The result keeps
    the index of *titles*, so it can be assigned straight back to the frame
    the titles came from.
    """
    return titles.str.split(TITLE_SEPARATOR, n=1).str[0]


def add_category_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``category`` column derived from ``title``.

    Working on a copy avoids writing into a slice of another frame (the
    script passes in the result of ``head()``).  Assigning an index-aligned
    Series, rather than a freshly 0..n-1-indexed DataFrame, keeps the values
    on the right rows even when *df* does not use the default index.
    """
    result = df.copy()
    result["category"] = extract_categories(result["title"])
    return result


def count_by_category(df: pd.DataFrame) -> pd.Series:
    """Return the number of non-null titles in each category."""
    return df.groupby(by="category")["title"].count()


def main() -> None:
    """Run the demo: load the sample, add the category column, print counts."""
    df = pd.read_csv(CSV_PATH).head(SAMPLE_ROWS)
    print(df.head(5))

    # Split each title and keep the leading part as its category.
    category_list = extract_categories(df["title"]).tolist()
    print(category_list)
    print(np.array(category_list))

    # Add the category as a new column of the frame.
    df = add_category_column(df)
    # The original printed `df.head` (the bound method) instead of calling it.
    print(df.head())

    # Number of rows per category.
    print(count_by_category(df))

    # Unrelated sample array printed at the end of the original script.
    b = np.array([[2, 4, 1, 5, 6, 1], [9, 5, 76, 23, 5, 9]])
    print(b)


if __name__ == "__main__":
    main()
结果:
         lat        lng  ...                        addr  e
0  40.297876 -75.581294  ...      REINDEER CT & DEAD END  1
1  40.258061 -75.264680  ...  BRIAR PATH & WHITEMARSH LN  1
2  40.121182 -75.351975  ...                    HAWS AVE  1
3  40.116153 -75.343513  ...          AIRY ST & SWEDE ST  1
4  40.251492 -75.603350  ...    CHERRYWOOD CT & DEAD END  1

[5 rows x 9 columns]
['EMS', 'EMS', 'Fire', 'EMS', 'EMS', 'EMS', 'EMS', 'EMS', 'EMS', 'Traffic']
['EMS' 'EMS' 'Fire' 'EMS' 'EMS' 'EMS' 'EMS' 'EMS' 'EMS' 'Traffic']
<bound method NDFrame.head of          lat        lng  ...  e category
0  40.297876 -75.581294  ...  1      EMS
1  40.258061 -75.264680  ...  1      EMS
2  40.121182 -75.351975  ...  1     Fire
3  40.116153 -75.343513  ...  1      EMS
4  40.251492 -75.603350  ...  1      EMS
5  40.253473 -75.283245  ...  1      EMS
6  40.182111 -75.127795  ...  1      EMS
7  40.217286 -75.405182  ...  1      EMS
8  40.289027 -75.399590  ...  1      EMS
9  40.102398 -75.291458  ...  1  Traffic

转载地址:http://mxtgi.baihongyu.com/