博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
5-pandas基础运算
阅读量:6878 次
发布时间:2019-06-26

本文共 6708 字,大约阅读时间需要 22 分钟。

hot3.png

# encoding: utf8
# Tutorial script: basic pandas operations -- reindexing, dropping labels,
# apply / element-wise formatting, sorting, ranking, value counts and
# membership tests.  Every step prints its result.
#
# Sample outputs shown in comments come from the original article and are
# only recorded for deterministic data; frames built from random numbers
# print different values on every run.  Exact formatting (dtype footers,
# order of tied entries) may differ between pandas versions.
import pandas as pd
import numpy as np

# --- Series reindexing -----------------------------------------------------
s = pd.Series([1, 2, 3, 4, 5], index=list('acefh'))
print(s)
# a    1
# c    2
# e    3
# f    4
# h    5

print(s.index)
# Index(['a', 'c', 'e', 'f', 'h'], dtype='object')

# Reindex onto a larger label set; labels missing from `s` get 0 instead
# of NaN.
print(s.reindex(list('abcdefgh'), fill_value=0))
# a    1
# b    0
# c    2
# d    0
# e    3
# f    4
# g    0
# h    5

# Plain reindex: labels missing from `s` become NaN, which turns the
# values into floats.
print(s.reindex(list('abcdefgh')))
# a    1.0
# b    NaN
# c    2.0
# d    NaN
# e    3.0
# f    4.0
# g    NaN
# h    5.0
# dtype: float64

# method='ffill' fills each new label with the previous valid value (like
# carrying the last traded price forward while a stock is suspended).
# method='bfill' would instead use the next valid value.
print(s.reindex(list('abcdefgh'), method='ffill'))
# a    1
# b    1
# c    2
# d    2
# e    3
# f    4
# g    4
# h    5

# --- DataFrame reindexing --------------------------------------------------
df = pd.DataFrame(
    np.random.randn(4, 6),
    index=list('ADFH'),
    columns=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(df)

# Reindex the rows; the newly introduced rows B, C, E and G are filled
# with 0 (without fill_value they would be NaN).
df2 = df.reindex(index=list('ABCDEFGH'), fill_value=0)
print(df2)

# Reindex the columns; the new column 'seven' is filled with 0 (without
# fill_value it would be NaN).
print(
    df.reindex(
        columns=['one', 'two', 'three', 'four', 'five', 'six', 'seven'],
        fill_value=0,
    )
)
# NOTE(review): the original article states that `method=` filling only
# works along rows, not columns -- confirm against the pandas version in use.

# --- Dropping rows / columns -----------------------------------------------
# drop() returns a new object; `df` itself is left unchanged, as the final
# print(df) below demonstrates.
print(df.drop('A'))
# axis=0 (the default) drops rows, axis=1 drops columns.
print(df.drop(['one', 'two'], axis=1))
print(df)

# --- apply -------------------------------------------------------------------
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)
#        A   B   C
# one    0   1   2
# two    3   4   5
# three  6   7   8
# four   9  10  11

# Default axis=0: the function receives each column.
print(df.apply(lambda x: x.max() - x.min()))
# A    9
# B    9
# C    9

# axis=1: the function receives each row.
print(df.apply(lambda x: x.max() - x.min(), axis=1))
# one      2
# two      2
# three    2
# four     2

# Show the built-in documentation of DataFrame.apply.  Relevant excerpt:
#     axis : {0 or 'index', 1 or 'columns'}, default 0
#         * 0 or 'index': apply function to each column
#         * 1 or 'columns': apply function to each row
help(df.apply)


# apply also accepts a regular named function, not just a lambda.
def min_max(x):
    """Return a Series holding the minimum and maximum of ``x``.

    ``x`` is whatever ``DataFrame.apply`` passes in (one column or one row);
    it only needs ``min()`` and ``max()`` methods.  The result is indexed by
    the labels ``'min'`` and ``'max'``.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])


print(df.apply(min_max))
#      A   B   C
# min  0   1   2
# max  9  10  11

print(df.apply(min_max, axis=1))
#        min  max
# one      0    2
# two      3    5
# three    6    8
# four     9   11

# --- Element-wise formatting ------------------------------------------------
df = pd.DataFrame(
    np.random.randn(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)


def formater(x):
    """Format a number as a string with three digits after the decimal point."""
    return '%.03f' % x


# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; use the new name when it exists and fall back otherwise.
_format_cells = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
print(_format_cells(formater))

# --- Sorting -----------------------------------------------------------------
df = pd.DataFrame(
    np.random.randint(1, 10, (4, 3)),
    columns=['one', 'two', 'three'],
    index=list('ABCD'),
)
# Sort the rows by column 'one', largest value first.
print(df.sort_values(by='one', ascending=False))

# --- Ranking -----------------------------------------------------------------
s = pd.Series([3, 6, 2, 6, 4])
# method='first' breaks ties by order of appearance; the default
# method='average' would give both 6s the rank 4.5.
print(s.rank(method='first'))
# 0    2.0
# 1    4.0
# 2    1.0
# 3    5.0
# 4    3.0

print(df)
# Ranks are computed within each column.
print(df.rank(method='first'))

# --- Counting and membership --------------------------------------------------
s = pd.Series(list('aaaabbbdbdbdbdjdjkfk'))
print(s.value_counts())
# b    6
# d    5
# a    4
# k    2
# j    2
# f    1

# unique() returns the distinct values in order of first appearance.
print(s.unique())
# ['a' 'b' 'd' 'j' 'k' 'f']

# isin() tests each element for membership in the given collection:
# True at positions 0-3 ('a') and 17, 19 ('k'), False everywhere else.
print(s.isin(['a', 'c', 'k']))

# Every element is trivially contained in the Series' own unique values,
# so this prints True for all 20 positions.
print(s.isin(s.unique()))

 

转载于:https://my.oschina.net/eddylinux/blog/1528570

你可能感兴趣的文章
openSUSE13.2安装ruby和rails
查看>>
python 高级函数
查看>>
F.Cards with Numbers
查看>>
简单入门Buffer
查看>>
OO第四阶段总结
查看>>
javascript总结02
查看>>
创建windows服务
查看>>
HTML5 入门基础
查看>>
【转载】读懂IL代码就这么简单(二)
查看>>
C++文件操作(fstream)
查看>>
R语言学习路线图-转帖
查看>>
【导入导出】sqlldr 导入含有内嵌换行符的数据
查看>>
Linux中常用命令
查看>>
RDS最佳实践(四)—如何处理Mysql的子查询
查看>>
最大流:Dinic模板
查看>>
安卓开发中个人能力的进阶进程
查看>>
人工智能10年将有颠覆性改变
查看>>
探秘AOP实现原理
查看>>
单点登录(SSO)简介
查看>>
2018最新大数据学习路线分享
查看>>