# encoding: utf8
"""Walk-through of common pandas Series/DataFrame operations.

Covers reindexing (with fill values and forward fill), dropping labels,
column-wise / row-wise ``apply``, element-wise formatting, sorting, ranking,
value counts, unique values and membership tests.  Every step prints its
result; tables built from ``np.random`` differ on every run, so only the
deterministic outputs are reproduced in the comments.
"""
import pandas as pd
import numpy as np

# --- Reindexing a Series ---------------------------------------------------
s = pd.Series([1, 2, 3, 4, 5], index=list('acefh'))
print(s)
# a    1
# c    2
# e    3
# f    4
# h    5

print(s.index)
# Index(['a', 'c', 'e', 'f', 'h'], dtype='object')

# Reindex and give the labels that did not exist before the value 0.
print(s.reindex(list('abcdefgh'), fill_value=0))
# a 1, b 0, c 2, d 0, e 3, f 4, g 0, h 5

# Without a fill rule the new labels hold NaN (sample output shows the
# result as float64).
print(s.reindex(list('abcdefgh')))
# a 1.0, b NaN, c 2.0, d NaN, e 3.0, f 4.0, g NaN, h 5.0

# method='ffill' copies the previous non-NaN value into each gap (like
# carrying a suspended stock's last price forward); method='bfill' would
# use the next non-NaN value instead.
print(s.reindex(list('abcdefgh'), method='ffill'))
# a 1, b 1, c 2, d 2, e 3, f 4, g 4, h 5

# --- Reindexing a DataFrame ------------------------------------------------
df = pd.DataFrame(
    np.random.randn(4, 6),
    index=list('ADFH'),
    columns=['one', 'two', 'three', 'four', 'five', 'six'],
)
print(df)  # 4x6 table of random floats, rows A, D, F, H

# Reindex the rows; the new rows B, C, E and G are filled with 0
# (they would be NaN without fill_value).
df2 = df.reindex(index=list('ABCDEFGH'), fill_value=0)
print(df2)

# Reindex the columns; the new column 'seven' is filled with 0
# (it would be NaN without fill_value).
print(df.reindex(
    columns=['one', 'two', 'three', 'four', 'five', 'six', 'seven'],
    fill_value=0,
))
# NOTE(review): the original note here says `method=` only works for rows,
# not columns.  That is not demonstrated by this script -- confirm against
# the pandas `reindex` documentation before relying on it.

# --- Dropping rows / columns -----------------------------------------------
# drop() returns a new object; the original data is left unchanged.
print(df.drop('A'))                    # rows D, F, H remain
# axis=0 (default) drops rows, axis=1 drops columns.
print(df.drop(['one', 'two'], axis=1))  # columns three..six remain
print(df)                              # still the full 4x6 table

# --- apply() ---------------------------------------------------------------
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)
#        A   B   C
# one    0   1   2
# two    3   4   5
# three  6   7   8
# four   9  10  11

# Default axis=0: the function receives one column at a time.
print(df.apply(lambda x: x.max() - x.min()))
# A 9, B 9, C 9

# axis=1: the function receives one row at a time.
print(df.apply(lambda x: x.max() - x.min(), axis=1))
# one 2, two 2, three 2, four 2

# Show the built-in help; the relevant excerpt is:
#   axis : {0 or 'index', 1 or 'columns'}, default 0
#       * 0 or 'index': apply function to each column
#       * 1 or 'columns': apply function to each row
help(df.apply)


# apply() also accepts a regular function, not only a lambda.
def min_max(x):
    """Return the minimum and maximum of *x* as a Series.

    *x* is whatever ``apply`` hands over (a column for axis=0, a row for
    axis=1); the result is indexed by the labels 'min' and 'max'.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])


print(df.apply(min_max))
#      A   B   C
# min  0   1   2
# max  9  10  11

print(df.apply(min_max, axis=1))
#        min  max
# one      0    2
# two      3    5
# three    6    8
# four     9   11

# --- Formatting every element to a fixed number of decimals ----------------
df = pd.DataFrame(
    np.random.randn(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)  # random floats with 6 decimals


def formater(x):
    """Format the number *x* as a string with three decimal places."""
    return '%.3f' % x


# DataFrame.applymap is deprecated since pandas 2.1 (renamed DataFrame.map);
# mapping each column with Series.map gives the same element-wise result on
# both old and new pandas versions.
print(df.apply(lambda column: column.map(formater)))
# same table, every cell rendered as e.g. '0.030'

# --- Sorting ---------------------------------------------------------------
df = pd.DataFrame(
    np.random.randint(1, 10, (4, 3)),
    columns=['one', 'two', 'three'],
    index=list('ABCD'),
)
# Rows ordered by column 'one', largest value first.
print(df.sort_values(by='one', ascending=False))

# --- Ranking ---------------------------------------------------------------
s = pd.Series([3, 6, 2, 6, 4])
# method='first' breaks ties by order of appearance (the earlier element
# gets the smaller rank).  The default, method='average', would give both
# 6s the rank 4.5.
print(s.rank(method='first'))
# 0 2.0, 1 4.0, 2 1.0, 3 5.0, 4 3.0

print(df)
print(df.rank(method='first'))  # ranks computed within each column

# --- Counting, unique values and membership --------------------------------
s = pd.Series(list('aaaabbbdbdbdbdjdjkfk'))
print(s.value_counts())
# b 6, d 5, a 4, k 2, j 2, f 1

# List of distinct elements, in order of first appearance.
print(s.unique())
# ['a' 'b' 'd' 'j' 'k' 'f']

# Element-wise test: is each value one of the given values?
print(s.isin(['a', 'c', 'k']))
# True at positions 0-3, 17 and 19; False elsewhere

print(s.isin(s.unique()))
# True for all 20 elements