机器学习记录
Pandas
conda install pandas
数据
git clone https://github.com/KeithGalli/pandas .git
练习
python">import pandas as pd
python">data_dir = "/data_dir"
python">df = pd. read_csv( f' { data_dir} /pandas /pokemon_data.csv' )
df. shape
(800, 12)
python">df. head( )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
python">df = pd. read_excel( f' { data_dir} /pandas /pokemon_data.xlsx' )
df. shape
(800, 12)
python">df. head( )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
python">def read ( ) :
global df
df = pd. read_csv( f' { data_dir} /pandas /pokemon_data.csv' )
read( )
df. columns
Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
'Sp. Def', 'Speed', 'Generation', 'Legendary'],
dtype='object')
python">df. Name
0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
...
795 Diancie
796 DiancieMega Diancie
797 HoopaHoopa Confined
798 HoopaHoopa Unbound
799 Volcanion
Name: Name, Length: 800, dtype: object
python">df[ 'Name' ]
0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
...
795 Diancie
796 DiancieMega Diancie
797 HoopaHoopa Confined
798 HoopaHoopa Unbound
799 Volcanion
Name: Name, Length: 800, dtype: object
python">df[ [ 'Name' , 'HP' , 'Speed' ] ]
Name HP Speed 0 Bulbasaur 45 45 1 Ivysaur 60 60 2 Venusaur 80 80 3 VenusaurMega Venusaur 80 80 4 Charmander 39 65 ... ... ... ... 795 Diancie 50 50 796 DiancieMega Diancie 50 110 797 HoopaHoopa Confined 80 70 798 HoopaHoopa Unbound 80 80 799 Volcanion 80 70
800 rows × 3 columns
python">
df. iloc[ 1 ]
# 2
Name Ivysaur
Type 1 Grass
Type 2 Poison
HP 60
Attack 62
Defense 63
Sp. Atk 80
Sp. Def 80
Speed 60
Generation 1
Legendary False
Name: 1, dtype: object
python">
df. iloc[ 0 : 4 ]
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
python">
df. iloc[ 2 , 2 ]
'Grass'
python">
0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
5 Charmeleon
...
796 DiancieMega Diancie
797 HoopaHoopa Confined
798 HoopaHoopa Unbound
799 Volcanion
python">
df. loc[ df[ 'Type 1' ] == "Grass" ]
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 48 43 Oddish Grass Poison 45 50 55 75 65 30 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... 718 650 Chespin Grass NaN 56 61 65 48 45 38 6 False 719 651 Quilladin Grass NaN 61 78 95 56 58 57 6 False 720 652 Chesnaught Grass Fighting 88 107 122 74 75 64 6 False 740 672 Skiddo Grass NaN 66 65 48 62 57 52 6 False 741 673 Gogoat Grass NaN 123 100 62 97 81 68 6 False
70 rows × 12 columns
python">
df. describe( )
# HP Attack Defense Sp. Atk Sp. Def Speed Generation count 800.000000 800.000000 800.000000 800.000000 800.000000 800.000000 800.000000 800.00000 mean 362.813750 69.258750 79.001250 73.842500 72.820000 71.902500 68.277500 3.32375 std 208.343798 25.534669 32.457366 31.183501 32.722294 27.828916 29.060474 1.66129 min 1.000000 1.000000 5.000000 5.000000 10.000000 20.000000 5.000000 1.00000 25% 184.750000 50.000000 55.000000 50.000000 49.750000 50.000000 45.000000 2.00000 50% 364.500000 65.000000 75.000000 70.000000 65.000000 70.000000 65.000000 3.00000 75% 539.250000 80.000000 100.000000 90.000000 95.000000 90.000000 90.000000 5.00000 max 721.000000 255.000000 190.000000 230.000000 194.000000 230.000000 180.000000 6.00000
python">
df. sort_values( 'Name' )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 510 460 Abomasnow Grass Ice 90 92 75 92 85 60 4 False 511 460 AbomasnowMega Abomasnow Grass Ice 90 132 105 132 105 30 4 False 68 63 Abra Psychic NaN 25 20 15 105 55 90 1 False 392 359 Absol Dark NaN 65 130 60 75 60 75 3 False 393 359 AbsolMega Absol Dark NaN 65 150 60 115 60 115 3 False ... ... ... ... ... ... ... ... ... ... ... ... ... 632 571 Zoroark Dark NaN 60 105 60 120 60 105 5 False 631 570 Zorua Dark NaN 40 65 40 80 40 65 5 False 46 41 Zubat Poison Flying 40 45 35 30 40 55 1 False 695 634 Zweilous Dark Dragon 72 85 70 65 70 58 5 False 794 718 Zygarde50% Forme Dragon Ground 108 100 121 81 95 95 6 True
800 rows × 12 columns
python">
df. sort_values( 'Name' , ascending= False )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 794 718 Zygarde50% Forme Dragon Ground 108 100 121 81 95 95 6 True 695 634 Zweilous Dark Dragon 72 85 70 65 70 58 5 False 46 41 Zubat Poison Flying 40 45 35 30 40 55 1 False 631 570 Zorua Dark NaN 40 65 40 80 40 65 5 False 632 571 Zoroark Dark NaN 60 105 60 120 60 105 5 False ... ... ... ... ... ... ... ... ... ... ... ... ... 393 359 AbsolMega Absol Dark NaN 65 150 60 115 60 115 3 False 392 359 Absol Dark NaN 65 130 60 75 60 75 3 False 68 63 Abra Psychic NaN 25 20 15 105 55 90 1 False 511 460 AbomasnowMega Abomasnow Grass Ice 90 132 105 132 105 30 4 False 510 460 Abomasnow Grass Ice 90 92 75 92 85 60 4 False
800 rows × 12 columns
python">
df. sort_values( [ 'Type 1' , 'HP' ] )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 316 292 Shedinja Bug Ghost 1 90 45 30 30 40 3 False 230 213 Shuckle Bug Rock 20 10 230 10 230 5 2 False 462 415 Combee Bug Flying 30 30 42 30 42 70 4 False 603 543 Venipede Bug Poison 30 45 59 30 39 57 5 False 314 290 Nincada Bug Ground 31 45 90 30 30 40 3 False ... ... ... ... ... ... ... ... ... ... ... ... ... 142 131 Lapras Water Ice 130 85 80 85 95 60 1 False 145 134 Vaporeon Water NaN 130 65 60 110 95 65 1 False 350 320 Wailmer Water NaN 130 70 35 70 35 60 3 False 655 594 Alomomola Water NaN 165 75 80 40 45 65 5 False 351 321 Wailord Water NaN 170 90 45 90 45 60 3 False
800 rows × 12 columns
python">
df. sort_values( [ 'Type 1' , 'HP' ] , ascending= [ 0 , 1 ] )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 139 129 Magikarp Water NaN 20 10 55 15 20 80 1 False 381 349 Feebas Water NaN 20 15 20 10 55 80 3 False 97 90 Shellder Water NaN 30 65 100 45 25 40 1 False 106 98 Krabby Water NaN 30 105 90 25 25 50 1 False 125 116 Horsea Water NaN 30 40 70 70 25 60 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... 232 214 HeracrossMega Heracross Bug Fighting 80 185 115 40 105 75 2 False 678 617 Accelgor Bug NaN 80 70 40 100 60 145 5 False 734 666 Vivillon Bug Flying 80 52 50 90 50 89 6 False 698 637 Volcarona Bug Fire 85 60 65 135 105 100 5 False 520 469 Yanmega Bug Flying 86 76 86 116 56 95 4 False
800 rows × 12 columns
python">df[ 'Total' ] = df[ 'HP' ] + df[ 'Attack' ] + df[ 'Defense' ] + df[ 'Sp. Atk' ] + df[ 'Sp. Def' ] + df[ 'Speed' ]
df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Total 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 318 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 405 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 525 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 625 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False 309 ... ... ... ... ... ... ... ... ... ... ... ... ... ... 795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True 600 796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True 700 797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True 600 798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True 680 799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True 600
800 rows × 13 columns
python">df[ 'Total' ] = df[ 'HP' ] + df[ 'Attack' ] + df[ 'Defense' ] + df[ 'Sp. Atk' ] + df[ 'Sp. Def' ] + df[ 'Speed' ]
df = df. drop( columns= [ 'Total' ] )
df. head( )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
python">
df[ 'Total' ] = df. iloc[ : , 4 : 10 ] . sum ( axis= 1 )
df. head( )
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Total 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 318 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 405 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 525 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 625 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False 309
python">read( )
df[ 'Total' ] = df. iloc[ : , 4 : 10 ] . sum ( axis= 1 )
df = df[ [ 'Total' , 'HP' , 'Defense' ] ]
df. head( 5 )
Total HP Defense 0 318 45 49 1 405 60 63 2 525 80 83 3 625 80 123 4 309 39 43
python">read( )
df[ 'Total' ] = df. iloc[ : , 4 : 10 ] . sum ( axis= 1 )
cols = list ( df. columns)
print ( cols)
df = df[ cols[ 0 : 4 ] + [ cols[ - 1 ] ] + cols[ 4 : - 1 ] ]
df. head( 5 )
['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary', 'Total']
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False
python">
python">
df. loc[ ( df[ 'Type 1' ] == 'Grass' ) & ( df[ 'Type 2' ] == 'Poison' ) ]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 48 43 Oddish Grass Poison 320 45 50 55 75 65 30 1 False 49 44 Gloom Grass Poison 395 60 65 70 85 75 40 1 False 50 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False 75 69 Bellsprout Grass Poison 300 50 75 35 70 30 40 1 False 76 70 Weepinbell Grass Poison 390 65 90 50 85 45 55 1 False 77 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False 344 315 Roselia Grass Poison 400 50 60 45 100 80 65 3 False 451 406 Budew Grass Poison 280 40 30 35 50 70 55 4 False 452 407 Roserade Grass Poison 515 60 70 65 125 105 90 4 False 651 590 Foongus Grass Poison 294 69 55 45 55 55 15 5 False 652 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
python">
df. loc[ ( df[ 'Type 1' ] == 'Grass' ) | ( df[ 'Type 2' ] == 'Poison' ) ]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 16 13 Weedle Bug Poison 195 40 35 30 20 20 50 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... ... 718 650 Chespin Grass NaN 313 56 61 65 48 45 38 6 False 719 651 Quilladin Grass NaN 405 61 78 95 56 58 57 6 False 720 652 Chesnaught Grass Fighting 530 88 107 122 74 75 64 6 False 740 672 Skiddo Grass NaN 350 66 65 48 62 57 52 6 False 741 673 Gogoat Grass NaN 531 123 100 62 97 81 68 6 False
89 rows × 13 columns
python">new_df = df. loc[ ( df[ 'Type 1' ] == 'Grass' ) & ( df[ 'Type 2' ] == 'Poison' ) & ( df[ 'HP' ] > 70 ) ]
new_df = new_df. reset_index( )
new_df
index # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 1 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 2 50 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False 3 77 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False 4 652 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
python">new_df = df. loc[ ( df[ 'Type 1' ] == 'Grass' ) & ( df[ 'Type 2' ] == 'Poison' ) & ( df[ 'HP' ] > 70 ) ]
new_df = new_df. reset_index( drop= True )
new_df
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 1 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 2 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False 3 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False 4 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
python">df. loc[ df[ 'Name' ] . str . contains( 'Mega' ) ]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 7 6 CharizardMega Charizard X Fire Dragon 634 78 130 111 130 85 100 1 False 8 6 CharizardMega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 False 12 9 BlastoiseMega Blastoise Water NaN 630 79 103 120 135 115 78 1 False 19 15 BeedrillMega Beedrill Bug Poison 495 65 150 40 15 80 145 1 False 23 18 PidgeotMega Pidgeot Normal Flying 579 83 80 80 135 80 121 1 False 71 65 AlakazamMega Alakazam Psychic NaN 590 55 50 65 175 95 150 1 False 87 80 SlowbroMega Slowbro Water Psychic 590 95 75 180 130 80 30 1 False 102 94 GengarMega Gengar Ghost Poison 600 60 65 80 170 95 130 1 False 124 115 KangaskhanMega Kangaskhan Normal NaN 590 105 125 100 60 100 100 1 False 137 127 PinsirMega Pinsir Bug Flying 600 65 155 120 65 90 105 1 False 141 130 GyaradosMega Gyarados Water Dark 640 95 155 109 70 130 81 1 False 154 142 AerodactylMega Aerodactyl Rock Flying 615 80 135 85 70 95 150 1 False 163 150 MewtwoMega Mewtwo X Psychic Fighting 780 106 190 100 154 100 130 1 True 164 150 MewtwoMega Mewtwo Y Psychic NaN 780 106 150 70 194 120 140 1 True 168 154 Meganium Grass NaN 525 80 82 100 83 100 80 2 False 196 181 AmpharosMega Ampharos Electric Dragon 610 90 95 105 165 110 45 2 False 224 208 SteelixMega Steelix Steel Ground 610 75 125 230 55 95 30 2 False 229 212 ScizorMega Scizor Bug Steel 600 70 150 140 65 100 75 2 False 232 214 HeracrossMega Heracross Bug Fighting 600 80 185 115 40 105 75 2 False 248 229 HoundoomMega Houndoom Dark Fire 600 75 90 90 140 90 115 2 False 268 248 TyranitarMega Tyranitar Rock Dark 700 100 164 150 95 120 71 2 False 275 254 SceptileMega Sceptile Grass Dragon 630 70 110 75 145 85 145 3 False 279 257 BlazikenMega Blaziken Fire Fighting 630 80 160 80 130 80 100 3 False 283 260 SwampertMega Swampert Water Ground 635 100 150 110 95 110 70 3 False 306 282 GardevoirMega Gardevoir Psychic Fairy 618 68 85 65 165 135 100 3 False 327 302 SableyeMega Sableye Dark Ghost 480 50 85 125 85 115 20 3 False 329 303 MawileMega Mawile Steel Fairy 480 50 105 125 55 95 50 3 False 333 306 AggronMega Aggron Steel NaN 630 70 140 230 60 80 50 3 False 336 308 MedichamMega Medicham Fighting Psychic 510 60 100 85 80 85 100 3 False 339 310 ManectricMega Manectric Electric NaN 575 70 75 80 135 80 135 3 False 349 319 SharpedoMega Sharpedo Water Dark 560 70 140 70 110 65 105 3 False 354 323 CameruptMega Camerupt Fire Ground 560 70 120 100 145 105 20 3 False 366 334 AltariaMega Altaria Dragon Fairy 590 75 110 110 110 105 80 3 False 387 354 BanetteMega Banette Ghost NaN 555 64 165 75 93 83 75 3 False 393 359 AbsolMega Absol Dark NaN 565 65 150 60 115 60 115 3 False 397 362 GlalieMega Glalie Ice NaN 580 80 120 80 120 80 100 3 False 409 373 SalamenceMega Salamence Dragon Flying 700 95 145 130 120 90 120 3 False 413 376 MetagrossMega Metagross Steel Psychic 700 80 145 150 105 110 110 3 False 418 380 LatiasMega Latias Dragon Psychic 700 80 100 120 140 150 110 3 True 420 381 LatiosMega Latios Dragon Psychic 700 80 130 100 160 120 110 3 True 426 384 RayquazaMega Rayquaza Dragon Flying 780 105 180 100 180 100 115 3 True 476 428 LopunnyMega Lopunny Normal Fighting 580 65 136 94 54 96 135 4 False 494 445 GarchompMega Garchomp Dragon Ground 700 108 170 115 120 95 92 4 False 498 448 LucarioMega Lucario Fighting Steel 625 70 145 88 140 70 112 4 False 511 460 AbomasnowMega Abomasnow Grass Ice 594 90 132 105 132 105 30 4 False 527 475 GalladeMega Gallade Psychic Fighting 618 68 165 95 65 115 110 4 False 591 531 AudinoMega Audino Normal Fairy 545 103 60 126 80 126 50 5 False 796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 160 110 110 6 True
python">import re
df. loc[ df[ 'Type 1' ] . str . contains( 'Fire|grass' , regex= True , flags= re. I) ]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... ... 735 667 Litleo Fire Normal 369 62 50 58 73 54 72 6 False 736 668 Pyroar Fire Normal 507 86 68 72 109 66 106 6 False 740 672 Skiddo Grass NaN 350 66 65 48 62 57 52 6 False 741 673 Gogoat Grass NaN 531 123 100 62 97 81 68 6 False 799 721 Volcanion Fire Water 600 80 110 120 130 90 70 6 True
122 rows × 13 columns
python">import re
df. loc[ df[ 'Name' ] . str . contains( '^pi[a-z]' , regex= True , flags= re. I) ]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 20 16 Pidgey Normal Flying 251 40 45 40 35 35 56 1 False 21 17 Pidgeotto Normal Flying 349 63 60 55 50 50 71 1 False 22 18 Pidgeot Normal Flying 479 83 80 75 70 70 101 1 False 23 18 PidgeotMega Pidgeot Normal Flying 579 83 80 80 135 80 121 1 False 30 25 Pikachu Electric NaN 320 35 55 40 50 50 90 1 False 136 127 Pinsir Bug NaN 500 65 125 100 55 70 85 1 False 137 127 PinsirMega Pinsir Bug Flying 600 65 155 120 65 90 105 1 False 186 172 Pichu Electric NaN 205 20 40 15 35 35 60 2 False 219 204 Pineco Bug NaN 290 50 65 90 35 35 15 2 False 239 221 Piloswine Ice Ground 450 100 100 80 60 60 50 2 False 438 393 Piplup Water NaN 314 53 51 53 61 56 40 4 False 558 499 Pignite Fire Fighting 418 90 93 55 70 55 55 5 False 578 519 Pidove Normal Flying 264 50 55 50 36 30 43 5 False
python">read( )
df. loc[ df[ 'Type 1' ] == 'Grass' , 'Type 1' ] = 'Flamer'
df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Flamer Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Flamer Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Flamer Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Flamer Poison 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... 795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True 796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True 797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True 798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True 799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True
800 rows × 12 columns
python">read( )
df. loc[ df[ 'Type 1' ] == 'Grass' , [ 'Type 1' , 'Lendary' ] ] = 'TEST VALUE'
df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Lendary 0 1 Bulbasaur TEST VALUE Poison 45 49 49 65 65 45 1 False TEST VALUE 1 2 Ivysaur TEST VALUE Poison 60 62 63 80 80 60 1 False TEST VALUE 2 3 Venusaur TEST VALUE Poison 80 82 83 100 100 80 1 False TEST VALUE 3 3 VenusaurMega Venusaur TEST VALUE Poison 80 100 123 122 120 80 1 False TEST VALUE 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... 795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True NaN 796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True NaN 797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True NaN 798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True NaN 799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True NaN
800 rows × 13 columns
python">read( )
df. loc[ df[ 'Type 1' ] == 'Grass' , [ 'Type 1' , 'Lendary' ] ] = [ 'TEST 1' , 'TEST 2' ]
df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Lendary 0 1 Bulbasaur TEST 1 Poison 45 49 49 65 65 45 1 False TEST 2 1 2 Ivysaur TEST 1 Poison 60 62 63 80 80 60 1 False TEST 2 2 3 Venusaur TEST 1 Poison 80 82 83 100 100 80 1 False TEST 2 3 3 VenusaurMega Venusaur TEST 1 Poison 80 100 123 122 120 80 1 False TEST 2 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... 795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True NaN 796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True NaN 797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True NaN 798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True NaN 799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True NaN
800 rows × 13 columns
python">read( )
df. groupby( [ 'Type 1' ] ) . mean( numeric_only= True ) . sort_values( 'Defense' , ascending= False )
# HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Type 1 Steel 442.851852 65.222222 92.703704 126.370370 67.518519 80.629630 55.259259 3.851852 0.148148 Rock 392.727273 65.363636 92.863636 100.795455 63.340909 75.477273 55.909091 3.454545 0.090909 Dragon 474.375000 83.312500 112.125000 86.375000 96.843750 88.843750 83.031250 3.875000 0.375000 Ground 356.281250 73.781250 95.750000 84.843750 56.468750 62.750000 63.906250 3.156250 0.125000 Ghost 486.500000 64.437500 73.781250 81.187500 79.343750 76.468750 64.343750 4.187500 0.062500 Water 303.089286 72.062500 74.151786 72.946429 74.812500 70.517857 65.964286 2.857143 0.035714 Ice 423.541667 72.000000 72.750000 71.416667 77.541667 76.291667 63.458333 3.541667 0.083333 Grass 344.871429 67.271429 73.214286 70.800000 77.500000 70.428571 61.928571 3.357143 0.042857 Bug 334.492754 56.884058 70.971014 70.724638 53.869565 64.797101 61.681159 3.217391 0.000000 Dark 461.354839 66.806452 88.387097 70.225806 74.645161 69.516129 76.161290 4.032258 0.064516 Poison 251.785714 67.250000 74.678571 68.821429 60.428571 64.392857 63.571429 2.535714 0.000000 Fire 327.403846 69.903846 84.769231 67.769231 88.980769 72.211538 74.442308 3.211538 0.096154 Psychic 380.807018 70.631579 71.456140 67.684211 98.403509 86.280702 81.491228 3.385965 0.245614 Electric 363.500000 59.795455 69.090909 66.295455 90.022727 73.704545 84.500000 3.272727 0.090909 Flying 677.750000 70.750000 78.750000 66.250000 94.250000 72.500000 102.500000 5.500000 0.500000 Fighting 363.851852 69.851852 96.777778 65.925926 53.111111 64.703704 66.074074 3.370370 0.000000 Fairy 449.529412 74.117647 61.529412 65.705882 78.529412 84.705882 48.588235 4.117647 0.058824 Normal 319.173469 77.275510 73.469388 59.846939 55.816327 63.724490 71.551020 3.051020 0.020408
python">read( )
df. groupby( [ 'Type 1' ] ) . sum ( numeric_only= True ) . sort_values( 'HP' , ascending= False )
# HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Type 1 Water 33946 8071 8305 8170 8379 7898 7388 320 4 Normal 31279 7573 7200 5865 5470 6245 7012 299 2 Grass 24141 4709 5125 4956 5425 4930 4335 235 3 Psychic 21706 4026 4073 3858 5609 4918 4645 193 14 Bug 23080 3925 4897 4880 3717 4471 4256 222 0 Fire 17025 3635 4408 3524 4627 3755 3871 167 5 Rock 17280 2876 4086 4435 2787 3321 2460 152 4 Dragon 15180 2666 3588 2764 3099 2843 2657 124 12 Electric 15994 2631 3040 2917 3961 3243 3718 144 4 Ground 11401 2361 3064 2715 1807 2008 2045 101 4 Dark 14302 2071 2740 2177 2314 2155 2361 125 2 Ghost 15568 2062 2361 2598 2539 2447 2059 134 2 Fighting 9824 1886 2613 1780 1434 1747 1784 91 0 Poison 7050 1883 2091 1927 1692 1803 1780 71 0 Steel 11957 1761 2503 3412 1823 2177 1492 104 4 Ice 10165 1728 1746 1714 1861 1831 1523 85 2 Fairy 7642 1260 1046 1117 1335 1440 826 70 1 Flying 2711 283 315 265 377 290 410 22 2
python">read( )
df. groupby( [ 'Type 1' ] ) . count( ) . sort_values( 'HP' , ascending= False )
# Name Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Type 1 Water 112 112 53 112 112 112 112 112 112 112 112 Normal 98 98 37 98 98 98 98 98 98 98 98 Grass 70 70 37 70 70 70 70 70 70 70 70 Bug 69 69 52 69 69 69 69 69 69 69 69 Psychic 57 57 19 57 57 57 57 57 57 57 57 Fire 52 52 24 52 52 52 52 52 52 52 52 Electric 44 44 17 44 44 44 44 44 44 44 44 Rock 44 44 35 44 44 44 44 44 44 44 44 Ghost 32 32 22 32 32 32 32 32 32 32 32 Ground 32 32 19 32 32 32 32 32 32 32 32 Dragon 32 32 21 32 32 32 32 32 32 32 32 Dark 31 31 21 31 31 31 31 31 31 31 31 Poison 28 28 13 28 28 28 28 28 28 28 28 Fighting 27 27 7 27 27 27 27 27 27 27 27 Steel 27 27 22 27 27 27 27 27 27 27 27 Ice 24 24 11 24 24 24 24 24 24 24 24 Fairy 17 17 2 17 17 17 17 17 17 17 17 Flying 4 4 2 4 4 4 4 4 4 4 4
python">read( )
df[ 'count' ] = 1
df. groupby( [ 'Type 1' ] ) . count( ) [ 'count' ]
Type 1
Bug 69
Dark 31
Dragon 32
Electric 44
Fairy 17
Fighting 27
Fire 52
Flying 4
Ghost 32
Grass 70
Ground 32
Ice 24
Normal 98
Poison 28
Psychic 57
Rock 44
Steel 27
Water 112
Name: count, dtype: int64
python">read( )
df[ 'count' ] = 1
df. groupby( [ 'Type 1' , 'Type 2' ] ) . count( ) [ 'count' ]
Type 1 Type 2
Bug Electric 2
Fighting 2
Fire 2
Flying 14
Ghost 1
..
Water Ice 3
Poison 3
Psychic 5
Rock 4
Steel 1
Name: count, Length: 136, dtype: int64
python">
for df in pd. read_csv( f' { data_dir} /pandas /pokemon_data.csv' , chunksize= 5 ) :
;
python">
new_df = pd. DataFrame( )
for df in pd. read_csv( f' { data_dir} /pandas /pokemon_data.csv' , chunksize= 5 ) :
new_df = pd. concat( [ new_df, df] )
new_df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary 0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False ... ... ... ... ... ... ... ... ... ... ... ... ... 795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True 796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True 797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True 798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True 799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True
800 rows × 12 columns