homework_1_for_lecture_3

import pandas as pd
# Nobel laureates CSV hosted in the tidyverse "datascience-box" course-materials repository.
url = 'https://raw.githubusercontent.com/tidyverse/datascience-box/refs/heads/main/course-materials/lab-instructions/lab-03/data/nobel.csv'
df = pd.read_csv(url)
# Preview the first five rows of the table.
print(df.head(n=5))
   id       firstname    surname  year category  \
0   1  Wilhelm Conrad    Röntgen  1901  Physics   
1   2      Hendrik A.    Lorentz  1902  Physics   
2   3          Pieter     Zeeman  1902  Physics   
3   4           Henri  Becquerel  1903  Physics   
4   5          Pierre      Curie  1903  Physics   

                                         affiliation       city      country  \
0                                  Munich University     Munich      Germany   
1                                  Leiden University     Leiden  Netherlands   
2                               Amsterdam University  Amsterdam  Netherlands   
3                                École Polytechnique      Paris       France   
4  École municipale de physique et de chimie indu...      Paris       France   

    born_date   died_date  ... died_country_code overall_motivation share  \
0  1845-03-27  1923-02-10  ...                DE                NaN     1   
1  1853-07-18  1928-02-04  ...                NL                NaN     2   
2  1865-05-25  1943-10-09  ...                NL                NaN     2   
3  1852-12-15  1908-08-25  ...                FR                NaN     2   
4  1859-05-15  1906-04-19  ...                FR                NaN     4   

                                          motivation  born_country_original  \
0  "in recognition of the extraordinary services ...  Prussia (now Germany)   
1  "in recognition of the extraordinary service t...        the Netherlands   
2  "in recognition of the extraordinary service t...        the Netherlands   
3  "in recognition of the extraordinary services ...                 France   
4  "in recognition of the extraordinary services ...                 France   

       born_city_original died_country_original died_city_original  \
0  Lennep (now Remscheid)               Germany             Munich   
1                  Arnhem       the Netherlands                NaN   
2              Zonnemaire       the Netherlands          Amsterdam   
3                   Paris                France                NaN   
4                   Paris                France              Paris   

   city_original country_original  
0         Munich          Germany  
1         Leiden  the Netherlands  
2      Amsterdam  the Netherlands  
3          Paris           France  
4          Paris           France  

[5 rows x 26 columns]
# Display the DataFrame; the notebook renders a truncated preview of rows and columns.
df
id firstname surname year category affiliation city country born_date died_date ... died_country_code overall_motivation share motivation born_country_original born_city_original died_country_original died_city_original city_original country_original
0 1 Wilhelm Conrad Röntgen 1901 Physics Munich University Munich Germany 1845-03-27 1923-02-10 ... DE NaN 1 "in recognition of the extraordinary services ... Prussia (now Germany) Lennep (now Remscheid) Germany Munich Munich Germany
1 2 Hendrik A. Lorentz 1902 Physics Leiden University Leiden Netherlands 1853-07-18 1928-02-04 ... NL NaN 2 "in recognition of the extraordinary service t... the Netherlands Arnhem the Netherlands NaN Leiden the Netherlands
2 3 Pieter Zeeman 1902 Physics Amsterdam University Amsterdam Netherlands 1865-05-25 1943-10-09 ... NL NaN 2 "in recognition of the extraordinary service t... the Netherlands Zonnemaire the Netherlands Amsterdam Amsterdam the Netherlands
3 4 Henri Becquerel 1903 Physics École Polytechnique Paris France 1852-12-15 1908-08-25 ... FR NaN 2 "in recognition of the extraordinary services ... France Paris France NaN Paris France
4 5 Pierre Curie 1903 Physics École municipale de physique et de chimie indu... Paris France 1859-05-15 1906-04-19 ... FR NaN 4 "in recognition of the extraordinary services ... France Paris France Paris Paris France
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
930 965 Sir Gregory P. Winter 2018 Chemistry MRC Laboratory of Molecular Biology Cambridge United Kingdom 1951-04-14 NaN ... NaN NaN 4 "for the phage display of peptides and antibod... United Kingdom Leicester NaN NaN Cambridge United Kingdom
931 966 Denis Mukwege 2018 Peace NaN NaN NaN 1955-03-01 NaN ... NaN NaN 2 "for their efforts to end the use of sexual vi... Belgian Congo (now Democratic Republic of the ... Bukavu NaN NaN NaN NaN
932 967 Nadia Murad 2018 Peace NaN NaN NaN NaN NaN ... NaN NaN 2 "for their efforts to end the use of sexual vi... Iraq Kojo NaN NaN NaN NaN
933 968 William D. Nordhaus 2018 Economics Yale University New Haven CT USA 1941-05-31 NaN ... NaN NaN 2 "for integrating climate change into long-run ... USA Albuquerque NM NaN NaN New Haven CT USA
934 969 Paul M. Romer 2018 Economics NYU Stern School of Business New York NY USA NaN NaN ... NaN NaN 2 "for integrating technological innovations int... USA Denver CO NaN NaN New York NY USA

935 rows × 26 columns

# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 935 entries, 0 to 934
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   id                     935 non-null    int64 
 1   firstname              935 non-null    object
 2   surname                906 non-null    object
 3   year                   935 non-null    int64 
 4   category               935 non-null    object
 5   affiliation            685 non-null    object
 6   city                   680 non-null    object
 7   country                681 non-null    object
 8   born_date              902 non-null    object
 9   died_date              627 non-null    object
 10  gender                 935 non-null    object
 11  born_city              907 non-null    object
 12  born_country           907 non-null    object
 13  born_country_code      907 non-null    object
 14  died_city              608 non-null    object
 15  died_country           614 non-null    object
 16  died_country_code      614 non-null    object
 17  overall_motivation     17 non-null     object
 18  share                  935 non-null    int64 
 19  motivation             935 non-null    object
 20  born_country_original  907 non-null    object
 21  born_city_original     907 non-null    object
 22  died_country_original  614 non-null    object
 23  died_city_original     608 non-null    object
 24  city_original          680 non-null    object
 25  country_original       681 non-null    object
dtypes: int64(3), object(23)
memory usage: 190.1+ KB
# Keep only the rows whose `country` value is present (not NaN).
df.loc[df['country'].notna()]
id firstname surname year category affiliation city country born_date died_date ... died_country_code overall_motivation share motivation born_country_original born_city_original died_country_original died_city_original city_original country_original
0 1 Wilhelm Conrad Röntgen 1901 Physics Munich University Munich Germany 1845-03-27 1923-02-10 ... DE NaN 1 "in recognition of the extraordinary services ... Prussia (now Germany) Lennep (now Remscheid) Germany Munich Munich Germany
1 2 Hendrik A. Lorentz 1902 Physics Leiden University Leiden Netherlands 1853-07-18 1928-02-04 ... NL NaN 2 "in recognition of the extraordinary service t... the Netherlands Arnhem the Netherlands NaN Leiden the Netherlands
2 3 Pieter Zeeman 1902 Physics Amsterdam University Amsterdam Netherlands 1865-05-25 1943-10-09 ... NL NaN 2 "in recognition of the extraordinary service t... the Netherlands Zonnemaire the Netherlands Amsterdam Amsterdam the Netherlands
3 4 Henri Becquerel 1903 Physics École Polytechnique Paris France 1852-12-15 1908-08-25 ... FR NaN 2 "in recognition of the extraordinary services ... France Paris France NaN Paris France
4 5 Pierre Curie 1903 Physics École municipale de physique et de chimie indu... Paris France 1859-05-15 1906-04-19 ... FR NaN 4 "in recognition of the extraordinary services ... France Paris France Paris Paris France
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
928 963 Frances H. Arnold 2018 Chemistry California Institute of Technology (Caltech) Pasadena CA USA 1956-07-25 NaN ... NaN NaN 2 "for the directed evolution of enzymes" USA Pittsburgh PA NaN NaN Pasadena CA USA
929 964 George P. Smith 2018 Chemistry University of Missouri Columbia USA 1941-03-10 NaN ... NaN NaN 4 "for the phage display of peptides and antibod... USA Norwalk CT NaN NaN Columbia USA
930 965 Sir Gregory P. Winter 2018 Chemistry MRC Laboratory of Molecular Biology Cambridge United Kingdom 1951-04-14 NaN ... NaN NaN 4 "for the phage display of peptides and antibod... United Kingdom Leicester NaN NaN Cambridge United Kingdom
933 968 William D. Nordhaus 2018 Economics Yale University New Haven CT USA 1941-05-31 NaN ... NaN NaN 2 "for integrating climate change into long-run ... USA Albuquerque NM NaN NaN New Haven CT USA
934 969 Paul M. Romer 2018 Economics NYU Stern School of Business New York NY USA NaN NaN ... NaN NaN 2 "for integrating technological innovations int... USA Denver CO NaN NaN New York NY USA

681 rows × 26 columns

# Keep only the rows whose `died_date` value is missing (NaN).
df.loc[df['died_date'].isna()]
id firstname surname year category affiliation city country born_date died_date ... died_country_code overall_motivation share motivation born_country_original born_city_original died_country_original died_city_original city_original country_original
68 68 Chen Ning Yang 1957 Physics Institute for Advanced Study Princeton NJ USA 1922-09-22 NaN ... NaN NaN 2 "for their penetrating investigation of the so... China Hofei Anhwei NaN NaN Princeton NJ USA
69 69 Tsung-Dao Lee 1957 Physics Columbia University New York NY USA 1926-11-24 NaN ... NaN NaN 2 "for their penetrating investigation of the so... China Shanghai NaN NaN New York NY USA
94 95 Leon N. Cooper 1972 Physics Brown University Providence RI USA 1930-02-28 NaN ... NaN NaN 3 "for their jointly developed theory of superco... USA New York NY NaN NaN Providence RI USA
96 97 Leo Esaki 1973 Physics IBM Thomas J. Watson Research Center Yorktown Heights NY USA 1925-03-12 NaN ... NaN NaN 4 "for their experimental discoveries regarding ... Japan Osaka NaN NaN Yorktown Heights NY USA
97 98 Ivar Giaever 1973 Physics General Electric Company Schenectady NY USA 1929-04-05 NaN ... NaN NaN 4 "for their experimental discoveries regarding ... Norway Bergen NaN NaN Schenectady NY USA
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
930 965 Sir Gregory P. Winter 2018 Chemistry MRC Laboratory of Molecular Biology Cambridge United Kingdom 1951-04-14 NaN ... NaN NaN 4 "for the phage display of peptides and antibod... United Kingdom Leicester NaN NaN Cambridge United Kingdom
931 966 Denis Mukwege 2018 Peace NaN NaN NaN 1955-03-01 NaN ... NaN NaN 2 "for their efforts to end the use of sexual vi... Belgian Congo (now Democratic Republic of the ... Bukavu NaN NaN NaN NaN
932 967 Nadia Murad 2018 Peace NaN NaN NaN NaN NaN ... NaN NaN 2 "for their efforts to end the use of sexual vi... Iraq Kojo NaN NaN NaN NaN
933 968 William D. Nordhaus 2018 Economics Yale University New Haven CT USA 1941-05-31 NaN ... NaN NaN 2 "for integrating climate change into long-run ... USA Albuquerque NM NaN NaN New Haven CT USA
934 969 Paul M. Romer 2018 Economics NYU Stern School of Business New York NY USA NaN NaN ... NaN NaN 2 "for integrating technological innovations int... USA Denver CO NaN NaN New York NY USA

308 rows × 26 columns

Source: homework_1_for_lecture_3.ipynb

homework_2_for_lecture_3

import pandas as pd
# Titanic passenger CSV from the datasciencedojo "datasets" repository.
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)
# Preview the first five rows of the table.
print(df.head(n=5))
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  
# Display the DataFrame; the notebook renders a truncated preview of the rows.
df
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
from skimpy import clean_columns
# Rename the columns to snake_case with skimpy (e.g. PassengerId -> passenger_id)
# and rebind `df` to the renamed frame.
df = clean_columns(df,case="snake")
# Print the resulting column index to confirm the new names.
print(df.columns)
Index(['passenger_id', 'survived', 'pclass', 'name', 'sex', 'age', 'sib_sp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked'],
      dtype='object')
# Show a copy of the table in which every missing value is replaced by "-".
# The result is not assigned, so this line on its own leaves `df` unchanged.
# NOTE(review): the recorded outputs of the later describe()/dropna() cells look
# as if `df` had already been overwritten with the filled table (no `age` column
# in describe(), "-" placeholders in dropna()) - confirm the intended cell order.
df.fillna("-")
passenger_id survived pclass name sex age sib_sp parch ticket fare cabin embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 - S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 - S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 - S
... ... ... ... ... ... ... ... ... ... ... ... ...
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 - S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female - 1 2 W./C. 6607 23.4500 - S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 - Q

891 rows × 12 columns

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
passenger_id survived pclass sib_sp parch fare
count 891.000000 891.000000 891.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 8.000000 6.000000 512.329200
# Same summary statistics, rounded to two decimal places for easier reading.
sum_table = df.describe().round(decimals=2)
sum_table
passenger_id survived pclass sib_sp parch fare
count 891.00 891.00 891.00 891.00 891.00 891.00
mean 446.00 0.38 2.31 0.52 0.38 32.20
std 257.35 0.49 0.84 1.10 0.81 49.69
min 1.00 0.00 1.00 0.00 0.00 0.00
25% 223.50 0.00 2.00 0.00 0.00 7.91
50% 446.00 0.00 3.00 0.00 0.00 14.45
75% 668.50 1.00 3.00 1.00 0.00 31.00
max 891.00 1.00 3.00 8.00 6.00 512.33
# Show only the rows that contain no missing values; the result is not assigned
# back to `df`.
# NOTE(review): the recorded output still lists all 891 rows with "-" placeholders,
# which suggests the NaNs had already been replaced when this cell ran - confirm.
df.dropna()
passenger_id survived pclass name sex age sib_sp parch ticket fare cabin embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 - S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 - S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 - S
... ... ... ... ... ... ... ... ... ... ... ... ...
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 - S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female - 1 2 W./C. 6607 23.4500 - S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 - Q

891 rows × 12 columns

Source: homework_2_for_lecture_3.ipynb

homework_for_lecture_2-pet_names

import pandas as pd
# Load the Seattle pet license records from a CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='seattle_pet_licenses.csv')
# Display the loaded table.
df
animal_s_name license_issue_date license_number primary_breed secondary_breed species zip_code
0 Ozzy 2005-03-29T00:00:00.000 130651.0 Dachshund, Standard Smooth Haired NaN Dog 98104
1 Jack 2009-12-23T00:00:00.000 898148.0 Schnauzer, Miniature Terrier, Rat Dog 98107
2 Ginger 2006-01-20T00:00:00.000 29654.0 Retriever, Golden Retriever, Labrador Dog 98117
3 Pepper 2006-02-07T00:00:00.000 75432.0 Manx Mix Cat 98103
4 Addy 2006-08-04T00:00:00.000 729899.0 Retriever, Golden NaN Dog 98105
... ... ... ... ... ... ... ...
66037 Lily 2016-12-27T00:00:00.000 NaN Domestic Shorthair Mix Cat 98117
66038 Ellie 2016-11-29T00:00:00.000 NaN German Shepherd Mix Dog 98105
66039 Sammy 2016-12-05T00:00:00.000 NaN Terrier Maltese Dog 98105
66040 Buddy 2016-12-06T00:00:00.000 NaN Bullmastiff Mix Dog 98105
66041 Aku 2016-12-07T00:00:00.000 NaN Chihuahua, Short Coat Terrier Dog 98106

66042 rows × 7 columns

# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66042 entries, 0 to 66041
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   animal_s_name       64685 non-null  object 
 1   license_issue_date  66042 non-null  object 
 2   license_number      43885 non-null  float64
 3   primary_breed       66042 non-null  object 
 4   secondary_breed     22538 non-null  object 
 5   species             66042 non-null  object 
 6   zip_code            65884 non-null  object 
dtypes: float64(1), object(6)
memory usage: 3.5+ MB
# Count how often each pet name occurs, most frequent first.
# Rows with a missing name are excluded (dropna=True is the pandas default).
df['animal_s_name'].value_counts(dropna=True)
animal_s_name
Lucy          566
Bella         451
Charlie       447
Max           374
Luna          361
             ... 
Manasseh        1
Taba            1
Miriam          1
Number Six      1
Rollins         1
Name: count, Length: 15795, dtype: int64
Source: homework_for_lecture_2-pet_names.ipynb

homework_for_lecture_4

import pandas as pd
# Load the per-major employment table from a CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='all-ages.csv')
# Display the loaded table.
df
Major_code Major Major_category Total Employed Employed_full_time_year_round Unemployed Unemployment_rate Median P25th P75th
0 1100 GENERAL AGRICULTURE Agriculture & Natural Resources 128148 90245 74078 2423 0.026147 50000 34000 80000.0
1 1101 AGRICULTURE PRODUCTION AND MANAGEMENT Agriculture & Natural Resources 95326 76865 64240 2266 0.028636 54000 36000 80000.0
2 1102 AGRICULTURAL ECONOMICS Agriculture & Natural Resources 33955 26321 22810 821 0.030248 63000 40000 98000.0
3 1103 ANIMAL SCIENCES Agriculture & Natural Resources 103549 81177 64937 3619 0.042679 46000 30000 72000.0
4 1104 FOOD SCIENCE Agriculture & Natural Resources 24280 17281 12722 894 0.049188 62000 38500 90000.0
... ... ... ... ... ... ... ... ... ... ... ...
168 6211 HOSPITALITY MANAGEMENT Business 200854 163393 122499 8862 0.051447 49000 33000 70000.0
169 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business 156673 134478 118249 6186 0.043977 72000 50000 100000.0
170 6299 MISCELLANEOUS BUSINESS & MEDICAL ADMINISTRATION Business 102753 77471 61603 4308 0.052679 53000 36000 83000.0
171 6402 HISTORY Humanities & Liberal Arts 712509 478416 354163 33725 0.065851 50000 35000 80000.0
172 6403 UNITED STATES HISTORY Humanities & Liberal Arts 17746 11887 8204 943 0.073500 50000 39000 81000.0

173 rows × 11 columns

# Group by major and sort by unemployment rate in ascending order (lowest first).
# NOTE(review): the table has 173 rows both before and after grouping, so each
# major forms its own group and sum() leaves the values as they are. If a major
# ever appeared more than once, sum() would add the rates together - confirm that
# is acceptable or switch to a per-column aggregation.
result = (
    df.groupby("Major")
    .sum()
    .sort_values(by="Unemployment_rate", ascending=True)
)
print(result)
                                            Major_code  \
Major                                                    
GEOLOGICAL AND GEOPHYSICAL ENGINEERING            2411   
EDUCATIONAL ADMINISTRATION AND SUPERVISION        2301   
PHARMACOLOGY                                      3607   
MATERIALS SCIENCE                                 5008   
MATHEMATICS AND COMPUTER SCIENCE                  4005   
...                                                ...   
LIBRARY SCIENCE                                   3501   
SCHOOL STUDENT COUNSELING                         2303   
MILITARY TECHNOLOGIES                             3801   
CLINICAL PSYCHOLOGY                               5202   
MISCELLANEOUS FINE ARTS                           6099   

                                                                 Major_category  \
Major                                                                             
GEOLOGICAL AND GEOPHYSICAL ENGINEERING                              Engineering   
EDUCATIONAL ADMINISTRATION AND SUPERVISION                            Education   
PHARMACOLOGY                                             Biology & Life Science   
MATERIALS SCIENCE                                                   Engineering   
MATHEMATICS AND COMPUTER SCIENCE                        Computers & Mathematics   
...                                                                         ...   
LIBRARY SCIENCE                                                       Education   
SCHOOL STUDENT COUNSELING                                             Education   
MILITARY TECHNOLOGIES                       Industrial Arts & Consumer Services   
CLINICAL PSYCHOLOGY                                    Psychology & Social Work   
MISCELLANEOUS FINE ARTS                                                    Arts   

                                            Total  Employed  \
Major                                                         
GEOLOGICAL AND GEOPHYSICAL ENGINEERING       6264      4120   
EDUCATIONAL ADMINISTRATION AND SUPERVISION   4037      3113   
PHARMACOLOGY                                 5015      3481   
MATERIALS SCIENCE                            7208      5866   
MATHEMATICS AND COMPUTER SCIENCE             7184      5874   
...                                           ...       ...   
LIBRARY SCIENCE                             16193      7091   
SCHOOL STUDENT COUNSELING                    2396      1492   
MILITARY TECHNOLOGIES                        4315      1650   
CLINICAL PSYCHOLOGY                          7638      5128   
MISCELLANEOUS FINE ARTS                      8511      6431   

                                            Employed_full_time_year_round  \
Major                                                                       
GEOLOGICAL AND GEOPHYSICAL ENGINEERING                               3350   
EDUCATIONAL ADMINISTRATION AND SUPERVISION                           2468   
PHARMACOLOGY                                                         2579   
MATERIALS SCIENCE                                                    4505   
MATHEMATICS AND COMPUTER SCIENCE                                     5039   
...                                                                   ...   
LIBRARY SCIENCE                                                      4330   
SCHOOL STUDENT COUNSELING                                            1093   
MILITARY TECHNOLOGIES                                                1708   
CLINICAL PSYCHOLOGY                                                  3297   
MISCELLANEOUS FINE ARTS                                              3802   

                                            Unemployed  Unemployment_rate  \
Major                                                                       
GEOLOGICAL AND GEOPHYSICAL ENGINEERING               0           0.000000   
EDUCATIONAL ADMINISTRATION AND SUPERVISION           0           0.000000   
PHARMACOLOGY                                        57           0.016111   
MATERIALS SCIENCE                                  134           0.022333   
MATHEMATICS AND COMPUTER SCIENCE                   150           0.024900   
...                                                ...                ...   
LIBRARY SCIENCE                                    743           0.094843   
SCHOOL STUDENT COUNSELING                          169           0.101746   
MILITARY TECHNOLOGIES                              187           0.101796   
CLINICAL PSYCHOLOGY                                587           0.102712   
MISCELLANEOUS FINE ARTS                           1190           0.156147   

                                            Median  P25th     P75th  
Major                                                                
GEOLOGICAL AND GEOPHYSICAL ENGINEERING       85000  55000  125000.0  
EDUCATIONAL ADMINISTRATION AND SUPERVISION   58000  44750   79000.0  
PHARMACOLOGY                                 60000  35000  105000.0  
MATERIALS SCIENCE                            75000  60000  100000.0  
MATHEMATICS AND COMPUTER SCIENCE             92000  53000  136000.0  
...                                            ...    ...       ...  
LIBRARY SCIENCE                              40000  30000   55000.0  
SCHOOL STUDENT COUNSELING                    41000  33200   50000.0  
MILITARY TECHNOLOGIES                        64000  39750   90000.0  
CLINICAL PSYCHOLOGY                          45000  26100   62000.0  
MISCELLANEOUS FINE ARTS                      45000  30000   60000.0  

[173 rows x 10 columns]
import pandas as pd
# Load the recent-graduates table from a CSV expected in the current working
# directory; the result is bound to the notebook-wide name `df`.
df = pd.read_csv('recent-grads.csv')
# Bare expression: shows the DataFrame as the notebook cell output.
df
Rank Major_code Major Total Men Women Major_category ShareWomen Sample_size Employed ... Part_time Full_time_year_round Unemployed Unemployment_rate Median P25th P75th College_jobs Non_college_jobs Low_wage_jobs
0 1 2419 PETROLEUM ENGINEERING 2339.0 2057.0 282.0 Engineering 0.120564 36 1976 ... 270 1207 37 0.018381 110000 95000 125000 1534 364 193
1 2 2416 MINING AND MINERAL ENGINEERING 756.0 679.0 77.0 Engineering 0.101852 7 640 ... 170 388 85 0.117241 75000 55000 90000 350 257 50
2 3 2415 METALLURGICAL ENGINEERING 856.0 725.0 131.0 Engineering 0.153037 3 648 ... 133 340 16 0.024096 73000 50000 105000 456 176 0
3 4 2417 NAVAL ARCHITECTURE AND MARINE ENGINEERING 1258.0 1123.0 135.0 Engineering 0.107313 16 758 ... 150 692 40 0.050125 70000 43000 80000 529 102 0
4 5 2405 CHEMICAL ENGINEERING 32260.0 21239.0 11021.0 Engineering 0.341631 289 25694 ... 5180 16697 1672 0.061098 65000 50000 75000 18314 4440 972
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
168 169 3609 ZOOLOGY 8409.0 3050.0 5359.0 Biology & Life Science 0.637293 47 6259 ... 2190 3602 304 0.046320 26000 20000 39000 2771 2947 743
169 170 5201 EDUCATIONAL PSYCHOLOGY 2854.0 522.0 2332.0 Psychology & Social Work 0.817099 7 2125 ... 572 1211 148 0.065112 25000 24000 34000 1488 615 82
170 171 5202 CLINICAL PSYCHOLOGY 2838.0 568.0 2270.0 Psychology & Social Work 0.799859 13 2101 ... 648 1293 368 0.149048 25000 25000 40000 986 870 622
171 172 5203 COUNSELING PSYCHOLOGY 4626.0 931.0 3695.0 Psychology & Social Work 0.798746 21 3777 ... 965 2738 214 0.053621 23400 19200 26000 2403 1245 308
172 173 3501 LIBRARY SCIENCE 1098.0 134.0 964.0 Education 0.877960 2 742 ... 237 410 87 0.104946 22000 20000 22000 288 338 192

173 rows × 21 columns

# 按照专业分组,将女生占比从高到低降序排列
# Group by major and order the majors by share of women, highest first.
#
# Every major occurs exactly once in this table (173 rows before and after
# grouping), so each group is a single row.  `first()` keeps that row unchanged,
# whereas the previous `sum()` silently converted missing values to 0: FOOD
# SCIENCE was printed with Total 0.0 / ShareWomen 0.0 although it has 3149
# employed graduates, i.e. the share is unknown rather than zero.
#
# `sort_values` puts NaN last by default, so majors with an unknown share end
# up at the bottom instead of being ranked as "0 % women".
result = df.groupby("Major").first().sort_values("ShareWomen", ascending=False)
print(result)
                                               Rank  Major_code     Total  \
Major                                                                       
EARLY CHILDHOOD EDUCATION                       165        2307   37589.0   
COMMUNICATION DISORDERS SCIENCES AND SERVICES   164        6102   38279.0   
MEDICAL ASSISTING SERVICES                       52        6104   11123.0   
ELEMENTARY EDUCATION                            139        2304  170862.0   
FAMILY AND CONSUMER SCIENCES                    151        2901   58001.0   
...                                             ...         ...       ...   
MINING AND MINERAL ENGINEERING                    2        2416     756.0   
CONSTRUCTION SERVICES                            27        5601   18498.0   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES      67        2504    4790.0   
MILITARY TECHNOLOGIES                            74        3801     124.0   
FOOD SCIENCE                                     22        1104       0.0   

                                                   Men     Women  \
Major                                                              
EARLY CHILDHOOD EDUCATION                       1167.0   36422.0   
COMMUNICATION DISORDERS SCIENCES AND SERVICES   1225.0   37054.0   
MEDICAL ASSISTING SERVICES                       803.0   10320.0   
ELEMENTARY EDUCATION                           13029.0  157833.0   
FAMILY AND CONSUMER SCIENCES                    5166.0   52835.0   
...                                                ...       ...   
MINING AND MINERAL ENGINEERING                   679.0      77.0   
CONSTRUCTION SERVICES                          16820.0    1678.0   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES     4419.0     371.0   
MILITARY TECHNOLOGIES                            124.0       0.0   
FOOD SCIENCE                                       0.0       0.0   

                                                                    Major_category  \
Major                                                                                
EARLY CHILDHOOD EDUCATION                                                Education   
COMMUNICATION DISORDERS SCIENCES AND SERVICES                               Health   
MEDICAL ASSISTING SERVICES                                                  Health   
ELEMENTARY EDUCATION                                                     Education   
FAMILY AND CONSUMER SCIENCES                   Industrial Arts & Consumer Services   
...                                                                            ...   
MINING AND MINERAL ENGINEERING                                         Engineering   
CONSTRUCTION SERVICES                          Industrial Arts & Consumer Services   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES                            Engineering   
MILITARY TECHNOLOGIES                          Industrial Arts & Consumer Services   
FOOD SCIENCE                                       Agriculture & Natural Resources   

                                               ShareWomen  Sample_size  \
Major                                                                    
EARLY CHILDHOOD EDUCATION                        0.968954          342   
COMMUNICATION DISORDERS SCIENCES AND SERVICES    0.967998           95   
MEDICAL ASSISTING SERVICES                       0.927807           67   
ELEMENTARY EDUCATION                             0.923745         1629   
FAMILY AND CONSUMER SCIENCES                     0.910933          518   
...                                                   ...          ...   
MINING AND MINERAL ENGINEERING                   0.101852            7   
CONSTRUCTION SERVICES                            0.090713          295   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES      0.077453           71   
MILITARY TECHNOLOGIES                            0.000000            4   
FOOD SCIENCE                                     0.000000           36   

                                               Employed  Full_time  Part_time  \
Major                                                                           
EARLY CHILDHOOD EDUCATION                         32551      27569       7001   
COMMUNICATION DISORDERS SCIENCES AND SERVICES     29763      19975      13862   
MEDICAL ASSISTING SERVICES                         9168       5643       4107   
ELEMENTARY EDUCATION                             149339     123177      37965   
FAMILY AND CONSUMER SCIENCES                      46624      36747      15872   
...                                                 ...        ...        ...   
MINING AND MINERAL ENGINEERING                      640        556        170   
CONSTRUCTION SERVICES                             16318      15690       1751   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES        4186       4175        247   
MILITARY TECHNOLOGIES                                 0        111          0   
FOOD SCIENCE                                       3149       2558       1121   

                                               Full_time_year_round  \
Major                                                                 
EARLY CHILDHOOD EDUCATION                                     20748   
COMMUNICATION DISORDERS SCIENCES AND SERVICES                 14460   
MEDICAL ASSISTING SERVICES                                     4290   
ELEMENTARY EDUCATION                                          86540   
FAMILY AND CONSUMER SCIENCES                                  26906   
...                                                             ...   
MINING AND MINERAL ENGINEERING                                  388   
CONSTRUCTION SERVICES                                         12313   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES                    3607   
MILITARY TECHNOLOGIES                                           111   
FOOD SCIENCE                                                   1735   

                                               Unemployed  Unemployment_rate  \
Major                                                                          
EARLY CHILDHOOD EDUCATION                            1360           0.040105   
COMMUNICATION DISORDERS SCIENCES AND SERVICES        1487           0.047584   
MEDICAL ASSISTING SERVICES                            407           0.042507   
ELEMENTARY EDUCATION                                 7297           0.046586   
FAMILY AND CONSUMER SCIENCES                         3355           0.067128   
...                                                   ...                ...   
MINING AND MINERAL ENGINEERING                         85           0.117241   
CONSTRUCTION SERVICES                                1042           0.060023   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES           250           0.056357   
MILITARY TECHNOLOGIES                                   0           0.000000   
FOOD SCIENCE                                          338           0.096931   

                                               Median  P25th  P75th  \
Major                                                                 
EARLY CHILDHOOD EDUCATION                       28000  21000  35000   
COMMUNICATION DISORDERS SCIENCES AND SERVICES   28000  20000  40000   
MEDICAL ASSISTING SERVICES                      42000  30000  65000   
ELEMENTARY EDUCATION                            32000  23400  38000   
FAMILY AND CONSUMER SCIENCES                    30000  22900  40000   
...                                               ...    ...    ...   
MINING AND MINERAL ENGINEERING                  75000  55000  90000   
CONSTRUCTION SERVICES                           50000  36000  60000   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES     40000  27000  52000   
MILITARY TECHNOLOGIES                           40000  40000  40000   
FOOD SCIENCE                                    53000  32000  70000   

                                               College_jobs  Non_college_jobs  \
Major                                                                           
EARLY CHILDHOOD EDUCATION                             23515              7705   
COMMUNICATION DISORDERS SCIENCES AND SERVICES         19957              9404   
MEDICAL ASSISTING SERVICES                             2091              6948   
ELEMENTARY EDUCATION                                 108085             36972   
FAMILY AND CONSUMER SCIENCES                          20985             20133   
...                                                     ...               ...   
MINING AND MINERAL ENGINEERING                          350               257   
CONSTRUCTION SERVICES                                  3275              5351   
MECHANICAL ENGINEERING RELATED TECHNOLOGIES            1861              2121   
MILITARY TECHNOLOGIES                                     0                 0   
FOOD SCIENCE                                           1183              1274   

                                               Low_wage_jobs  
Major                                                         
EARLY CHILDHOOD EDUCATION                               2868  
COMMUNICATION DISORDERS SCIENCES AND SERVICES           5125  
MEDICAL ASSISTING SERVICES                              1270  
ELEMENTARY EDUCATION                                   11502  
FAMILY AND CONSUMER SCIENCES                            5248  
...                                                      ...  
MINING AND MINERAL ENGINEERING                            50  
CONSTRUCTION SERVICES                                    703  
MECHANICAL ENGINEERING RELATED TECHNOLOGIES              406  
MILITARY TECHNOLOGIES                                      0  
FOOD SCIENCE                                             485  

[173 rows x 20 columns]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Total of the per-major `Median` values within each major category, drawn as
# a bar chart (one bar per category).
# NOTE(review): a sum grows with the number of majors in a category; if the
# goal is to compare typical earnings, a mean/median may be intended - confirm.
a = df.groupby('Major_category')['Median'].sum()
a.plot(kind='bar')
plt.show()

Source: 按照专业分组,并把失业率从低到高升序排列

homework_for_lecture_5

import pandas as pd
# Plastic-waste CSV hosted in the tidyverse "datascience-box" repository
# (lab-02 data folder); pandas downloads it directly from the URL.
url ='https://raw.githubusercontent.com/tidyverse/datascience-box/refs/heads/main/course-materials/lab-instructions/lab-02/data/plastic-waste.csv'
# Rebinds `df`, replacing the recent-grads frame loaded earlier in this file.
df = pd.read_csv(url)
# Bare expression: shows the DataFrame as the notebook cell output.
df
code entity continent year gdp_per_cap plastic_waste_per_cap mismanaged_plastic_waste_per_cap mismanaged_plastic_waste coastal_pop total_pop
0 AFG Afghanistan Asia 2010 1614.255001 NaN NaN NaN NaN 31411743.0
1 ALB Albania Europe 2010 9927.181841 0.069 0.032 29705.0 2530533.0 3204284.0
2 DZA Algeria Africa 2010 12870.602699 0.144 0.086 520555.0 16556580.0 35468208.0
3 ASM American Samoa Oceania 2010 NaN NaN NaN NaN NaN 68420.0
4 AND Andorra Europe 2010 NaN NaN NaN NaN NaN 84864.0
... ... ... ... ... ... ... ... ... ... ...
235 VNM Vietnam Asia 2010 4408.168612 0.103 0.090 1833819.0 55858245.0 87848445.0
236 ESH Western Sahara Africa 2010 NaN NaN NaN NaN NaN 530500.0
237 YEM Yemen Asia 2010 4478.743599 0.103 0.077 169181.0 6048920.0 NaN
238 ZMB Zambia Africa 2010 3279.277161 NaN NaN NaN NaN 13088570.0
239 ZWE Zimbabwe Africa 2010 1474.877128 NaN NaN NaN NaN 12571454.0

240 rows × 10 columns

%pip install ggplot
Note: you may need to restart the kernel to use updated packages.
Collecting ggplot
  Downloading ggplot-0.11.5-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting brewer2mpl (from ggplot)
  Downloading brewer2mpl-1.4.1-py2.py3-none-any.whl.metadata (3.7 kB)
Requirement already satisfied: cycler in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (0.11.0)
Requirement already satisfied: matplotlib in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (3.8.4)
Requirement already satisfied: numpy in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (1.26.4)
Requirement already satisfied: pandas in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (2.2.2)
Requirement already satisfied: patsy>=0.4 in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (0.5.6)
Requirement already satisfied: scipy in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (1.13.1)
Requirement already satisfied: six in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (1.16.0)
Requirement already satisfied: statsmodels in c:\users\shuos\anaconda3\lib\site-packages (from ggplot) (0.14.2)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (1.2.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\shuos\anaconda3\lib\site-packages (from matplotlib->ggplot) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\shuos\anaconda3\lib\site-packages (from pandas->ggplot) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\shuos\anaconda3\lib\site-packages (from pandas->ggplot) (2023.3)
Downloading ggplot-0.11.5-py2.py3-none-any.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
    --------------------------------------- 0.0/2.2 MB 146.3 kB/s eta 0:00:15
    --------------------------------------- 0.0/2.2 MB 146.3 kB/s eta 0:00:15
    --------------------------------------- 0.0/2.2 MB 146.3 kB/s eta 0:00:15
    --------------------------------------- 0.0/2.2 MB 146.3 kB/s eta 0:00:15
    --------------------------------------- 0.0/2.2 MB 93.7 kB/s eta 0:00:24
    --------------------------------------- 0.0/2.2 MB 93.7 kB/s eta 0:00:24
   - -------------------------------------- 0.1/2.2 MB 121.3 kB/s eta 0:00:18
   - -------------------------------------- 0.1/2.2 MB 121.3 kB/s eta 0:00:18
   - -------------------------------------- 0.1/2.2 MB 121.3 kB/s eta 0:00:18
   - -------------------------------------- 0.1/2.2 MB 121.3 kB/s eta 0:00:18
   - -------------------------------------- 0.1/2.2 MB 103.4 kB/s eta 0:00:21
   - -------------------------------------- 0.1/2.2 MB 128.0 kB/s eta 0:00:17
   -- ------------------------------------- 0.1/2.2 MB 142.4 kB/s eta 0:00:15
   -- ------------------------------------- 0.1/2.2 MB 150.2 kB/s eta 0:00:14
   -- ------------------------------------- 0.1/2.2 MB 170.4 kB/s eta 0:00:13
   -- ------------------------------------- 0.2/2.2 MB 169.9 kB/s eta 0:00:13
   --- ------------------------------------ 0.2/2.2 MB 184.1 kB/s eta 0:00:12
   --- ------------------------------------ 0.2/2.2 MB 184.1 kB/s eta 0:00:12
   --- ------------------------------------ 0.2/2.2 MB 193.4 kB/s eta 0:00:11
   --- ------------------------------------ 0.2/2.2 MB 191.7 kB/s eta 0:00:11
   ---- ----------------------------------- 0.2/2.2 MB 205.4 kB/s eta 0:00:10
   ---- ----------------------------------- 0.3/2.2 MB 224.6 kB/s eta 0:00:09
   ---- ----------------------------------- 0.3/2.2 MB 224.6 kB/s eta 0:00:09
   ----- ---------------------------------- 0.3/2.2 MB 233.5 kB/s eta 0:00:09
   ----- ---------------------------------- 0.3/2.2 MB 233.5 kB/s eta 0:00:09
   ----- ---------------------------------- 0.3/2.2 MB 233.5 kB/s eta 0:00:09
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 213.2 kB/s eta 0:00:10
   ----- ---------------------------------- 0.3/2.2 MB 155.8 kB/s eta 0:00:13
   ----- ---------------------------------- 0.3/2.2 MB 157.3 kB/s eta 0:00:13
   ----- ---------------------------------- 0.3/2.2 MB 157.3 kB/s eta 0:00:13
   ------ --------------------------------- 0.3/2.2 MB 157.7 kB/s eta 0:00:12
   ------ --------------------------------- 0.4/2.2 MB 168.7 kB/s eta 0:00:11
   ------- -------------------------------- 0.4/2.2 MB 177.9 kB/s eta 0:00:11
   ------- -------------------------------- 0.4/2.2 MB 185.9 kB/s eta 0:00:10
   ------- -------------------------------- 0.4/2.2 MB 185.9 kB/s eta 0:00:10
   ------- -------------------------------- 0.4/2.2 MB 184.7 kB/s eta 0:00:10
   ------- -------------------------------- 0.4/2.2 MB 184.7 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 183.0 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 183.0 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 184.3 kB/s eta 0:00:10
   -------- ------------------------------- 0.5/2.2 MB 168.4 kB/s eta 0:00:11
   --------- ------------------------------ 0.5/2.2 MB 172.8 kB/s eta 0:00:10
   --------- ------------------------------ 0.5/2.2 MB 178.1 kB/s eta 0:00:10
   --------- ------------------------------ 0.5/2.2 MB 178.1 kB/s eta 0:00:10
   ---------- ----------------------------- 0.6/2.2 MB 181.9 kB/s eta 0:00:10
   ---------- ----------------------------- 0.6/2.2 MB 191.1 kB/s eta 0:00:09
   ----------- ---------------------------- 0.6/2.2 MB 196.3 kB/s eta 0:00:09
   ----------- ---------------------------- 0.6/2.2 MB 204.2 kB/s eta 0:00:08
   ------------ --------------------------- 0.7/2.2 MB 208.7 kB/s eta 0:00:08
   ------------ --------------------------- 0.7/2.2 MB 214.2 kB/s eta 0:00:08
   ------------ --------------------------- 0.7/2.2 MB 214.2 kB/s eta 0:00:08
   ------------ --------------------------- 0.7/2.2 MB 217.4 kB/s eta 0:00:07
   ------------ --------------------------- 0.7/2.2 MB 217.4 kB/s eta 0:00:07
   ------------ --------------------------- 0.7/2.2 MB 217.4 kB/s eta 0:00:07
   ------------ --------------------------- 0.7/2.2 MB 217.4 kB/s eta 0:00:07
   ------------- -------------------------- 0.7/2.2 MB 206.6 kB/s eta 0:00:08
   ------------- -------------------------- 0.7/2.2 MB 206.6 kB/s eta 0:00:08
   ------------- -------------------------- 0.8/2.2 MB 210.8 kB/s eta 0:00:07
   -------------- ------------------------- 0.8/2.2 MB 212.8 kB/s eta 0:00:07
   -------------- ------------------------- 0.8/2.2 MB 218.5 kB/s eta 0:00:07
   -------------- ------------------------- 0.8/2.2 MB 218.5 kB/s eta 0:00:07
   --------------- ------------------------ 0.9/2.2 MB 229.2 kB/s eta 0:00:06
   ---------------- ----------------------- 0.9/2.2 MB 229.1 kB/s eta 0:00:06
   ---------------- ----------------------- 0.9/2.2 MB 229.1 kB/s eta 0:00:06
   ---------------- ----------------------- 0.9/2.2 MB 229.1 kB/s eta 0:00:06
   ----------------- ---------------------- 1.0/2.2 MB 238.1 kB/s eta 0:00:06
   ----------------- ---------------------- 1.0/2.2 MB 238.1 kB/s eta 0:00:06
   ----------------- ---------------------- 1.0/2.2 MB 241.1 kB/s eta 0:00:06
   ------------------ --------------------- 1.0/2.2 MB 245.8 kB/s eta 0:00:05
   ------------------- -------------------- 1.1/2.2 MB 255.3 kB/s eta 0:00:05
   ------------------- -------------------- 1.1/2.2 MB 255.3 kB/s eta 0:00:05
   ------------------- -------------------- 1.1/2.2 MB 255.3 kB/s eta 0:00:05
   ------------------- -------------------- 1.1/2.2 MB 255.3 kB/s eta 0:00:05
   -------------------- ------------------- 1.1/2.2 MB 250.4 kB/s eta 0:00:05
   -------------------- ------------------- 1.1/2.2 MB 253.3 kB/s eta 0:00:05
   -------------------- ------------------- 1.1/2.2 MB 253.0 kB/s eta 0:00:05
   -------------------- ------------------- 1.1/2.2 MB 253.0 kB/s eta 0:00:05
   --------------------- ------------------ 1.2/2.2 MB 259.0 kB/s eta 0:00:04
   ---------------------- ----------------- 1.2/2.2 MB 263.0 kB/s eta 0:00:04
   ---------------------- ----------------- 1.2/2.2 MB 265.7 kB/s eta 0:00:04
   ---------------------- ----------------- 1.2/2.2 MB 265.7 kB/s eta 0:00:04
   ----------------------- ---------------- 1.3/2.2 MB 270.4 kB/s eta 0:00:04
   ----------------------- ---------------- 1.3/2.2 MB 270.8 kB/s eta 0:00:04
   ----------------------- ---------------- 1.3/2.2 MB 270.8 kB/s eta 0:00:04
   ------------------------ --------------- 1.4/2.2 MB 276.9 kB/s eta 0:00:04
   ------------------------- -------------- 1.4/2.2 MB 279.7 kB/s eta 0:00:03
   ------------------------- -------------- 1.4/2.2 MB 287.4 kB/s eta 0:00:03
   -------------------------- ------------- 1.5/2.2 MB 288.8 kB/s eta 0:00:03
   -------------------------- ------------- 1.5/2.2 MB 291.3 kB/s eta 0:00:03
   --------------------------- ------------ 1.5/2.2 MB 295.3 kB/s eta 0:00:03
   --------------------------- ------------ 1.5/2.2 MB 297.7 kB/s eta 0:00:03
   --------------------------- ------------ 1.5/2.2 MB 297.0 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.5 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.5 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 296.9 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 284.5 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 284.5 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 284.5 kB/s eta 0:00:03
   ---------------------------- ----------- 1.6/2.2 MB 284.5 kB/s eta 0:00:03
   ----------------------------- ---------- 1.6/2.2 MB 279.9 kB/s eta 0:00:03
   ----------------------------- ---------- 1.6/2.2 MB 281.1 kB/s eta 0:00:02
   ------------------------------ --------- 1.7/2.2 MB 286.9 kB/s eta 0:00:02
   ------------------------------ --------- 1.7/2.2 MB 286.9 kB/s eta 0:00:02
   ------------------------------ --------- 1.7/2.2 MB 284.8 kB/s eta 0:00:02
   ------------------------------ --------- 1.7/2.2 MB 284.8 kB/s eta 0:00:02
   ------------------------------- -------- 1.7/2.2 MB 283.8 kB/s eta 0:00:02
   ------------------------------- -------- 1.8/2.2 MB 284.2 kB/s eta 0:00:02
   ------------------------------- -------- 1.8/2.2 MB 283.7 kB/s eta 0:00:02
   -------------------------------- ------- 1.8/2.2 MB 285.6 kB/s eta 0:00:02
   -------------------------------- ------- 1.8/2.2 MB 285.6 kB/s eta 0:00:02
   -------------------------------- ------- 1.8/2.2 MB 285.6 kB/s eta 0:00:02
   -------------------------------- ------- 1.8/2.2 MB 285.6 kB/s eta 0:00:02
   --------------------------------- ------ 1.8/2.2 MB 283.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.8/2.2 MB 285.4 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   --------------------------------- ------ 1.9/2.2 MB 285.8 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 270.1 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 270.1 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 270.1 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 270.1 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 270.1 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 262.7 kB/s eta 0:00:02
   ---------------------------------- ----- 1.9/2.2 MB 264.4 kB/s eta 0:00:02
   ----------------------------------- ---- 1.9/2.2 MB 266.9 kB/s eta 0:00:01
   ----------------------------------- ---- 1.9/2.2 MB 266.9 kB/s eta 0:00:01
   ----------------------------------- ---- 1.9/2.2 MB 266.9 kB/s eta 0:00:01
   ----------------------------------- ---- 2.0/2.2 MB 265.8 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 267.1 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 266.2 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 266.2 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 266.2 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 266.2 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 264.0 kB/s eta 0:00:01
   ------------------------------------ --- 2.0/2.2 MB 264.0 kB/s eta 0:00:01
   ------------------------------------- -- 2.0/2.2 MB 262.4 kB/s eta 0:00:01
   ------------------------------------- -- 2.0/2.2 MB 262.4 kB/s eta 0:00:01
   ------------------------------------- -- 2.0/2.2 MB 262.4 kB/s eta 0:00:01
   ------------------------------------- -- 2.0/2.2 MB 262.4 kB/s eta 0:00:01
   ------------------------------------- -- 2.1/2.2 MB 258.0 kB/s eta 0:00:01
   ------------------------------------- -- 2.1/2.2 MB 259.6 kB/s eta 0:00:01
   ------------------------------------- -- 2.1/2.2 MB 258.8 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 260.4 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 243.6 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   -------------------------------------- - 2.1/2.2 MB 226.1 kB/s eta 0:00:01
   ---------------------------------------  2.2/2.2 MB 217.4 kB/s eta 0:00:01
   ---------------------------------------  2.2/2.2 MB 219.1 kB/s eta 0:00:01
   ---------------------------------------- 2.2/2.2 MB 220.0 kB/s eta 0:00:00
Downloading brewer2mpl-1.4.1-py2.py3-none-any.whl (24 kB)
Installing collected packages: brewer2mpl, ggplot
Successfully installed brewer2mpl-1.4.1 ggplot-0.11.5
import pandas as pd
from lets_plot import *

# Enable inline HTML rendering of Lets-Plot figures in the notebook.
LetsPlot.setup_html()
# Distribution of plastic waste per capita, one panel per continent.
# A histogram bins a single continuous variable on x and computes the counts
# for y itself, so the categorical `continent` cannot be the x aesthetic and
# no y mapping is supplied; the continent split is expressed with facets.
(
    ggplot(df, aes(x="plastic_waste_per_cap"))
    + geom_histogram()
    + facet_wrap("continent")
)
# Box plot of plastic waste per capita, one box per continent.
ggplot(df, aes(x="continent", y="plastic_waste_per_cap")) + geom_boxplot()
import seaborn as sns
import matplotlib.pyplot as plt

# Draw a violin plot with seaborn's violinplot: plastic waste per capita,
# one violin per continent.
sns.violinplot(data=df, x="continent", y="plastic_waste_per_cap")
plt.show()

# Scatter plot: plastic waste per capita (x) against mismanaged plastic waste
# per capita (y), one point per row of `df`.
(
    ggplot(
        df,
        aes(x="plastic_waste_per_cap", y="mismanaged_plastic_waste_per_cap"),
    )
    + geom_point()
)
# Same scatter plot as above, with the points coloured by continent.
(
    ggplot(
        df,
        aes(
            x="plastic_waste_per_cap",
            y="mismanaged_plastic_waste_per_cap",
            colour="continent",
        ),
    )
    + geom_point()
)
# Plastic waste per capita against total population, coloured by continent.
# Both axes are clipped (x to 150 million, y to 0.8) so the bulk of the points
# is not squeezed by the largest values.
(
    ggplot(df, aes(x="total_pop", y="plastic_waste_per_cap", colour="continent"))
    + geom_point()
    + xlim(0, 150_000_000)
    + ylim(0, 0.8)
)
# Plastic waste per capita against coastal population, coloured by continent.
# Axes are clipped to x <= 50 million and y <= 0.8.
(
    ggplot(df, aes(x="coastal_pop", y="plastic_waste_per_cap", colour="continent"))
    + geom_point()
    + xlim(0, 50_000_000)
    + ylim(0, 0.8)
)
Source: 利用violinplot函数绘制小提琴图