import pandas as pd
= pd.read_csv('all-ages.csv') df
In [12]:
In [13]:
df
Major_code | Major | Major_category | Total | Employed | Employed_full_time_year_round | Unemployed | Unemployment_rate | Median | P25th | P75th | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1100 | GENERAL AGRICULTURE | Agriculture & Natural Resources | 128148 | 90245 | 74078 | 2423 | 0.026147 | 50000 | 34000 | 80000.0 |
1 | 1101 | AGRICULTURE PRODUCTION AND MANAGEMENT | Agriculture & Natural Resources | 95326 | 76865 | 64240 | 2266 | 0.028636 | 54000 | 36000 | 80000.0 |
2 | 1102 | AGRICULTURAL ECONOMICS | Agriculture & Natural Resources | 33955 | 26321 | 22810 | 821 | 0.030248 | 63000 | 40000 | 98000.0 |
3 | 1103 | ANIMAL SCIENCES | Agriculture & Natural Resources | 103549 | 81177 | 64937 | 3619 | 0.042679 | 46000 | 30000 | 72000.0 |
4 | 1104 | FOOD SCIENCE | Agriculture & Natural Resources | 24280 | 17281 | 12722 | 894 | 0.049188 | 62000 | 38500 | 90000.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
168 | 6211 | HOSPITALITY MANAGEMENT | Business | 200854 | 163393 | 122499 | 8862 | 0.051447 | 49000 | 33000 | 70000.0 |
169 | 6212 | MANAGEMENT INFORMATION SYSTEMS AND STATISTICS | Business | 156673 | 134478 | 118249 | 6186 | 0.043977 | 72000 | 50000 | 100000.0 |
170 | 6299 | MISCELLANEOUS BUSINESS & MEDICAL ADMINISTRATION | Business | 102753 | 77471 | 61603 | 4308 | 0.052679 | 53000 | 36000 | 83000.0 |
171 | 6402 | HISTORY | Humanities & Liberal Arts | 712509 | 478416 | 354163 | 33725 | 0.065851 | 50000 | 35000 | 80000.0 |
172 | 6403 | UNITED STATES HISTORY | Humanities & Liberal Arts | 17746 | 11887 | 8204 | 943 | 0.073500 | 50000 | 39000 | 81000.0 |
173 rows × 11 columns
In [14]:
# 按照专业分组,并把失业率从低到高升序排列
= df.groupby(["Major"]).sum().sort_values(["Unemployment_rate"])
result print(result)
Major_code \
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING 2411
EDUCATIONAL ADMINISTRATION AND SUPERVISION 2301
PHARMACOLOGY 3607
MATERIALS SCIENCE 5008
MATHEMATICS AND COMPUTER SCIENCE 4005
... ...
LIBRARY SCIENCE 3501
SCHOOL STUDENT COUNSELING 2303
MILITARY TECHNOLOGIES 3801
CLINICAL PSYCHOLOGY 5202
MISCELLANEOUS FINE ARTS 6099
Major_category \
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING Engineering
EDUCATIONAL ADMINISTRATION AND SUPERVISION Education
PHARMACOLOGY Biology & Life Science
MATERIALS SCIENCE Engineering
MATHEMATICS AND COMPUTER SCIENCE Computers & Mathematics
... ...
LIBRARY SCIENCE Education
SCHOOL STUDENT COUNSELING Education
MILITARY TECHNOLOGIES Industrial Arts & Consumer Services
CLINICAL PSYCHOLOGY Psychology & Social Work
MISCELLANEOUS FINE ARTS Arts
Total Employed \
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING 6264 4120
EDUCATIONAL ADMINISTRATION AND SUPERVISION 4037 3113
PHARMACOLOGY 5015 3481
MATERIALS SCIENCE 7208 5866
MATHEMATICS AND COMPUTER SCIENCE 7184 5874
... ... ...
LIBRARY SCIENCE 16193 7091
SCHOOL STUDENT COUNSELING 2396 1492
MILITARY TECHNOLOGIES 4315 1650
CLINICAL PSYCHOLOGY 7638 5128
MISCELLANEOUS FINE ARTS 8511 6431
Employed_full_time_year_round \
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING 3350
EDUCATIONAL ADMINISTRATION AND SUPERVISION 2468
PHARMACOLOGY 2579
MATERIALS SCIENCE 4505
MATHEMATICS AND COMPUTER SCIENCE 5039
... ...
LIBRARY SCIENCE 4330
SCHOOL STUDENT COUNSELING 1093
MILITARY TECHNOLOGIES 1708
CLINICAL PSYCHOLOGY 3297
MISCELLANEOUS FINE ARTS 3802
Unemployed Unemployment_rate \
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING 0 0.000000
EDUCATIONAL ADMINISTRATION AND SUPERVISION 0 0.000000
PHARMACOLOGY 57 0.016111
MATERIALS SCIENCE 134 0.022333
MATHEMATICS AND COMPUTER SCIENCE 150 0.024900
... ... ...
LIBRARY SCIENCE 743 0.094843
SCHOOL STUDENT COUNSELING 169 0.101746
MILITARY TECHNOLOGIES 187 0.101796
CLINICAL PSYCHOLOGY 587 0.102712
MISCELLANEOUS FINE ARTS 1190 0.156147
Median P25th P75th
Major
GEOLOGICAL AND GEOPHYSICAL ENGINEERING 85000 55000 125000.0
EDUCATIONAL ADMINISTRATION AND SUPERVISION 58000 44750 79000.0
PHARMACOLOGY 60000 35000 105000.0
MATERIALS SCIENCE 75000 60000 100000.0
MATHEMATICS AND COMPUTER SCIENCE 92000 53000 136000.0
... ... ... ...
LIBRARY SCIENCE 40000 30000 55000.0
SCHOOL STUDENT COUNSELING 41000 33200 50000.0
MILITARY TECHNOLOGIES 64000 39750 90000.0
CLINICAL PSYCHOLOGY 45000 26100 62000.0
MISCELLANEOUS FINE ARTS 45000 30000 60000.0
[173 rows x 10 columns]
In [20]:
import pandas as pd
= pd.read_csv('recent-grads.csv')
df df
Rank | Major_code | Major | Total | Men | Women | Major_category | ShareWomen | Sample_size | Employed | ... | Part_time | Full_time_year_round | Unemployed | Unemployment_rate | Median | P25th | P75th | College_jobs | Non_college_jobs | Low_wage_jobs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2419 | PETROLEUM ENGINEERING | 2339.0 | 2057.0 | 282.0 | Engineering | 0.120564 | 36 | 1976 | ... | 270 | 1207 | 37 | 0.018381 | 110000 | 95000 | 125000 | 1534 | 364 | 193 |
1 | 2 | 2416 | MINING AND MINERAL ENGINEERING | 756.0 | 679.0 | 77.0 | Engineering | 0.101852 | 7 | 640 | ... | 170 | 388 | 85 | 0.117241 | 75000 | 55000 | 90000 | 350 | 257 | 50 |
2 | 3 | 2415 | METALLURGICAL ENGINEERING | 856.0 | 725.0 | 131.0 | Engineering | 0.153037 | 3 | 648 | ... | 133 | 340 | 16 | 0.024096 | 73000 | 50000 | 105000 | 456 | 176 | 0 |
3 | 4 | 2417 | NAVAL ARCHITECTURE AND MARINE ENGINEERING | 1258.0 | 1123.0 | 135.0 | Engineering | 0.107313 | 16 | 758 | ... | 150 | 692 | 40 | 0.050125 | 70000 | 43000 | 80000 | 529 | 102 | 0 |
4 | 5 | 2405 | CHEMICAL ENGINEERING | 32260.0 | 21239.0 | 11021.0 | Engineering | 0.341631 | 289 | 25694 | ... | 5180 | 16697 | 1672 | 0.061098 | 65000 | 50000 | 75000 | 18314 | 4440 | 972 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
168 | 169 | 3609 | ZOOLOGY | 8409.0 | 3050.0 | 5359.0 | Biology & Life Science | 0.637293 | 47 | 6259 | ... | 2190 | 3602 | 304 | 0.046320 | 26000 | 20000 | 39000 | 2771 | 2947 | 743 |
169 | 170 | 5201 | EDUCATIONAL PSYCHOLOGY | 2854.0 | 522.0 | 2332.0 | Psychology & Social Work | 0.817099 | 7 | 2125 | ... | 572 | 1211 | 148 | 0.065112 | 25000 | 24000 | 34000 | 1488 | 615 | 82 |
170 | 171 | 5202 | CLINICAL PSYCHOLOGY | 2838.0 | 568.0 | 2270.0 | Psychology & Social Work | 0.799859 | 13 | 2101 | ... | 648 | 1293 | 368 | 0.149048 | 25000 | 25000 | 40000 | 986 | 870 | 622 |
171 | 172 | 5203 | COUNSELING PSYCHOLOGY | 4626.0 | 931.0 | 3695.0 | Psychology & Social Work | 0.798746 | 21 | 3777 | ... | 965 | 2738 | 214 | 0.053621 | 23400 | 19200 | 26000 | 2403 | 1245 | 308 |
172 | 173 | 3501 | LIBRARY SCIENCE | 1098.0 | 134.0 | 964.0 | Education | 0.877960 | 2 | 742 | ... | 237 | 410 | 87 | 0.104946 | 22000 | 20000 | 22000 | 288 | 338 | 192 |
173 rows × 21 columns
In [21]:
# 按照专业分组,将女生占比从高到低降序排列
= df.groupby(["Major"]).sum().sort_values(["ShareWomen"],ascending=False)
result print(result)
Rank Major_code Total \
Major
EARLY CHILDHOOD EDUCATION 165 2307 37589.0
COMMUNICATION DISORDERS SCIENCES AND SERVICES 164 6102 38279.0
MEDICAL ASSISTING SERVICES 52 6104 11123.0
ELEMENTARY EDUCATION 139 2304 170862.0
FAMILY AND CONSUMER SCIENCES 151 2901 58001.0
... ... ... ...
MINING AND MINERAL ENGINEERING 2 2416 756.0
CONSTRUCTION SERVICES 27 5601 18498.0
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 67 2504 4790.0
MILITARY TECHNOLOGIES 74 3801 124.0
FOOD SCIENCE 22 1104 0.0
Men Women \
Major
EARLY CHILDHOOD EDUCATION 1167.0 36422.0
COMMUNICATION DISORDERS SCIENCES AND SERVICES 1225.0 37054.0
MEDICAL ASSISTING SERVICES 803.0 10320.0
ELEMENTARY EDUCATION 13029.0 157833.0
FAMILY AND CONSUMER SCIENCES 5166.0 52835.0
... ... ...
MINING AND MINERAL ENGINEERING 679.0 77.0
CONSTRUCTION SERVICES 16820.0 1678.0
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 4419.0 371.0
MILITARY TECHNOLOGIES 124.0 0.0
FOOD SCIENCE 0.0 0.0
Major_category \
Major
EARLY CHILDHOOD EDUCATION Education
COMMUNICATION DISORDERS SCIENCES AND SERVICES Health
MEDICAL ASSISTING SERVICES Health
ELEMENTARY EDUCATION Education
FAMILY AND CONSUMER SCIENCES Industrial Arts & Consumer Services
... ...
MINING AND MINERAL ENGINEERING Engineering
CONSTRUCTION SERVICES Industrial Arts & Consumer Services
MECHANICAL ENGINEERING RELATED TECHNOLOGIES Engineering
MILITARY TECHNOLOGIES Industrial Arts & Consumer Services
FOOD SCIENCE Agriculture & Natural Resources
ShareWomen Sample_size \
Major
EARLY CHILDHOOD EDUCATION 0.968954 342
COMMUNICATION DISORDERS SCIENCES AND SERVICES 0.967998 95
MEDICAL ASSISTING SERVICES 0.927807 67
ELEMENTARY EDUCATION 0.923745 1629
FAMILY AND CONSUMER SCIENCES 0.910933 518
... ... ...
MINING AND MINERAL ENGINEERING 0.101852 7
CONSTRUCTION SERVICES 0.090713 295
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 0.077453 71
MILITARY TECHNOLOGIES 0.000000 4
FOOD SCIENCE 0.000000 36
Employed Full_time Part_time \
Major
EARLY CHILDHOOD EDUCATION 32551 27569 7001
COMMUNICATION DISORDERS SCIENCES AND SERVICES 29763 19975 13862
MEDICAL ASSISTING SERVICES 9168 5643 4107
ELEMENTARY EDUCATION 149339 123177 37965
FAMILY AND CONSUMER SCIENCES 46624 36747 15872
... ... ... ...
MINING AND MINERAL ENGINEERING 640 556 170
CONSTRUCTION SERVICES 16318 15690 1751
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 4186 4175 247
MILITARY TECHNOLOGIES 0 111 0
FOOD SCIENCE 3149 2558 1121
Full_time_year_round \
Major
EARLY CHILDHOOD EDUCATION 20748
COMMUNICATION DISORDERS SCIENCES AND SERVICES 14460
MEDICAL ASSISTING SERVICES 4290
ELEMENTARY EDUCATION 86540
FAMILY AND CONSUMER SCIENCES 26906
... ...
MINING AND MINERAL ENGINEERING 388
CONSTRUCTION SERVICES 12313
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 3607
MILITARY TECHNOLOGIES 111
FOOD SCIENCE 1735
Unemployed Unemployment_rate \
Major
EARLY CHILDHOOD EDUCATION 1360 0.040105
COMMUNICATION DISORDERS SCIENCES AND SERVICES 1487 0.047584
MEDICAL ASSISTING SERVICES 407 0.042507
ELEMENTARY EDUCATION 7297 0.046586
FAMILY AND CONSUMER SCIENCES 3355 0.067128
... ... ...
MINING AND MINERAL ENGINEERING 85 0.117241
CONSTRUCTION SERVICES 1042 0.060023
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 250 0.056357
MILITARY TECHNOLOGIES 0 0.000000
FOOD SCIENCE 338 0.096931
Median P25th P75th \
Major
EARLY CHILDHOOD EDUCATION 28000 21000 35000
COMMUNICATION DISORDERS SCIENCES AND SERVICES 28000 20000 40000
MEDICAL ASSISTING SERVICES 42000 30000 65000
ELEMENTARY EDUCATION 32000 23400 38000
FAMILY AND CONSUMER SCIENCES 30000 22900 40000
... ... ... ...
MINING AND MINERAL ENGINEERING 75000 55000 90000
CONSTRUCTION SERVICES 50000 36000 60000
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 40000 27000 52000
MILITARY TECHNOLOGIES 40000 40000 40000
FOOD SCIENCE 53000 32000 70000
College_jobs Non_college_jobs \
Major
EARLY CHILDHOOD EDUCATION 23515 7705
COMMUNICATION DISORDERS SCIENCES AND SERVICES 19957 9404
MEDICAL ASSISTING SERVICES 2091 6948
ELEMENTARY EDUCATION 108085 36972
FAMILY AND CONSUMER SCIENCES 20985 20133
... ... ...
MINING AND MINERAL ENGINEERING 350 257
CONSTRUCTION SERVICES 3275 5351
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 1861 2121
MILITARY TECHNOLOGIES 0 0
FOOD SCIENCE 1183 1274
Low_wage_jobs
Major
EARLY CHILDHOOD EDUCATION 2868
COMMUNICATION DISORDERS SCIENCES AND SERVICES 5125
MEDICAL ASSISTING SERVICES 1270
ELEMENTARY EDUCATION 11502
FAMILY AND CONSUMER SCIENCES 5248
... ...
MINING AND MINERAL ENGINEERING 50
CONSTRUCTION SERVICES 703
MECHANICAL ENGINEERING RELATED TECHNOLOGIES 406
MILITARY TECHNOLOGIES 0
FOOD SCIENCE 485
[173 rows x 20 columns]
In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
=df['Median'].groupby(df['Major_category']).sum()
a
a.plot.bar() plt.show()