import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Load the SimHei typeface so the CJK labels used in the charts can be rendered.
from matplotlib.font_manager import fontManager

FONT_PATH = "../../simhei.ttf"
# FontProperties(fname=...) alone does not make the family selectable by name.
# The later `sns.set(font=font.get_name())` calls look the font up by family
# name, which only succeeds if the file is registered with the font manager
# (or happens to be installed system-wide).
if hasattr(fontManager, "addfont"):  # addfont is missing on older matplotlib
    fontManager.addfont(FONT_PATH)
font = FontProperties(fname=FONT_PATH)

# Read the GDP table; the first CSV column becomes the row index.
GDP = pd.read_csv("../datasets/Chinas GDP in Province Zh.csv", index_col=0)
# Sort rows by index label, then transpose so the index labels become columns.
GDP_new = GDP.sort_index().T
GDP_new
1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | ... | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
北京市 | 710.2 | 888.9 | 1149.8 | 1516.2 | 1819.4 | 2118.1 | 2439.1 | 2759.8 | 3277.8 | 3861.5 | ... | 17188.8 | 19024.7 | 21134.6 | 22926.0 | 24779.1 | 27041.2 | 29883.0 | 33106.0 | 35445.1 | 36102.6 |
天津市 | 411.0 | 538.9 | 732.9 | 932.0 | 1121.9 | 1264.6 | 1344.7 | 1435.6 | 1591.7 | 1756.9 | ... | 8112.5 | 9043.0 | 9945.4 | 10640.6 | 10879.5 | 11477.2 | 12450.6 | 13362.9 | 14055.5 | 14083.7 |
河北省 | 1278.5 | 1620.8 | 2114.5 | 2701.2 | 3198.0 | 3652.1 | 3924.5 | 4158.9 | 4628.2 | 5062.9 | ... | 21384.7 | 23077.5 | 24259.6 | 25208.9 | 26398.4 | 28474.1 | 30640.8 | 32494.6 | 34978.6 | 36206.9 |
山西省 | 551.1 | 680.4 | 826.7 | 1076.0 | 1292.1 | 1476.0 | 1611.1 | 1667.1 | 1845.7 | 2029.5 | ... | 10894.4 | 11683.1 | 11987.2 | 12094.7 | 11836.4 | 11946.4 | 14484.3 | 15958.1 | 16961.6 | 17651.9 |
内蒙古自治区 | 421.7 | 537.8 | 695.1 | 857.1 | 1023.1 | 1153.5 | 1262.5 | 1379.3 | 1539.1 | 1713.8 | ... | 9458.1 | 10470.1 | 11392.4 | 12158.2 | 12949.0 | 13789.3 | 14898.1 | 16140.8 | 17212.5 | 17359.8 |
辽宁省 | 1473.0 | 2010.8 | 2461.8 | 2793.4 | 3157.7 | 3582.5 | 3881.7 | 4171.7 | 4669.1 | 5033.1 | ... | 16354.9 | 17848.6 | 19208.8 | 20025.7 | 20210.3 | 20392.5 | 21693.0 | 23510.5 | 24855.3 | 25115.0 |
吉林省 | 558.1 | 718.6 | 937.7 | 1137.2 | 1346.8 | 1464.3 | 1577.1 | 1673.0 | 1751.4 | 1900.9 | ... | 7734.6 | 8678.0 | 9427.9 | 9966.5 | 10018.0 | 10427.0 | 10922.0 | 11253.8 | 11726.8 | 12311.3 |
黑龙江省 | 857.4 | 1075.3 | 1448.1 | 1790.2 | 2137.6 | 2397.6 | 2470.2 | 2536.9 | 2855.5 | 3043.4 | ... | 9935.0 | 11015.8 | 11849.1 | 12170.8 | 11690.0 | 11895.0 | 12313.0 | 12846.5 | 13544.4 | 13698.5 |
上海市 | 1114.3 | 1519.2 | 1990.9 | 2518.1 | 2980.8 | 3465.3 | 3831.0 | 4222.3 | 4812.2 | 5257.7 | ... | 20009.7 | 21305.6 | 23204.1 | 25269.8 | 26887.0 | 29887.0 | 32925.0 | 36011.8 | 37987.6 | 38700.6 |
江苏省 | 2136.0 | 2998.2 | 4057.4 | 5155.3 | 6004.2 | 6680.3 | 7200.0 | 7697.8 | 8553.7 | 9456.8 | ... | 48839.2 | 53701.9 | 59349.4 | 64830.5 | 71255.9 | 77350.9 | 85869.8 | 93207.6 | 98656.8 | 102719.0 |
浙江省 | 1375.7 | 1925.9 | 2689.3 | 3563.9 | 4195.8 | 4695.9 | 5065.5 | 5461.3 | 6164.8 | 6927.7 | ... | 31854.8 | 34382.4 | 37334.6 | 40023.5 | 43507.7 | 47254.0 | 52403.1 | 58002.8 | 62462.0 | 64613.3 |
安徽省 | 827.0 | 1073.5 | 1378.9 | 1891.2 | 2199.7 | 2485.4 | 2711.7 | 2907.8 | 3125.3 | 3502.8 | ... | 16284.9 | 18341.7 | 20584.0 | 22519.7 | 23831.2 | 26307.7 | 29676.2 | 34010.9 | 36845.5 | 38680.6 |
福建省 | 784.7 | 1114.2 | 1644.4 | 2094.9 | 2484.3 | 2870.9 | 3159.9 | 3414.2 | 3764.5 | 4072.9 | ... | 17917.7 | 20190.7 | 22503.8 | 24942.1 | 26819.5 | 29609.4 | 33842.4 | 38687.8 | 42326.6 | 43903.9 |
江西省 | 572.6 | 723.0 | 948.2 | 1169.7 | 1409.7 | 1605.8 | 1719.9 | 1853.7 | 2003.1 | 2175.7 | ... | 11584.5 | 12807.7 | 14300.2 | 15667.8 | 16780.9 | 18388.6 | 20210.8 | 22716.5 | 24667.3 | 25691.5 |
山东省 | 2196.5 | 2770.4 | 3844.5 | 4953.4 | 5883.8 | 6537.1 | 7021.4 | 7493.8 | 8278.1 | 9076.2 | ... | 39064.9 | 42957.3 | 47344.3 | 50774.8 | 55288.8 | 58762.5 | 63012.1 | 66648.9 | 70540.5 | 73129.0 |
河南省 | 1279.8 | 1660.2 | 2216.8 | 2988.4 | 3634.7 | 4041.1 | 4308.2 | 4517.9 | 5053.0 | 5533.0 | ... | 26318.7 | 28961.9 | 31632.5 | 34574.8 | 37084.1 | 40249.3 | 44824.9 | 49935.9 | 53717.8 | 54997.1 |
湖北省 | 1088.4 | 1325.8 | 1700.9 | 2109.4 | 2499.8 | 2856.5 | 3114.0 | 3229.3 | 3545.4 | 3880.5 | ... | 19942.5 | 22590.9 | 25378.0 | 28242.1 | 30344.0 | 33353.0 | 37235.0 | 42022.0 | 45429.0 | 43443.5 |
湖南省 | 987.0 | 1244.7 | 1650.0 | 2132.1 | 2540.1 | 2849.3 | 3025.5 | 3214.5 | 3551.5 | 3831.9 | ... | 18915.0 | 21207.2 | 23545.2 | 25881.3 | 28538.6 | 30853.5 | 33828.1 | 36329.7 | 39894.1 | 41781.5 |
广东省 | 2447.5 | 3469.3 | 4619.0 | 5940.3 | 6848.2 | 7793.0 | 8555.3 | 9289.6 | 10810.2 | 12126.6 | ... | 53072.8 | 57007.7 | 62503.4 | 68173.0 | 74732.4 | 82163.2 | 91648.7 | 99945.2 | 107986.9 | 110760.9 |
广西壮族自治区 | 646.6 | 871.7 | 1198.3 | 1497.6 | 1697.9 | 1817.3 | 1911.3 | 1971.4 | 2080.0 | 2279.3 | ... | 10299.9 | 11303.6 | 12448.4 | 13587.8 | 14797.8 | 16116.6 | 17790.7 | 19627.8 | 21237.1 | 22156.7 |
海南省 | 184.9 | 260.4 | 332.0 | 363.3 | 389.7 | 411.2 | 442.1 | 476.7 | 526.8 | 579.2 | ... | 2463.8 | 2789.4 | 3115.9 | 3449.0 | 3734.2 | 4090.2 | 4497.5 | 4910.7 | 5330.8 | 5532.4 |
重庆市 | 462.5 | 611.1 | 838.1 | 1130.6 | 1326.4 | 1525.3 | 1622.4 | 1687.8 | 1822.1 | 2014.6 | ... | 10161.2 | 11595.4 | 13027.6 | 14623.8 | 16040.5 | 18023.0 | 20066.3 | 21588.8 | 23605.8 | 25002.8 |
四川省 | 1177.3 | 1486.1 | 2001.4 | 2443.2 | 2871.7 | 3241.5 | 3474.1 | 3649.1 | 3928.2 | 4293.5 | ... | 21050.9 | 23922.4 | 26518.0 | 28891.3 | 30342.0 | 33138.5 | 37905.1 | 42902.1 | 46363.8 | 48598.8 |
贵州省 | 339.9 | 417.7 | 524.5 | 636.2 | 723.2 | 805.8 | 858.4 | 937.5 | 1029.9 | 1133.3 | ... | 5615.6 | 6742.2 | 7973.1 | 9173.1 | 10541.0 | 11792.4 | 13605.4 | 15353.2 | 16769.3 | 17826.6 |
云南省 | 618.7 | 783.3 | 983.8 | 1222.2 | 1517.7 | 1676.2 | 1831.3 | 1899.8 | 2030.1 | 2159.0 | ... | 9523.1 | 11097.4 | 12825.5 | 14041.7 | 14960.0 | 16369.0 | 18486.0 | 20880.6 | 23223.8 | 24521.9 |
西藏自治区 | 33.3 | 37.4 | 46.0 | 56.1 | 65.0 | 77.2 | 91.5 | 106.0 | 117.8 | 139.2 | ... | 611.5 | 710.2 | 828.2 | 939.7 | 1043.0 | 1173.0 | 1349.0 | 1548.4 | 1697.8 | 1902.7 |
陕西省 | 531.6 | 678.2 | 839.0 | 1036.9 | 1215.8 | 1363.6 | 1458.4 | 1592.6 | 1804.0 | 2010.6 | ... | 12175.1 | 14142.4 | 15905.4 | 17402.5 | 17898.8 | 19045.8 | 21473.5 | 23941.9 | 25793.2 | 26181.9 |
甘肃省 | 317.8 | 372.2 | 453.6 | 557.8 | 722.5 | 793.6 | 887.7 | 956.3 | 1052.9 | 1125.4 | ... | 4816.9 | 5393.1 | 6014.5 | 6518.4 | 6556.6 | 6907.9 | 7336.7 | 8104.1 | 8718.3 | 9016.7 |
青海省 | 87.5 | 109.7 | 138.4 | 167.8 | 184.2 | 202.8 | 220.9 | 239.4 | 263.7 | 300.1 | ... | 1370.4 | 1528.5 | 1713.3 | 1847.7 | 2011.0 | 2258.2 | 2465.1 | 2748.0 | 2941.1 | 3005.9 |
宁夏回族自治区 | 83.1 | 104.5 | 136.3 | 175.2 | 202.9 | 224.6 | 245.5 | 264.6 | 295.0 | 337.4 | ... | 1931.8 | 2131.0 | 2327.7 | 2473.9 | 2579.4 | 2781.4 | 3200.3 | 3510.2 | 3748.5 | 3920.6 |
新疆维吾尔自治区 | 402.3 | 495.3 | 662.3 | 814.9 | 900.9 | 1039.8 | 1107.0 | 1163.2 | 1363.6 | 1491.6 | ... | 6532.0 | 7411.8 | 8392.6 | 9264.5 | 9306.9 | 9630.8 | 11159.9 | 12809.4 | 13597.1 | 13797.6 |
31 rows × 29 columns
# Reshape the wide table into long form: stack() moves the column labels into
# an inner index level, reset_index() turns both levels into ordinary columns
# (auto-named level_0 / level_1 / 0), which are then given readable names.
GDP_TMP = (
    GDP_new.stack()
    .reset_index()
    .rename(columns={"level_0": "地区", "level_1": "年份", 0: "GDP"})
)
GDP_TMP
地区 | 年份 | GDP | |
---|---|---|---|
0 | 北京市 | 1992 | 710.2 |
1 | 北京市 | 1993 | 888.9 |
2 | 北京市 | 1994 | 1149.8 |
3 | 北京市 | 1995 | 1516.2 |
4 | 北京市 | 1996 | 1819.4 |
... | ... | ... | ... |
894 | 新疆维吾尔自治区 | 2016 | 9630.8 |
895 | 新疆维吾尔自治区 | 2017 | 11159.9 |
896 | 新疆维吾尔自治区 | 2018 | 12809.4 |
897 | 新疆维吾尔自治区 | 2019 | 13597.1 |
898 | 新疆维吾尔自治区 | 2020 | 13797.6 |
899 rows × 3 columns
# Bar chart: GDP per region for 1992 and 1993, one bar colour per year.
plt.figure(figsize=(20, 8))
sns.set(font=font.get_name())
sns.barplot(
    x="地区",
    y="GDP",
    hue="年份",
    data=GDP_TMP[GDP_TMP["年份"].isin([1992, 1993])],
)
# Tilt the region names so the long labels do not overlap.
plt.xticks(rotation=40)
plt.show()
# Load the iris measurements from the local dataset folder and display them.
iris = pd.read_csv(
    "../datasets/iris.data.txt",
)
iris
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
# Scatter plot: petal length against petal width, with species encoded by
# both marker colour and marker shape.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x="petal_width",
    y="petal_length",
    hue="species",
    style="species",
    data=iris,
)
plt.show()
# Load the Titanic training set and discard passengers with no recorded age,
# then draw a single box plot of Age with the mean marked.
titanic = pd.read_csv("../datasets/taitanic_train.csv")
titanic = titanic.dropna(subset=["Age"])
sns.boxplot(y="Age", data=titanic, showmeans=True)
<AxesSubplot:ylabel='Age'>
# Load the housing listings and compute the per-district mean of the two
# price columns.
sec_house = pd.read_csv("../datasets/house.csv")
sec_house_qu = sec_house.groupby("区")[["价格", "均价"]].mean()
# Scale the mean unit price down by 10,000
# (presumably to express it in units of ten thousand; confirm the source unit).
sec_house_qu["均价"] /= 10000
sec_house_qu
价格 | 均价 | |
---|---|---|
区 | ||
嘉定区 | 298.100956 | 3.346117 |
奉贤区 | 244.201167 | 2.349471 |
宝山区 | 392.535022 | 4.403582 |
徐汇区 | 661.666783 | 7.670740 |
普陀区 | 522.432921 | 6.090767 |
杨浦区 | 478.377571 | 6.364288 |
松江区 | 517.619973 | 3.798396 |
浦东新区 | 573.529333 | 5.670892 |
虹口区 | 570.579395 | 6.647432 |
金山区 | 206.206807 | 2.029167 |
长宁区 | 733.222088 | 7.141097 |
闵行区 | 521.933185 | 5.112870 |
青浦区 | 685.365884 | 3.667959 |
静安区 | 698.138963 | 7.651275 |
黄浦区 | 1255.955154 | 9.825114 |
# Box plot of the unit price per district, in the same district order as the
# aggregated table, with each district's mean marked.
# The theme/font must be configured BEFORE any artists are created; calling
# sns.set() after sns.boxplot() only worked because an earlier cell had
# already applied the same setting.
sns.set(font=font.get_name())
plt.figure(figsize=(10, 8))
sns.boxplot(
    data=sec_house,
    x="区",
    y="均价",
    order=sec_house_qu.index,
    showmeans=True,
)
# Tilt the district names so they stay legible.
plt.xticks(rotation=40)
plt.show()
# Keep only the two columns needed for the age-distribution plots.
titanic_age = titanic.loc[:, ["Sex", "Age"]]
titanic_age
Sex | Age | |
---|---|---|
0 | male | 22.0 |
1 | female | 38.0 |
2 | female | 26.0 |
3 | female | 35.0 |
4 | male | 35.0 |
... | ... | ... |
885 | female | 39.0 |
886 | male | 27.0 |
887 | female | 19.0 |
889 | male | 26.0 |
890 | male | 32.0 |
714 rows × 2 columns
from scipy.stats import norm

# Age distribution of male passengers: density histogram with a KDE curve,
# overlaid with a fitted normal distribution for comparison.
# `sns.distplot` is deprecated, so the axes-level `sns.histplot` is used
# instead and the normal fit (formerly `fit=norm`) is drawn explicitly.
# NOTE(review): the original drew both curves from the male subset; confirm
# whether a female comparison was intended for the second one.
male_age = titanic_age.loc[titanic_age["Sex"] == "male", "Age"]
ax = sns.histplot(male_age, bins=30, stat="density", kde=True)

# Maximum-likelihood normal fit, evaluated across the observed age range.
mu, sigma = norm.fit(male_age)
age_grid = np.linspace(male_age.min(), male_age.max(), 200)
ax.plot(age_grid, norm.pdf(age_grid, mu, sigma), color="#282828")
ax
C:\Users\Administrator\Envs\py3.6.5_tf2\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) C:\Users\Administrator\Envs\py3.6.5_tf2\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='Age', ylabel='Density'>
# Figure-level histogram of Age split by Sex, with a KDE curve per group.
sns.displot(
    data=titanic_age,
    x="Age",
    hue="Sex",
    bins=30,
    kde=True,
)
plt.show()
# Display the long-form GDP table (columns: 地区, 年份, GDP).
GDP_TMP
地区 | 年份 | GDP | |
---|---|---|---|
0 | 北京市 | 1992 | 710.2 |
1 | 北京市 | 1993 | 888.9 |
2 | 北京市 | 1994 | 1149.8 |
3 | 北京市 | 1995 | 1516.2 |
4 | 北京市 | 1996 | 1819.4 |
... | ... | ... | ... |
894 | 新疆维吾尔自治区 | 2016 | 9630.8 |
895 | 新疆维吾尔自治区 | 2017 | 11159.9 |
896 | 新疆维吾尔自治区 | 2018 | 12809.4 |
897 | 新疆维吾尔自治区 | 2019 | 13597.1 |
898 | 新疆维吾尔自治区 | 2020 | 13797.6 |
899 rows × 3 columns
# Line chart of GDP over the years, one line per region.
plt.figure(figsize=(18, 20))
sns.lineplot(
    data=GDP_TMP,
    x="年份",
    y="GDP",
    hue="地区",
)
<AxesSubplot:xlabel='年份', ylabel='GDP'>
# Display the iris table (four measurement columns plus species).
iris
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
# Regression plot of petal length against petal width, one facet (and one
# colour) per species, each with its own fitted regression line.
# The theme/font is applied BEFORE the figure-level plot is built; setting it
# afterwards does not restyle the grid that lmplot has already created.
sns.set(font=font.get_name())
sns.lmplot(
    data=iris,
    x="petal_width",
    y="petal_length",
    hue="species",
    col="species",
    fit_reg=True,
)
plt.show()
# Display the Titanic table after the rows with a missing Age were dropped.
titanic
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
885 | 886 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
714 rows × 12 columns
# Count of passengers in each ticket class, split by sex.
sns.countplot(
    x="Pclass",
    hue="Sex",
    data=titanic,
)
<AxesSubplot:xlabel='Pclass', ylabel='count'>