In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
from matplotlib.font_manager import FontProperties
In [3]:
font = FontProperties(fname="../../simhei.ttf")
In [4]:
GDP = pd.read_csv("../datasets/Chinas GDP in Province Zh.csv",index_col=0)
In [5]:
GDP
Out[5]:
北京市 天津市 河北省 山西省 内蒙古自治区 辽宁省 吉林省 黑龙江省 上海市 江苏省 ... 重庆市 四川省 贵州省 云南省 西藏自治区 陕西省 甘肃省 青海省 宁夏回族自治区 新疆维吾尔自治区
2020 36102.6 14083.7 36206.9 17651.9 17359.8 25115.0 12311.3 13698.5 38700.6 102719.0 ... 25002.8 48598.8 17826.6 24521.9 1902.7 26181.9 9016.7 3005.9 3920.6 13797.6
2019 35445.1 14055.5 34978.6 16961.6 17212.5 24855.3 11726.8 13544.4 37987.6 98656.8 ... 23605.8 46363.8 16769.3 23223.8 1697.8 25793.2 8718.3 2941.1 3748.5 13597.1
2018 33106.0 13362.9 32494.6 15958.1 16140.8 23510.5 11253.8 12846.5 36011.8 93207.6 ... 21588.8 42902.1 15353.2 20880.6 1548.4 23941.9 8104.1 2748.0 3510.2 12809.4
2017 29883.0 12450.6 30640.8 14484.3 14898.1 21693.0 10922.0 12313.0 32925.0 85869.8 ... 20066.3 37905.1 13605.4 18486.0 1349.0 21473.5 7336.7 2465.1 3200.3 11159.9
2016 27041.2 11477.2 28474.1 11946.4 13789.3 20392.5 10427.0 11895.0 29887.0 77350.9 ... 18023.0 33138.5 11792.4 16369.0 1173.0 19045.8 6907.9 2258.2 2781.4 9630.8
2015 24779.1 10879.5 26398.4 11836.4 12949.0 20210.3 10018.0 11690.0 26887.0 71255.9 ... 16040.5 30342.0 10541.0 14960.0 1043.0 17898.8 6556.6 2011.0 2579.4 9306.9
2014 22926.0 10640.6 25208.9 12094.7 12158.2 20025.7 9966.5 12170.8 25269.8 64830.5 ... 14623.8 28891.3 9173.1 14041.7 939.7 17402.5 6518.4 1847.7 2473.9 9264.5
2013 21134.6 9945.4 24259.6 11987.2 11392.4 19208.8 9427.9 11849.1 23204.1 59349.4 ... 13027.6 26518.0 7973.1 12825.5 828.2 15905.4 6014.5 1713.3 2327.7 8392.6
2012 19024.7 9043.0 23077.5 11683.1 10470.1 17848.6 8678.0 11015.8 21305.6 53701.9 ... 11595.4 23922.4 6742.2 11097.4 710.2 14142.4 5393.1 1528.5 2131.0 7411.8
2011 17188.8 8112.5 21384.7 10894.4 9458.1 16354.9 7734.6 9935.0 20009.7 48839.2 ... 10161.2 21050.9 5615.6 9523.1 611.5 12175.1 4816.9 1370.4 1931.8 6532.0
2010 14964.0 6830.8 18003.6 8903.9 8199.9 13896.3 6410.5 8308.3 17915.4 41383.9 ... 8065.3 17224.8 4519.0 7735.3 512.9 9845.2 3943.7 1144.2 1571.7 5360.2
2009 12900.9 5709.6 15306.9 7147.6 7104.2 12815.7 5434.8 7218.9 15742.4 34471.7 ... 6651.2 14190.6 3856.7 6574.4 445.7 7997.8 3268.3 939.7 1266.7 4237.0
2008 11813.1 5182.4 14200.1 7223.0 6242.4 12137.7 4834.7 7134.2 14536.9 30945.5 ... 5899.5 12756.2 3504.5 6016.6 398.2 7177.8 3071.7 896.9 1139.2 4142.5
2007 10425.5 4158.4 12152.9 5935.6 5166.9 10292.2 4080.3 6126.3 12878.7 25988.4 ... 4770.7 10562.1 2847.5 5077.4 344.1 5681.8 2675.1 720.1 877.6 3500.0
2006 8387.0 3538.2 10043.0 4713.6 4161.8 8390.3 3226.5 5329.8 10598.9 21240.8 ... 3900.3 8494.7 2264.1 4090.7 285.9 4595.6 2203.0 585.2 683.3 2957.3
2005 7149.8 3158.6 8773.4 4079.4 3523.7 7260.8 2776.5 4756.4 9197.1 18121.3 ... 3448.4 7195.9 1939.9 3497.7 243.1 3817.2 1864.6 499.4 579.9 2520.5
2004 6252.5 2621.1 7588.6 3496.0 2942.4 6469.8 2455.2 4134.7 8101.6 14823.1 ... 3059.5 6304.0 1649.4 3136.4 217.9 3141.6 1653.6 443.7 519.9 2170.4
2003 5267.2 2257.8 6333.6 2854.3 2388.4 5906.3 2141.0 3609.7 6804.0 12442.9 ... 2615.6 5346.2 1429.0 2633.4 186.0 2587.7 1399.9 385.0 442.6 1889.2
2002 4525.7 1926.9 5518.9 2324.8 1940.9 5458.2 2043.1 3242.7 5795.0 10606.9 ... 2279.8 4725.0 1243.4 2358.7 162.0 2253.4 1232.0 340.7 377.2 1612.6
2001 3861.5 1756.9 5062.9 2029.5 1713.8 5033.1 1900.9 3043.4 5257.7 9456.8 ... 2014.6 4293.5 1133.3 2159.0 139.2 2010.6 1125.4 300.1 337.4 1491.6
2000 3277.8 1591.7 4628.2 1845.7 1539.1 4669.1 1751.4 2855.5 4812.2 8553.7 ... 1822.1 3928.2 1029.9 2030.1 117.8 1804.0 1052.9 263.7 295.0 1363.6
1999 2759.8 1435.6 4158.9 1667.1 1379.3 4171.7 1673.0 2536.9 4222.3 7697.8 ... 1687.8 3649.1 937.5 1899.8 106.0 1592.6 956.3 239.4 264.6 1163.2
1998 2439.1 1344.7 3924.5 1611.1 1262.5 3881.7 1577.1 2470.2 3831.0 7200.0 ... 1622.4 3474.1 858.4 1831.3 91.5 1458.4 887.7 220.9 245.5 1107.0
1997 2118.1 1264.6 3652.1 1476.0 1153.5 3582.5 1464.3 2397.6 3465.3 6680.3 ... 1525.3 3241.5 805.8 1676.2 77.2 1363.6 793.6 202.8 224.6 1039.8
1996 1819.4 1121.9 3198.0 1292.1 1023.1 3157.7 1346.8 2137.6 2980.8 6004.2 ... 1326.4 2871.7 723.2 1517.7 65.0 1215.8 722.5 184.2 202.9 900.9
1995 1516.2 932.0 2701.2 1076.0 857.1 2793.4 1137.2 1790.2 2518.1 5155.3 ... 1130.6 2443.2 636.2 1222.2 56.1 1036.9 557.8 167.8 175.2 814.9
1994 1149.8 732.9 2114.5 826.7 695.1 2461.8 937.7 1448.1 1990.9 4057.4 ... 838.1 2001.4 524.5 983.8 46.0 839.0 453.6 138.4 136.3 662.3
1993 888.9 538.9 1620.8 680.4 537.8 2010.8 718.6 1075.3 1519.2 2998.2 ... 611.1 1486.1 417.7 783.3 37.4 678.2 372.2 109.7 104.5 495.3
1992 710.2 411.0 1278.5 551.1 421.7 1473.0 558.1 857.4 1114.3 2136.0 ... 462.5 1177.3 339.9 618.7 33.3 531.6 317.8 87.5 83.1 402.3

29 rows × 31 columns

In [6]:
GDP.T
Out[6]:
2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 ... 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992
北京市 36102.6 35445.1 33106.0 29883.0 27041.2 24779.1 22926.0 21134.6 19024.7 17188.8 ... 3861.5 3277.8 2759.8 2439.1 2118.1 1819.4 1516.2 1149.8 888.9 710.2
天津市 14083.7 14055.5 13362.9 12450.6 11477.2 10879.5 10640.6 9945.4 9043.0 8112.5 ... 1756.9 1591.7 1435.6 1344.7 1264.6 1121.9 932.0 732.9 538.9 411.0
河北省 36206.9 34978.6 32494.6 30640.8 28474.1 26398.4 25208.9 24259.6 23077.5 21384.7 ... 5062.9 4628.2 4158.9 3924.5 3652.1 3198.0 2701.2 2114.5 1620.8 1278.5
山西省 17651.9 16961.6 15958.1 14484.3 11946.4 11836.4 12094.7 11987.2 11683.1 10894.4 ... 2029.5 1845.7 1667.1 1611.1 1476.0 1292.1 1076.0 826.7 680.4 551.1
内蒙古自治区 17359.8 17212.5 16140.8 14898.1 13789.3 12949.0 12158.2 11392.4 10470.1 9458.1 ... 1713.8 1539.1 1379.3 1262.5 1153.5 1023.1 857.1 695.1 537.8 421.7
辽宁省 25115.0 24855.3 23510.5 21693.0 20392.5 20210.3 20025.7 19208.8 17848.6 16354.9 ... 5033.1 4669.1 4171.7 3881.7 3582.5 3157.7 2793.4 2461.8 2010.8 1473.0
吉林省 12311.3 11726.8 11253.8 10922.0 10427.0 10018.0 9966.5 9427.9 8678.0 7734.6 ... 1900.9 1751.4 1673.0 1577.1 1464.3 1346.8 1137.2 937.7 718.6 558.1
黑龙江省 13698.5 13544.4 12846.5 12313.0 11895.0 11690.0 12170.8 11849.1 11015.8 9935.0 ... 3043.4 2855.5 2536.9 2470.2 2397.6 2137.6 1790.2 1448.1 1075.3 857.4
上海市 38700.6 37987.6 36011.8 32925.0 29887.0 26887.0 25269.8 23204.1 21305.6 20009.7 ... 5257.7 4812.2 4222.3 3831.0 3465.3 2980.8 2518.1 1990.9 1519.2 1114.3
江苏省 102719.0 98656.8 93207.6 85869.8 77350.9 71255.9 64830.5 59349.4 53701.9 48839.2 ... 9456.8 8553.7 7697.8 7200.0 6680.3 6004.2 5155.3 4057.4 2998.2 2136.0
浙江省 64613.3 62462.0 58002.8 52403.1 47254.0 43507.7 40023.5 37334.6 34382.4 31854.8 ... 6927.7 6164.8 5461.3 5065.5 4695.9 4195.8 3563.9 2689.3 1925.9 1375.7
安徽省 38680.6 36845.5 34010.9 29676.2 26307.7 23831.2 22519.7 20584.0 18341.7 16284.9 ... 3502.8 3125.3 2907.8 2711.7 2485.4 2199.7 1891.2 1378.9 1073.5 827.0
福建省 43903.9 42326.6 38687.8 33842.4 29609.4 26819.5 24942.1 22503.8 20190.7 17917.7 ... 4072.9 3764.5 3414.2 3159.9 2870.9 2484.3 2094.9 1644.4 1114.2 784.7
江西省 25691.5 24667.3 22716.5 20210.8 18388.6 16780.9 15667.8 14300.2 12807.7 11584.5 ... 2175.7 2003.1 1853.7 1719.9 1605.8 1409.7 1169.7 948.2 723.0 572.6
山东省 73129.0 70540.5 66648.9 63012.1 58762.5 55288.8 50774.8 47344.3 42957.3 39064.9 ... 9076.2 8278.1 7493.8 7021.4 6537.1 5883.8 4953.4 3844.5 2770.4 2196.5
河南省 54997.1 53717.8 49935.9 44824.9 40249.3 37084.1 34574.8 31632.5 28961.9 26318.7 ... 5533.0 5053.0 4517.9 4308.2 4041.1 3634.7 2988.4 2216.8 1660.2 1279.8
湖北省 43443.5 45429.0 42022.0 37235.0 33353.0 30344.0 28242.1 25378.0 22590.9 19942.5 ... 3880.5 3545.4 3229.3 3114.0 2856.5 2499.8 2109.4 1700.9 1325.8 1088.4
湖南省 41781.5 39894.1 36329.7 33828.1 30853.5 28538.6 25881.3 23545.2 21207.2 18915.0 ... 3831.9 3551.5 3214.5 3025.5 2849.3 2540.1 2132.1 1650.0 1244.7 987.0
广东省 110760.9 107986.9 99945.2 91648.7 82163.2 74732.4 68173.0 62503.4 57007.7 53072.8 ... 12126.6 10810.2 9289.6 8555.3 7793.0 6848.2 5940.3 4619.0 3469.3 2447.5
广西壮族自治区 22156.7 21237.1 19627.8 17790.7 16116.6 14797.8 13587.8 12448.4 11303.6 10299.9 ... 2279.3 2080.0 1971.4 1911.3 1817.3 1697.9 1497.6 1198.3 871.7 646.6
海南省 5532.4 5330.8 4910.7 4497.5 4090.2 3734.2 3449.0 3115.9 2789.4 2463.8 ... 579.2 526.8 476.7 442.1 411.2 389.7 363.3 332.0 260.4 184.9
重庆市 25002.8 23605.8 21588.8 20066.3 18023.0 16040.5 14623.8 13027.6 11595.4 10161.2 ... 2014.6 1822.1 1687.8 1622.4 1525.3 1326.4 1130.6 838.1 611.1 462.5
四川省 48598.8 46363.8 42902.1 37905.1 33138.5 30342.0 28891.3 26518.0 23922.4 21050.9 ... 4293.5 3928.2 3649.1 3474.1 3241.5 2871.7 2443.2 2001.4 1486.1 1177.3
贵州省 17826.6 16769.3 15353.2 13605.4 11792.4 10541.0 9173.1 7973.1 6742.2 5615.6 ... 1133.3 1029.9 937.5 858.4 805.8 723.2 636.2 524.5 417.7 339.9
云南省 24521.9 23223.8 20880.6 18486.0 16369.0 14960.0 14041.7 12825.5 11097.4 9523.1 ... 2159.0 2030.1 1899.8 1831.3 1676.2 1517.7 1222.2 983.8 783.3 618.7
西藏自治区 1902.7 1697.8 1548.4 1349.0 1173.0 1043.0 939.7 828.2 710.2 611.5 ... 139.2 117.8 106.0 91.5 77.2 65.0 56.1 46.0 37.4 33.3
陕西省 26181.9 25793.2 23941.9 21473.5 19045.8 17898.8 17402.5 15905.4 14142.4 12175.1 ... 2010.6 1804.0 1592.6 1458.4 1363.6 1215.8 1036.9 839.0 678.2 531.6
甘肃省 9016.7 8718.3 8104.1 7336.7 6907.9 6556.6 6518.4 6014.5 5393.1 4816.9 ... 1125.4 1052.9 956.3 887.7 793.6 722.5 557.8 453.6 372.2 317.8
青海省 3005.9 2941.1 2748.0 2465.1 2258.2 2011.0 1847.7 1713.3 1528.5 1370.4 ... 300.1 263.7 239.4 220.9 202.8 184.2 167.8 138.4 109.7 87.5
宁夏回族自治区 3920.6 3748.5 3510.2 3200.3 2781.4 2579.4 2473.9 2327.7 2131.0 1931.8 ... 337.4 295.0 264.6 245.5 224.6 202.9 175.2 136.3 104.5 83.1
新疆维吾尔自治区 13797.6 13597.1 12809.4 11159.9 9630.8 9306.9 9264.5 8392.6 7411.8 6532.0 ... 1491.6 1363.6 1163.2 1107.0 1039.8 900.9 814.9 662.3 495.3 402.3

31 rows × 29 columns

In [7]:
GDP.T
Out[7]:
2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 ... 2001 2000 1999 1998 1997 1996 1995 1994 1993 1992
北京市 36102.6 35445.1 33106.0 29883.0 27041.2 24779.1 22926.0 21134.6 19024.7 17188.8 ... 3861.5 3277.8 2759.8 2439.1 2118.1 1819.4 1516.2 1149.8 888.9 710.2
天津市 14083.7 14055.5 13362.9 12450.6 11477.2 10879.5 10640.6 9945.4 9043.0 8112.5 ... 1756.9 1591.7 1435.6 1344.7 1264.6 1121.9 932.0 732.9 538.9 411.0
河北省 36206.9 34978.6 32494.6 30640.8 28474.1 26398.4 25208.9 24259.6 23077.5 21384.7 ... 5062.9 4628.2 4158.9 3924.5 3652.1 3198.0 2701.2 2114.5 1620.8 1278.5
山西省 17651.9 16961.6 15958.1 14484.3 11946.4 11836.4 12094.7 11987.2 11683.1 10894.4 ... 2029.5 1845.7 1667.1 1611.1 1476.0 1292.1 1076.0 826.7 680.4 551.1
内蒙古自治区 17359.8 17212.5 16140.8 14898.1 13789.3 12949.0 12158.2 11392.4 10470.1 9458.1 ... 1713.8 1539.1 1379.3 1262.5 1153.5 1023.1 857.1 695.1 537.8 421.7
辽宁省 25115.0 24855.3 23510.5 21693.0 20392.5 20210.3 20025.7 19208.8 17848.6 16354.9 ... 5033.1 4669.1 4171.7 3881.7 3582.5 3157.7 2793.4 2461.8 2010.8 1473.0
吉林省 12311.3 11726.8 11253.8 10922.0 10427.0 10018.0 9966.5 9427.9 8678.0 7734.6 ... 1900.9 1751.4 1673.0 1577.1 1464.3 1346.8 1137.2 937.7 718.6 558.1
黑龙江省 13698.5 13544.4 12846.5 12313.0 11895.0 11690.0 12170.8 11849.1 11015.8 9935.0 ... 3043.4 2855.5 2536.9 2470.2 2397.6 2137.6 1790.2 1448.1 1075.3 857.4
上海市 38700.6 37987.6 36011.8 32925.0 29887.0 26887.0 25269.8 23204.1 21305.6 20009.7 ... 5257.7 4812.2 4222.3 3831.0 3465.3 2980.8 2518.1 1990.9 1519.2 1114.3
江苏省 102719.0 98656.8 93207.6 85869.8 77350.9 71255.9 64830.5 59349.4 53701.9 48839.2 ... 9456.8 8553.7 7697.8 7200.0 6680.3 6004.2 5155.3 4057.4 2998.2 2136.0
浙江省 64613.3 62462.0 58002.8 52403.1 47254.0 43507.7 40023.5 37334.6 34382.4 31854.8 ... 6927.7 6164.8 5461.3 5065.5 4695.9 4195.8 3563.9 2689.3 1925.9 1375.7
安徽省 38680.6 36845.5 34010.9 29676.2 26307.7 23831.2 22519.7 20584.0 18341.7 16284.9 ... 3502.8 3125.3 2907.8 2711.7 2485.4 2199.7 1891.2 1378.9 1073.5 827.0
福建省 43903.9 42326.6 38687.8 33842.4 29609.4 26819.5 24942.1 22503.8 20190.7 17917.7 ... 4072.9 3764.5 3414.2 3159.9 2870.9 2484.3 2094.9 1644.4 1114.2 784.7
江西省 25691.5 24667.3 22716.5 20210.8 18388.6 16780.9 15667.8 14300.2 12807.7 11584.5 ... 2175.7 2003.1 1853.7 1719.9 1605.8 1409.7 1169.7 948.2 723.0 572.6
山东省 73129.0 70540.5 66648.9 63012.1 58762.5 55288.8 50774.8 47344.3 42957.3 39064.9 ... 9076.2 8278.1 7493.8 7021.4 6537.1 5883.8 4953.4 3844.5 2770.4 2196.5
河南省 54997.1 53717.8 49935.9 44824.9 40249.3 37084.1 34574.8 31632.5 28961.9 26318.7 ... 5533.0 5053.0 4517.9 4308.2 4041.1 3634.7 2988.4 2216.8 1660.2 1279.8
湖北省 43443.5 45429.0 42022.0 37235.0 33353.0 30344.0 28242.1 25378.0 22590.9 19942.5 ... 3880.5 3545.4 3229.3 3114.0 2856.5 2499.8 2109.4 1700.9 1325.8 1088.4
湖南省 41781.5 39894.1 36329.7 33828.1 30853.5 28538.6 25881.3 23545.2 21207.2 18915.0 ... 3831.9 3551.5 3214.5 3025.5 2849.3 2540.1 2132.1 1650.0 1244.7 987.0
广东省 110760.9 107986.9 99945.2 91648.7 82163.2 74732.4 68173.0 62503.4 57007.7 53072.8 ... 12126.6 10810.2 9289.6 8555.3 7793.0 6848.2 5940.3 4619.0 3469.3 2447.5
广西壮族自治区 22156.7 21237.1 19627.8 17790.7 16116.6 14797.8 13587.8 12448.4 11303.6 10299.9 ... 2279.3 2080.0 1971.4 1911.3 1817.3 1697.9 1497.6 1198.3 871.7 646.6
海南省 5532.4 5330.8 4910.7 4497.5 4090.2 3734.2 3449.0 3115.9 2789.4 2463.8 ... 579.2 526.8 476.7 442.1 411.2 389.7 363.3 332.0 260.4 184.9
重庆市 25002.8 23605.8 21588.8 20066.3 18023.0 16040.5 14623.8 13027.6 11595.4 10161.2 ... 2014.6 1822.1 1687.8 1622.4 1525.3 1326.4 1130.6 838.1 611.1 462.5
四川省 48598.8 46363.8 42902.1 37905.1 33138.5 30342.0 28891.3 26518.0 23922.4 21050.9 ... 4293.5 3928.2 3649.1 3474.1 3241.5 2871.7 2443.2 2001.4 1486.1 1177.3
贵州省 17826.6 16769.3 15353.2 13605.4 11792.4 10541.0 9173.1 7973.1 6742.2 5615.6 ... 1133.3 1029.9 937.5 858.4 805.8 723.2 636.2 524.5 417.7 339.9
云南省 24521.9 23223.8 20880.6 18486.0 16369.0 14960.0 14041.7 12825.5 11097.4 9523.1 ... 2159.0 2030.1 1899.8 1831.3 1676.2 1517.7 1222.2 983.8 783.3 618.7
西藏自治区 1902.7 1697.8 1548.4 1349.0 1173.0 1043.0 939.7 828.2 710.2 611.5 ... 139.2 117.8 106.0 91.5 77.2 65.0 56.1 46.0 37.4 33.3
陕西省 26181.9 25793.2 23941.9 21473.5 19045.8 17898.8 17402.5 15905.4 14142.4 12175.1 ... 2010.6 1804.0 1592.6 1458.4 1363.6 1215.8 1036.9 839.0 678.2 531.6
甘肃省 9016.7 8718.3 8104.1 7336.7 6907.9 6556.6 6518.4 6014.5 5393.1 4816.9 ... 1125.4 1052.9 956.3 887.7 793.6 722.5 557.8 453.6 372.2 317.8
青海省 3005.9 2941.1 2748.0 2465.1 2258.2 2011.0 1847.7 1713.3 1528.5 1370.4 ... 300.1 263.7 239.4 220.9 202.8 184.2 167.8 138.4 109.7 87.5
宁夏回族自治区 3920.6 3748.5 3510.2 3200.3 2781.4 2579.4 2473.9 2327.7 2131.0 1931.8 ... 337.4 295.0 264.6 245.5 224.6 202.9 175.2 136.3 104.5 83.1
新疆维吾尔自治区 13797.6 13597.1 12809.4 11159.9 9630.8 9306.9 9264.5 8392.6 7411.8 6532.0 ... 1491.6 1363.6 1163.2 1107.0 1039.8 900.9 814.9 662.3 495.3 402.3

31 rows × 29 columns

In [8]:
plt.figure(figsize=(28,10))
plt.bar(x=GDP.columns.values,height=GDP.iloc[0],width=0.5)
plt.xticks(GDP.columns.values,font=font,rotation=40,fontsize=14)
plt.yticks(fontsize=16)
plt.xlabel("省份",font=font,fontsize=20)
plt.ylabel("GDP",font=font,fontsize=20)
plt.title("2020年GDP情况",font=font,fontsize=20)
plt.show()
In [9]:
help(GDP.iloc)
Help on _iLocIndexer in module pandas.core.indexing object:

class _iLocIndexer(_LocationIndexer)
 |  Purely integer-location based indexing for selection by position.
 |  
 |  ``.iloc[]`` is primarily integer position based (from ``0`` to
 |  ``length-1`` of the axis), but may also be used with a boolean
 |  array.
 |  
 |  Allowed inputs are:
 |  
 |  - An integer, e.g. ``5``.
 |  - A list or array of integers, e.g. ``[4, 3, 0]``.
 |  - A slice object with ints, e.g. ``1:7``.
 |  - A boolean array.
 |  - A ``callable`` function with one argument (the calling Series or
 |    DataFrame) and that returns valid output for indexing (one of the above).
 |    This is useful in method chains, when you don't have a reference to the
 |    calling object, but would like to base your selection on some value.
 |  
 |  ``.iloc`` will raise ``IndexError`` if a requested indexer is
 |  out-of-bounds, except *slice* indexers which allow out-of-bounds
 |  indexing (this conforms with python/numpy *slice* semantics).
 |  
 |  See more at :ref:`Selection by Position <indexing.integer>`.
 |  
 |  See Also
 |  --------
 |  DataFrame.iat : Fast integer location scalar accessor.
 |  DataFrame.loc : Purely label-location based indexer for selection by label.
 |  Series.iloc : Purely integer-location based indexing for
 |                 selection by position.
 |  
 |  Examples
 |  --------
 |  >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
 |  ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
 |  ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
 |  >>> df = pd.DataFrame(mydict)
 |  >>> df
 |        a     b     c     d
 |  0     1     2     3     4
 |  1   100   200   300   400
 |  2  1000  2000  3000  4000
 |  
 |  **Indexing just the rows**
 |  
 |  With a scalar integer.
 |  
 |  >>> type(df.iloc[0])
 |  <class 'pandas.core.series.Series'>
 |  >>> df.iloc[0]
 |  a    1
 |  b    2
 |  c    3
 |  d    4
 |  Name: 0, dtype: int64
 |  
 |  With a list of integers.
 |  
 |  >>> df.iloc[[0]]
 |     a  b  c  d
 |  0  1  2  3  4
 |  >>> type(df.iloc[[0]])
 |  <class 'pandas.core.frame.DataFrame'>
 |  
 |  >>> df.iloc[[0, 1]]
 |       a    b    c    d
 |  0    1    2    3    4
 |  1  100  200  300  400
 |  
 |  With a `slice` object.
 |  
 |  >>> df.iloc[:3]
 |        a     b     c     d
 |  0     1     2     3     4
 |  1   100   200   300   400
 |  2  1000  2000  3000  4000
 |  
 |  With a boolean mask the same length as the index.
 |  
 |  >>> df.iloc[[True, False, True]]
 |        a     b     c     d
 |  0     1     2     3     4
 |  2  1000  2000  3000  4000
 |  
 |  With a callable, useful in method chains. The `x` passed
 |  to the ``lambda`` is the DataFrame being sliced. This selects
 |  the rows whose index label even.
 |  
 |  >>> df.iloc[lambda x: x.index % 2 == 0]
 |        a     b     c     d
 |  0     1     2     3     4
 |  2  1000  2000  3000  4000
 |  
 |  **Indexing both axes**
 |  
 |  You can mix the indexer types for the index and columns. Use ``:`` to
 |  select the entire axis.
 |  
 |  With scalar integers.
 |  
 |  >>> df.iloc[0, 1]
 |  2
 |  
 |  With lists of integers.
 |  
 |  >>> df.iloc[[0, 2], [1, 3]]
 |        b     d
 |  0     2     4
 |  2  2000  4000
 |  
 |  With `slice` objects.
 |  
 |  >>> df.iloc[1:3, 0:3]
 |        a     b     c
 |  1   100   200   300
 |  2  1000  2000  3000
 |  
 |  With a boolean array whose length matches the columns.
 |  
 |  >>> df.iloc[:, [True, False, True, False]]
 |        a     c
 |  0     1     3
 |  1   100   300
 |  2  1000  3000
 |  
 |  With a callable function that expects the Series or DataFrame.
 |  
 |  >>> df.iloc[:, lambda df: [0, 2]]
 |        a     c
 |  0     1     3
 |  1   100   300
 |  2  1000  3000
 |  
 |  Method resolution order:
 |      _iLocIndexer
 |      _LocationIndexer
 |      pandas._libs.indexing._NDFrameIndexerBase
 |      builtins.object
 |  
 |  Methods inherited from _LocationIndexer:
 |  
 |  __call__(self, axis=None)
 |      Call self as a function.
 |  
 |  __getitem__(self, key)
 |  
 |  __setitem__(self, key, value)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from _LocationIndexer:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes inherited from _LocationIndexer:
 |  
 |  __annotations__ = {'_valid_types': <class 'str'>}
 |  
 |  axis = None
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from pandas._libs.indexing._NDFrameIndexerBase:
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __reduce__ = __reduce_cython__(...)
 |  
 |  __setstate__ = __setstate_cython__(...)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from pandas._libs.indexing._NDFrameIndexerBase:
 |  
 |  name
 |  
 |  ndim
 |  
 |  obj

In [10]:
taitanic = pd.read_csv("../datasets/taitanic_train.csv")
In [11]:
taitanic.dropna(subset=["Age"],inplace=True)
In [12]:
plt.hist(x=taitanic["Age"],edgecolor="red",bins=20,density=True)
plt.xlabel("年龄",font=font)
plt.title("年龄分布图",font=font)
plt.ylabel("频率",font=font)
plt.show()
In [13]:
def normalFun(x,mu,sigma):
    return np.exp(-((x-mu)**2) / (2*sigma*2)) / (sigma * np.sqrt(2*np.pi))
In [14]:
mean_x = taitanic["Age"].mean()
std_x = taitanic["Age"].std()
In [15]:
x = np.arange(taitanic["Age"].min(), taitanic["Age"].max()+10,1)
In [16]:
x
Out[16]:
array([ 0.42,  1.42,  2.42,  3.42,  4.42,  5.42,  6.42,  7.42,  8.42,
        9.42, 10.42, 11.42, 12.42, 13.42, 14.42, 15.42, 16.42, 17.42,
       18.42, 19.42, 20.42, 21.42, 22.42, 23.42, 24.42, 25.42, 26.42,
       27.42, 28.42, 29.42, 30.42, 31.42, 32.42, 33.42, 34.42, 35.42,
       36.42, 37.42, 38.42, 39.42, 40.42, 41.42, 42.42, 43.42, 44.42,
       45.42, 46.42, 47.42, 48.42, 49.42, 50.42, 51.42, 52.42, 53.42,
       54.42, 55.42, 56.42, 57.42, 58.42, 59.42, 60.42, 61.42, 62.42,
       63.42, 64.42, 65.42, 66.42, 67.42, 68.42, 69.42, 70.42, 71.42,
       72.42, 73.42, 74.42, 75.42, 76.42, 77.42, 78.42, 79.42, 80.42,
       81.42, 82.42, 83.42, 84.42, 85.42, 86.42, 87.42, 88.42, 89.42])
In [17]:
y = normalFun(x,mean_x,std_x)
In [18]:
y
Out[18]:
array([1.07494290e-08, 2.89458205e-08, 7.53074567e-08, 1.89296133e-07,
       4.59723969e-07, 1.07870874e-06, 2.44547312e-06, 5.35640286e-06,
       1.13353574e-05, 2.31765532e-05, 4.57840411e-05, 8.73838233e-05,
       1.61138608e-04, 2.87091158e-04, 4.94187403e-04, 8.21892789e-04,
       1.32065784e-03, 2.05029854e-03, 3.07535699e-03, 4.45682534e-03,
       6.24032687e-03, 8.44190979e-03, 1.10338156e-02, 1.39335692e-02,
       1.70000689e-02, 2.00396729e-02, 2.28234988e-02, 2.51145534e-02,
       2.67005583e-02, 2.74262777e-02, 2.72185535e-02, 2.60984586e-02,
       2.41777743e-02, 2.16406064e-02, 1.87143255e-02, 1.56361769e-02,
       1.26223034e-02, 9.84460516e-03, 7.41838952e-03, 5.40098050e-03,
       3.79915690e-03, 2.58198370e-03, 1.69539699e-03, 1.07557565e-03,
       6.59268252e-04, 3.90422652e-04, 2.23387824e-04, 1.23491092e-04,
       6.59573824e-05, 3.40363387e-05, 1.69696899e-05, 8.17441384e-06,
       3.80444211e-06, 1.71071232e-06, 7.43215285e-07, 3.11963602e-07,
       1.26515829e-07, 4.95721151e-08, 1.87664306e-08, 6.86400436e-09,
       2.42563287e-09, 8.28179034e-10, 2.73196443e-10, 8.70717940e-11,
       2.68121469e-11, 7.97695850e-12, 2.29295072e-12, 6.36801023e-13,
       1.70869453e-13, 4.42972486e-14, 1.10953426e-14, 2.68507444e-15,
       6.27803289e-16, 1.41821635e-16, 3.09537332e-17, 6.52732483e-18,
       1.32986961e-18, 2.61778842e-19, 4.97865083e-20, 9.14830087e-21,
       1.62413025e-21, 2.78581912e-22, 4.61675279e-23, 7.39217042e-24,
       1.14356003e-24, 1.70921858e-25, 2.46824286e-26, 3.44373576e-27,
       4.64219516e-28, 6.04600655e-29])
In [19]:
plt.hist(x=taitanic["Age"],color="c",edgecolor="r",bins=20,density=True)
plt.plot(x,y,color="r",linewidth=3,label="正态分布图")
plt.xlabel("年龄",font=font)
plt.title("年龄分布图",font=font)
plt.ylabel("频率",font=font)
plt.show()
In [20]:
plt.hist(x=taitanic["Age"],color="c",edgecolor="r",bins=20,density=True,label="直方分布图")
taitanic["Age"].plot(kind="kde",color="r",linewidth=3,xlim=[0,90],label="核密度图")
plt.plot(x,y,color="blue",linewidth=3,label="正态分布图")
plt.xlabel("年龄",font=font)
plt.title("年龄分布图",font=font)
plt.ylabel("频率",font=font)
plt.legend(loc="best",prop=font) #设置字体
plt.show()
In [21]:
sec_house = pd.read_csv("../datasets/house.csv")
In [22]:
plt.figure(figsize=(10,16))
plt.boxplot(x=sec_house["均价"],showmeans=True,showfliers=True)
plt.title("二手房箱线图",font=font)
plt.show()
In [23]:
iris = pd.read_csv("../datasets/iris.data.txt")
In [24]:
iris
Out[24]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns

In [25]:
plt.scatter(x=iris.petal_width,y=iris.petal_length)
plt.xlabel("花瓣宽度",font=font)
plt.ylabel("花瓣长度",font=font)
plt.title("鸢尾花花瓣长宽之间的关系",font=font)
plt.show()
In [26]:
color_iris = ['b','r','g']
species=["setosa","virginica","versicolor"]
marker_iris = ["o","s","x"]
In [27]:
iris.petal_width
Out[27]:
0      0.2
1      0.2
2      0.2
3      0.2
4      0.2
      ... 
145    2.3
146    1.9
147    2.0
148    2.3
149    1.8
Name: petal_width, Length: 150, dtype: float64
In [28]:
for i in range(3):
    plt.scatter(x = iris.petal_width[iris["species"]==species[i]],
               y = iris.petal_length[iris["species"]==species[i]],
                marker=marker_iris[i],
                color=color_iris[i])
plt.ylabel("花瓣长度",font=font)
plt.title("鸢尾花花瓣长宽之间的关系",font=font)
plt.show()
In [29]:
iris["species"].value_counts()
Out[29]:
versicolor    50
setosa        50
virginica     50
Name: species, dtype: int64
In [30]:
GDP_new= GDP.reset_index()
In [31]:
GDP_new.sort_values(by="index",inplace=True)
In [32]:
plt.figure(figsize=(8,7))
plt.plot(
    GDP_new["index"],GDP_new["北京市"],'bs--',
    GDP_new["index"],GDP_new["天津市"],'rs--',
    GDP_new["index"],GDP_new["上海市"],'gs--',
    GDP_new["index"],GDP_new["重庆市"],'cs--',
)

plt.legend(labels=['北京市','天津市','上海市','重庆市'],loc="best",prop=font)
plt.xlabel("年份",font=font)
plt.ylabel("GDP",font=font)
plt.title("不同直辖市GDP的变化",font=font)
plt.show()
In [33]:
sec_house
Out[33]:
价格 小区 房屋户型 建筑面积 户型结构 建筑类型 房屋朝向 建筑结构 装修情况 梯户比例 ... 环 室 厅 厨 卫 所处楼层 总层数 有无抵押 抵押情况 均价
0 1410.0 江临天下 4室2厅1厨2卫 165.73 平层 板楼 南 西南 钢混结构 精装 六梯六户 ... 内环内 4 2 1 2 低楼层 35 无 NaN 85078.14
1 680.0 樱花路309弄 2室2厅1厨1卫 78.65 复式 板楼 南 北 钢混结构 精装 一梯两户 ... 内环内 2 2 1 1 高楼层 6 有 150万元 银行抵押 业主自还 86459.00
2 2088.0 盛大金磐 3室2厅1厨3卫 194.65 平层 板楼 南 钢混结构 精装 三梯三户 ... 内环内 3 2 1 3 中楼层 43 无 NaN 107269.46
3 266.0 康桥月苑 2室2厅1厨1卫 72.27 平层 板楼 南 钢混结构 精装 一梯两户 ... 外环外 2 2 1 1 高楼层 6 无 NaN 36806.42
4 2288.0 城市经典 4室2厅1厨3卫 324.29 暂无数据 暂无数据 北 钢混结构 精装 暂无数据 ... 内环至中环 4 2 1 3 中楼层 4 无 NaN 70554.13
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
37478 260.0 绿地金卫新家园 4室2厅1厨2卫 154.74 复式 板楼 南 钢混结构 精装 一梯两户 ... 外环外 4 2 1 2 高楼层 6 有 30万元 公积金 16802.38
37479 135.0 城西花苑 2室2厅1厨1卫 94.00 平层 板楼 南 钢混结构 毛坯 一梯两户 ... 外环外 2 2 1 1 中楼层 6 无 NaN 14361.70
37480 135.0 东礁四村 3室1厅1厨1卫 73.00 平层 板楼 南 钢混结构 简装 一梯三户 ... 外环外 3 1 1 1 高楼层 6 有 20万元 业主自还 18493.15
37481 320.0 万盛金邸 4室1厅1厨2卫 128.56 平层 板楼 南 钢混结构 精装 一梯两户 ... 外环外 4 1 1 2 低楼层 14 有 8万元 业主自还 24891.10
37482 208.0 育秀六区 3室1厅1厨1卫 88.00 平层 板楼 南 砖混结构 精装 一梯两户 ... NaN 3 1 1 1 中楼层 5 无 NaN 23636.36

37483 rows × 29 columns

In [35]:
sec_house.columns
Out[35]:
Index(['价格', '小区', '房屋户型', '建筑面积', '户型结构', '建筑类型', '房屋朝向', '建筑结构', '装修情况',
       '梯户比例', '配备电梯', '挂牌时间', '交易权属', '上次交易', '房屋用途', '房屋年限', '产权所有', '区',
       '镇', '环', '室', '厅', '厨', '卫', '所处楼层', '总层数', '有无抵押', '抵押情况', '均价'],
      dtype='object')
In [37]:
sec_house_1 = sec_house.groupby("区")[["价格","均价"]].mean()
In [39]:
sec_house_1["均价"] =sec_house_1["均价"]/10000
In [40]:
sec_house_1
Out[40]:
价格 均价
区
嘉定区 298.100956 3.346117
奉贤区 244.201167 2.349471
宝山区 392.535022 4.403582
徐汇区 661.666783 7.670740
普陀区 522.432921 6.090767
杨浦区 478.377571 6.364288
松江区 517.619973 3.798396
浦东新区 573.529333 5.670892
虹口区 570.579395 6.647432
金山区 206.206807 2.029167
长宁区 733.222088 7.141097
闵行区 521.933185 5.112870
青浦区 685.365884 3.667959
静安区 698.138963 7.651275
黄浦区 1255.955154 9.825114
In [77]:
fig=plt.figure(figsize=(9,6))
ax1 = fig.add_subplot(111)
ax1.plot(sec_house_1.index,sec_house_1["价格"],"bs-",label="价格")
ax1.set_ylabel("不同地区的总价",font=font)
# plt.legend(loc="upper left",prop=font)
plt.xticks(sec_house_1.index,font=font,rotation=40)
ax2 = ax1.twinx()
ax2.plot(sec_house_1.index,sec_house_1["均价"],"ro-",label="均价")
ax2.set_ylabel("不同地区的均价",font=font)
fig.legend(labels=('价格','均价'),prop=font)
plt.title("不用地区总价与均价的折线图",font=font)
plt.show()
In [ ]:
 
In [ ]: