介绍一个统计分析函数DataFrame.describe()。
下面通过代码展示其效果,示例代码中的csv文件是谷歌提供的一个包含加利福尼亚州住房数据的文件。
- import pandas as pd
- california_housing_dataframe = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")
- print(california_housing_dataframe)
- california_housing_dataframe.describe()
以上代码首先通过pandas提供的read_csv()函数读取csv文件,并将其保存为DataFrame(数据框)类型的数据;然后使用DataFrame.describe()函数展示一些有用的统计信息。在分析一个较大的csv文件时,这个函数作为初步的分析工具非常有用。统计结果包括数据量(count)、均值(mean)、标准差(std)、最小值(min)、四分位数(25%、50%、75%)和最大值(max)。以上代码的输出如下:
- longitude latitude housing_median_age total_rooms total_bedrooms \
- 0 -114.31 34.19 15.0 5612.0 1283.0
- 1 -114.47 34.40 19.0 7650.0 1901.0
- 2 -114.56 33.69 17.0 720.0 174.0
- 3 -114.57 33.64 14.0 1501.0 337.0
- 4 -114.57 33.57 20.0 1454.0 326.0
- 5 -114.58 33.63 29.0 1387.0 236.0
- 6 -114.58 33.61 25.0 2907.0 680.0
- 7 -114.59 34.83 41.0 812.0 168.0
- 8 -114.59 33.61 34.0 4789.0 1175.0
- 9 -114.60 34.83 46.0 1497.0 309.0
- 10 -114.60 33.62 16.0 3741.0 801.0
- 11 -114.60 33.60 21.0 1988.0 483.0
- 12 -114.61 34.84 48.0 1291.0 248.0
- 13 -114.61 34.83 31.0 2478.0 464.0
- 14 -114.63 32.76 15.0 1448.0 378.0
- 15 -114.65 34.89 17.0 2556.0 587.0
- 16 -114.65 33.60 28.0 1678.0 322.0
- 17 -114.65 32.79 21.0 44.0 33.0
- 18 -114.66 32.74 17.0 1388.0 386.0
- 19 -114.67 33.92 17.0 97.0 24.0
- 20 -114.68 33.49 20.0 1491.0 360.0
- 21 -114.73 33.43 24.0 796.0 243.0
- 22 -114.94 34.55 20.0 350.0 95.0
- 23 -114.98 33.82 15.0 644.0 129.0
- 24 -115.22 33.54 18.0 1706.0 397.0
- 25 -115.32 32.82 34.0 591.0 139.0
- 26 -115.37 32.82 30.0 1602.0 322.0
- 27 -115.37 32.82 14.0 1276.0 270.0
- 28 -115.37 32.81 32.0 741.0 191.0
- 29 -115.37 32.81 23.0 1458.0 294.0
- ... ... ... ... ... ...
- 16970 -124.17 40.80 52.0 1606.0 419.0
- 16971 -124.17 40.80 52.0 1557.0 344.0
- 16972 -124.17 40.79 43.0 2285.0 479.0
- 16973 -124.17 40.78 39.0 1606.0 330.0
- 16974 -124.17 40.77 30.0 1895.0 366.0
- 16975 -124.17 40.76 26.0 1776.0 361.0
- 16976 -124.17 40.75 13.0 2171.0 339.0
- 16977 -124.17 40.62 32.0 1595.0 309.0
- 16978 -124.18 40.79 39.0 1836.0 352.0
- 16979 -124.18 40.78 37.0 1453.0 293.0
- 16980 -124.18 40.78 34.0 1592.0 364.0
- 16981 -124.18 40.78 33.0 1076.0 222.0
- 16982 -124.18 40.62 35.0 952.0 178.0
- 16983 -124.19 41.78 15.0 3140.0 714.0
- 16984 -124.19 40.78 37.0 1371.0 319.0
- 16985 -124.19 40.77 30.0 2975.0 634.0
- 16986 -124.19 40.73 21.0 5694.0 1056.0
- 16987 -124.21 41.77 17.0 3461.0 722.0
- 16988 -124.21 41.75 20.0 3810.0 787.0
- 16989 -124.21 40.75 32.0 1218.0 331.0
- 16990 -124.22 41.73 28.0 3003.0 699.0
- 16991 -124.23 41.75 11.0 3159.0 616.0
- 16992 -124.23 40.81 52.0 1112.0 209.0
- 16993 -124.23 40.54 52.0 2694.0 453.0
- 16994 -124.25 40.28 32.0 1430.0 419.0
- 16995 -124.26 40.58 52.0 2217.0 394.0
- 16996 -124.27 40.69 36.0 2349.0 528.0
- 16997 -124.30 41.84 17.0 2677.0 531.0
- 16998 -124.30 41.80 19.0 2672.0 552.0
- 16999 -124.35 40.54 52.0 1820.0 300.0
- population households median_income median_house_value
- 0 1015.0 472.0 1.4936 66900.0
- 1 1129.0 463.0 1.8200 80100.0
- 2 333.0 117.0 1.6509 85700.0
- 3 515.0 226.0 3.1917 73400.0
- 4 624.0 262.0 1.9250 65500.0
- 5 671.0 239.0 3.3438 74000.0
- 6 1841.0 633.0 2.6768 82400.0
- 7 375.0 158.0 1.7083 48500.0
- 8 3134.0 1056.0 2.1782 58400.0
- 9 787.0 271.0 2.1908 48100.0
- 10 2434.0 824.0 2.6797 86500.0
- 11 1182.0 437.0 1.6250 62000.0
- 12 580.0 211.0 2.1571 48600.0
- 13 1346.0 479.0 3.2120 70400.0
- 14 949.0 300.0 0.8585 45000.0
- 15 1005.0 401.0 1.6991 69100.0
- 16 666.0 256.0 2.9653 94900.0
- 17 64.0 27.0 0.8571 25000.0
- 18 775.0 320.0 1.2049 44000.0
- 19 29.0 15.0 1.2656 27500.0
- 20 1135.0 303.0 1.6395 44400.0
- 21 227.0 139.0 0.8964 59200.0
- 22 119.0 58.0 1.6250 50000.0
- 23 137.0 52.0 3.2097 71300.0
- 24 3424.0 283.0 1.6250 53500.0
- 25 327.0 89.0 3.6528 100000.0
- 26 1130.0 335.0 3.5735 71100.0
- 27 867.0 261.0 1.9375 80900.0
- 28 623.0 169.0 1.7604 68600.0
- 29 866.0 275.0 2.3594 74300.0
- ... ... ... ... ...
- 16970 891.0 367.0 1.5850 75500.0
- 16971 758.0 319.0 1.8529 62500.0
- 16972 1169.0 482.0 1.9688 70500.0
- 16973 731.0 327.0 1.6369 68300.0
- 16974 990.0 359.0 2.2227 81300.0
- 16975 992.0 380.0 2.8056 82800.0
- 16976 951.0 353.0 4.8516 116100.0
- 16977 706.0 277.0 2.8958 86400.0
- 16978 883.0 337.0 1.7450 70500.0
- 16979 867.0 310.0 2.5536 70200.0
- 16980 950.0 317.0 2.1607 67000.0
- 16981 656.0 236.0 2.5096 72200.0
- 16982 480.0 179.0 3.0536 107000.0
- 16983 1645.0 640.0 1.6654 74600.0
- 16984 640.0 260.0 1.8242 70000.0
- 16985 1367.0 583.0 2.4420 69000.0
- 16986 2907.0 972.0 3.5363 90100.0
- 16987 1947.0 647.0 2.5795 68400.0
- 16988 1993.0 721.0 2.0074 66900.0
- 16989 620.0 268.0 1.6528 58100.0
- 16990 1530.0 653.0 1.7038 78300.0
- 16991 1343.0 479.0 2.4805 73200.0
- 16992 544.0 172.0 3.3462 50800.0
- 16993 1152.0 435.0 3.0806 106700.0
- 16994 434.0 187.0 1.9417 76100.0
- 16995 907.0 369.0 2.3571 111400.0
- 16996 1194.0 465.0 2.5179 79000.0
- 16997 1244.0 456.0 3.0313 103600.0
- 16998 1298.0 478.0 1.9797 85800.0
- 16999 806.0 270.0 3.0147 94600.0
- [17000 rows x 9 columns]
- longitude latitude housing_median_age total_rooms total_bedrooms population households median_income median_house_value
- count 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000 17000.000000
- mean -119.562108 35.625225 28.589353 2643.664412 539.410824 1429.573941 501.221941 3.883578 207300.912353
- std 2.005166 2.137340 12.586937 2179.947071 421.499452 1147.852959 384.520841 1.908157 115983.764387
- min -124.350000 32.540000 1.000000 2.000000 1.000000 3.000000 1.000000 0.499900 14999.000000
- 25% -121.790000 33.930000 18.000000 1462.000000 297.000000 790.000000 282.000000 2.566375 119400.000000
- 50% -118.490000 34.250000 29.000000 2127.000000 434.000000 1167.000000 409.000000 3.544600 180400.000000
- 75% -118.000000 37.720000 37.000000 3151.250000 648.250000 1721.000000 605.250000 4.767000 265000.000000
- max -114.310000 41.950000 52.000000 37937.000000 6445.000000 35682.000000 6082.000000 15.000100 500001.000000
联系客服