import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style

style.use('ggplot')  # theme applied to every figure drawn below

# Let matplotlib render Chinese text.
plt.rcParams['font.sans-serif'] = ['SimHei']  # default font that has CJK glyphs
plt.rcParams['axes.unicode_minus'] = False  # keep '-' from showing as a box in saved images

dataset_path = './dataset/Mountains.csv'


def preview_data(data):
    """Print the first rows of *data* followed by its column summary.

    Args:
        data: the DataFrame to inspect.
    """
    print(data.head())
    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None".
    data.info()


def proc_success(val):
    """Replace the non-numeric markers of the 'Ascents bef. 2004' column.

    Args:
        val: one cell of the column (any type; inspected through ``str``).

    Returns:
        200 when the text form of *val* contains '>', 160 when it contains
        'Many', otherwise *val* unchanged.
    """
    text = str(val)
    if '>' in text:
        return 200
    if 'Many' in text:
        return 160
    return val


def run_main():
    """Load the dataset, clean it, and draw/save the three figures.

    Reads ``dataset_path``, writes ``first_ascent_vs_year.png``,
    ``mountain_vs_height.png`` and ``mountain_vs_attempts.png`` into the
    current directory, and shows each figure.
    """
    data = pd.read_csv(dataset_path)
    preview_data(data)

    # Reshape: give the awkward column names short identifiers.
    data = data.rename(columns={'Height (m)': 'Height',
                                'Ascents bef. 2004': 'Success',
                                'Failed attempts bef. 2004': 'Failed'})

    # Clean: missing counts become 0, textual markers become fixed numbers.
    data['Failed'] = data['Failed'].fillna(0).astype(int)
    data['Success'] = data['Success'].apply(proc_success)
    data['Success'] = data['Success'].fillna(0).astype(int)

    # Drop rows marked 'unclimbed'. The explicit copy makes the filtered frame
    # independent, so the column assignment below is not a chained assignment
    # on a slice (which triggers SettingWithCopyWarning).
    data = data[data['First ascent'] != 'unclimbed'].copy()
    data['First ascent'] = data['First ascent'].astype(int)

    # 1. Histogram of first-ascent years.
    # Labels below had scrape-injected pinyin ("(shù)") removed.
    plt.hist(data['First ascent'], bins=20)
    plt.ylabel('高峰數量')
    plt.xlabel('年份')
    plt.title('登頂次數')
    plt.savefig('./first_ascent_vs_year.png')
    plt.show()

    # 2. Histogram of heights, overlaid with bars whose length grows with height.
    heights = data['Height']
    lowest = heights.min()
    highest = heights.max()
    data['Height'].plot.hist(color='steelblue', bins=20)
    plt.bar(heights,
            (heights - lowest) / (highest - lowest) * 23,  # min-max scaled to 0..23
            color='red', width=30, alpha=0.2)
    plt.ylabel('高峰數量')
    plt.xlabel('海拔')
    plt.text(8750, 20, "海拔", color='red')
    plt.title('高峰vs海拔')
    plt.savefig('./mountain_vs_height.png')
    plt.show()

    # 3. First ascent year against height and against rank,
    #    coloured by the total number of attempts.
    data['Attempts'] = data['Failed'] + data['Success']
    fig = plt.figure(figsize=(13, 7))

    fig.add_subplot(211)
    plt.scatter(data['First ascent'], data['Height'],
                c=data['Attempts'], alpha=0.8, s=50)
    plt.ylabel('海拔')
    plt.xlabel('登頂')

    fig.add_subplot(212)
    # Rank is subtracted from its maximum so rank 1 plots at the top.
    plt.scatter(data['First ascent'], data['Rank'].max() - data['Rank'],
                c=data['Attempts'], alpha=0.8, s=50)
    plt.ylabel('排名')
    plt.xlabel('登頂')
    plt.savefig('./mountain_vs_attempts.png')
    plt.show()


if __name__ == '__main__':
    run_main()
# 聯系客服 (web-page footer residue, "contact customer service"; not part of the script)