Copyimport pandas as pdimport numpy as npurl = ('https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv')tips = pd.read_csv(url)output = tips.head()
Output:
Copytotal_bill tip sex smoker day time size0 16.99 1.01 Female No Sun Dinner 21 10.34 1.66 Male No Sun Dinner 32 21.01 3.50 Male No Sun Dinner 33 23.68 3.31 Male No Sun Dinner 24 24.59 3.61 Female No Sun Dinner 4
sql 語句: SELECT total_bill, tip, smoker, time FROM tips LIMIT 5;
。
Copyoutput = tips[['total_bill', 'tip', 'smoker', 'time']].head(5)
Output:
Copytotal_bill tip smoker time0 16.99 1.01 No Dinner1 10.34 1.66 No Dinner2 21.01 3.50 No Dinner3 23.68 3.31 No Dinner4 24.59 3.61 No Dinner
sql 語句: SELECT * FROM tips WHERE time = 'Dinner' LIMIT 5;
Copyoutput = tips[tips['time'] == 'Dinner'].head(5)# 或者output = tips.query("time == 'Dinner'").head(5)
Output:
Copytotal_bill tip sex smoker day time size0 16.99 1.01 Female No Sun Dinner 21 10.34 1.66 Male No Sun Dinner 32 21.01 3.50 Male No Sun Dinner 33 23.68 3.31 Male No Sun Dinner 24 24.59 3.61 Female No Sun Dinner 4
==
、 大于 >
、 大于等于 >=
、小于等于 <=
、不等于 !=
==
sql 語句:SELECT * FROM tips WHERE time = 'Dinner';
。
Copyoutput = tips[(tips['time'] == 'Dinner')]
>
sql 語句:SELECT * FROM tips WHERE tip > 5.00;
。
Copyoutput = tips[(tips['tip'] > 5.00)]
>=
sql 語句:SELECT * FROM tips WHERE tip >= 5.00;
。
Copyoutput = tips[(tips['size'] >= 5)]
<=
sql 語句:SELECT * FROM tips WHERE tip <= 5.00;
。
Copyoutput = tips[(tips['size'] <= 5)]
!=
sql 語句:SELECT * FROM tips WHERE tip <> 5.00;
。
Copyoutput = tips[(tips['size'] != 5)]
&
、或 |
、非 -
&
sql 語句:SELECT * FROM tips WHERE time = 'Dinner' AND tip > 5.00;
Copyoutput = tips[(tips['time'] == 'Dinner') & (tips['tip'] > 5.00)]
|
sql 語句:SELECT * FROM tips WHERE size >= 5 OR total_bill > 45;
。
Copyoutput = tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)]
-
sql 語句:SELECT * FROM tips WHERE not (size <> 5 AND size > 4);
Copyoutput = df[-((df['size'] != 5) & (df['size'] > 4))]
這里重新定義一個包含 NaN
數(shù)據(jù)的 DataFrame
。
Copyframe = pd.DataFrame({ 'col1': ['A', 'B', np.NaN, 'C', 'D'], 'col2': ['F', np.NaN, 'G', 'H', 'I'] })output = frame
Output:
Copycol1 col20 A F1 B NaN2 NaN G3 C H4 D I
sql 語句:SELECT * FROM frame WHERE col2 IS NULL;
。
Copyoutput = frame[frame['col2'].isna()]
Output:
Copycol1 col21 B NaN
sql 語句:SELECT * FROM frame WHERE col1 IS NOT NULL;
。
Copyoutput = frame[frame['col1'].notna()]
Output:
Copycol1 col20 A F1 B NaN3 C H4 D I
sql 語句:SELECT * FROM tips WHERE siez in (5, 6);
。
Copyoutput = tips[tips['size'].isin([2, 5])]
sql 語句:SELECT * FROM tips WHERE time like 'Din%';
。
Copyoutput = tips[tips.time.str.contains('Din*')]
sql 語句:SELECT sex, count(*) FROM tips GROUP BY sex;
Copyoutput = tips.groupby('sex').size()# 獲取相應的結(jié)果output['Male']output['Female']
Copyoutput = tips.groupby('sex').count()# 獲取相應的結(jié)果output['tip']['Female']
Copyoutput = tips.groupby('sex')['total_bill'].count()# 獲取相應的結(jié)果output['Male']output['Female']
sql 語句:SELECT day, AVG(tip), COUNT(*) FROM tips GROUP BY day;
Copyoutput = tips.groupby('day').agg({'tip': np.mean, 'day': np.size})# 獲取相應的結(jié)果output['day']['Fri']output['tip']['Fri']
sql 語句:SELECT smoker, day, COUNT(*), AVG(tip) FROM tips GROUP BY smoker, day;
Copyoutput = tips.groupby(['smoker', 'day']).agg({'tip': [np.size, np.mean]})# 獲取相應的結(jié)果output['tip']['size']['No']['Fri']
sql 語句:SELECT tip, count(distinct sex) FROM tips GROUP BY tip;
Copyoutput = tips.groupby('tip').agg({'sex': pd.Series.nunique})
定義兩個 DataFrame。
Copydf1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], 'value': np.random.randn(4)})df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], 'value': np.random.randn(4)})
sql 語句:SELECT * FROM df1 INNER JOIN df2 ON df1.key = df2.key;
Copyoutput = pd.merge(df1, df2, on='key')# 或indexed_df2 = df2.set_index('key')pd.merge(df1, indexed_df2, left_on='key', right_index=True)
sql 語句:SELECT * FROM df1 LEFT OUTER JOIN df2 ON df1.key = df2.key;
Copyoutput = pd.merge(df1, df2, on='key', how='left')# 或output = df1.join(df2, on='key', how='left')
sql 語句:SELECT * FROM df1 RIGHT OUTER JOIN df2 ON df1.key = df2.key;
Copyoutput = pd.merge(df1, df2, on='key', how='right')
sql 語句:SELECT * FROM df1 FULL OUTER JOIN df2 ON df1.key = df2.key;
Copyoutput = pd.merge(df1, df2, on='key', how='outer')
Copydf1 = pd.DataFrame({'city': ['Chicago', 'San Francisco', 'New York City'], 'rank': range(1, 4)})df2 = pd.DataFrame({'city': ['Chicago', 'Boston', 'Los Angeles'], 'rank': [1, 4, 5]})
sql 語句:SELECT city, rank FROM df1 UNION ALL SELECT city, rank FROM df2;
Copyoutput = pd.concat([df1, df2])
sql 語句:SELECT city, rank FROM df1 UNION SELECT city, rank FROM df2;
Copyoutput = pd.concat([df1, df2]).drop_duplicates()
sql 語句:SELECT DISTINCT sex FROM tips;
Copyoutput = tips.drop_duplicates(subset=['sex'], keep='first', inplace=False)
sql 語句:SELECT total_bill AS total, sex AS xes FROM tips;
Copyoutput = tips.rename(columns={'total_bill': 'total', 'sex': 'xes'}, inplace=False)
sql 語句:SELECT * FROM tips ORDER BY tip DESC LIMIT 10 OFFSET 5;
Copyoutput = tips.nlargest(10 + 5, columns='tip').tail(10)
sql 語句:
CopySELECT * FROM ( SELECT t.*, ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn FROM tips t)WHERE rn < 3ORDER BY day, rn;
Copyoutput = tips.assign(rn=tips.sort_values(['total_bill'], ascending=False). groupby(['day']).cumcount() + 1). query('rn < 3'). sort_values(['day', 'rn'])
sql 語句:UPDATE tips SET tip = tip*2 WHERE tip < 2;
Copyoutput = tips.loc[tips['tip'] < 2, 'tip'] *= 2
sql 語句:DELETE FROM tips WHERE tip > 9;
Copyoutput = tips = tips.loc[tips['tip'] <= 9]
聯(lián)系客服