(코로나 바이러스 예측) Analysis of COVID-19 data using Python
import datetime as dt
import urllib

import matplotlib
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# ECDC COVID-19 dataset as served by Our World in Data.
url = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
# pandas downloads and parses the remote CSV directly from the URL.
CVD = pd.read_csv(url)
# Preview the first five rows as a quick check of the load.
print(CVD.head(5))
date location new_cases new_deaths total_cases total_deaths \
0 2019-12-31 Afghanistan 0.0 0.0 NaN NaN
1 2020-01-01 Afghanistan 0.0 0.0 NaN NaN
2 2020-01-02 Afghanistan 0.0 0.0 NaN NaN
3 2020-01-03 Afghanistan 0.0 0.0 NaN NaN
4 2020-01-04 Afghanistan 0.0 0.0 NaN NaN
weekly_cases weekly_deaths biweekly_cases biweekly_deaths
0 NaN NaN NaN NaN
1 NaN NaN NaN NaN
2 NaN NaN NaN NaN
3 NaN NaN NaN NaN
4 NaN NaN NaN NaN
# Show the dtype pandas inferred for each column of the loaded frame.
print(CVD.dtypes)
date object
location object
new_cases float64
new_deaths float64
total_cases float64
total_deaths float64
weekly_cases float64
weekly_deaths float64
biweekly_cases float64
biweekly_deaths float64
dtype: object
# Convert the 'date' column from strings to datetime values.
# pd.to_datetime parses the whole column in one vectorized call and maps
# missing entries to NaT, whereas a per-row strptime would raise on them.
CVD['date'] = pd.to_datetime(CVD['date'], format='%Y-%m-%d')
# Confirm the column's new dtype.
print(CVD.dtypes)
date datetime64[ns]
location object
new_cases float64
new_deaths float64
total_cases float64
total_deaths float64
weekly_cases float64
weekly_deaths float64
biweekly_cases float64
biweekly_deaths float64
dtype: object
# Locations to compare; these are matched against the 'location' column.
countries = ['United States', 'Spain', 'Italy', 'South Korea']
# Take an explicit copy of the selected rows so that later index changes and
# column assignments act on an independent frame rather than on an object
# pandas tracks as a slice of CVD (which raises SettingWithCopyWarning).
CVD_country = CVD[CVD.location.isin(countries)].copy()
# Display the selection (notebook cell output).
CVD_country
date location new_cases new_deaths total_cases total_deaths weekly_cases weekly_deaths biweekly_cases biweekly_deaths
27241 2019-12-31 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
27242 2020-01-01 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
27243 2020-01-02 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
27244 2020-01-03 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
27245 2020-01-04 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
56361 2020-11-25 United States 170293.0 2224.0 12591165.0 259925.0 1231363.0 11238.0 2333339.0 20242.0
56362 2020-11-26 United States 186589.0 2341.0 12777754.0 262266.0 1247947.0 11729.0 2376622.0 20466.0
56363 2020-11-27 United States 106091.0 1189.0 12883845.0 263455.0 1166018.0 10900.0 2329044.0 21025.0
56364 2020-11-28 United States 207913.0 1404.0 13091758.0 264859.0 1177814.0 10446.0 2352144.0 20514.0
56365 2020-11-29 United States 154893.0 1204.0 13246651.0 266063.0 1157213.0 10164.0 2341760.0 20463.0
1339 rows × 10 columns
# Make 'date' the index. Rebinding to the frame returned by set_index avoids
# mutating in place a frame that was derived from a filtered selection of CVD.
CVD_country = CVD_country.set_index('date')
# Display the re-indexed frame (notebook cell output).
CVD_country
location new_cases new_deaths total_cases total_deaths weekly_cases weekly_deaths biweekly_cases biweekly_deaths
date
2019-12-31 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
2020-01-01 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
2020-01-02 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
2020-01-03 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
2020-01-04 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
2020-11-25 United States 170293.0 2224.0 12591165.0 259925.0 1231363.0 11238.0 2333339.0 20242.0
2020-11-26 United States 186589.0 2341.0 12777754.0 262266.0 1247947.0 11729.0 2376622.0 20466.0
2020-11-27 United States 106091.0 1189.0 12883845.0 263455.0 1166018.0 10900.0 2329044.0 21025.0
2020-11-28 United States 207913.0 1404.0 13091758.0 264859.0 1177814.0 10446.0 2352144.0 20514.0
2020-11-29 United States 154893.0 1204.0 13246651.0 266063.0 1157213.0 10164.0 2341760.0 20463.0
1339 rows × 9 columns
# Mortality rate = total deaths / total cases; NaN wherever either total is missing.
# assign() returns a new frame with the extra column, so nothing is written into
# a frame that pandas may be tracking as a slice of CVD (the source of
# SettingWithCopyWarning).
CVD_country = CVD_country.assign(
    mortality_rate=CVD_country['total_deaths'] / CVD_country['total_cases']
)
<ipython-input-76-9aae57fb628b>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
CVD_country['mortality_rate'] = CVD_country['total_deaths']/CVD_country['total_cases']
# Display the current contents of the frame (notebook cell output).
CVD_country
location new_cases new_deaths total_cases total_deaths weekly_cases weekly_deaths biweekly_cases biweekly_deaths mortality_rate
date
2019-12-31 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
2020-01-01 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
2020-01-02 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
2020-01-03 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
2020-01-04 Italy 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
2020-11-25 United States 170293.0 2224.0 12591165.0 259925.0 1231363.0 11238.0 2333339.0 20242.0 0.020643
2020-11-26 United States 186589.0 2341.0 12777754.0 262266.0 1247947.0 11729.0 2376622.0 20466.0 0.020525
2020-11-27 United States 106091.0 1189.0 12883845.0 263455.0 1166018.0 10900.0 2329044.0 21025.0 0.020448
2020-11-28 United States 207913.0 1404.0 13091758.0 264859.0 1177814.0 10446.0 2352144.0 20514.0 0.020231
2020-11-29 United States 154893.0 1204.0 13246651.0 266063.0 1157213.0 10164.0 2341760.0 20463.0 0.020085
1339 rows × 10 columns
# Create a figure holding a 2-by-2 grid of axes; figsize is (width, height) in inches.
fig, axes = plt.subplots(2, 2, figsize=(14, 20))