用 Python 对新冠病毒做数据分析,我们能得出哪些结论?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the 2019-nCoV case records from the Kaggle input directory.
data = pd.read_csv(
    "/kaggle/input/novel-corona-virus-2019-dataset/2019_nCoV_data.csv"
)
# Notebook-style inspection: table dimensions, then a preview of the first rows.
# (As bare expressions these only display something in an interactive session.)
data.shape
data.head()
# Remove the "Sno" and "Last Update" columns, modifying the frame in place.
data.drop(columns=["Sno", "Last Update"], inplace=True)
# Print the per-column summary, then compute descriptive statistics.
data.info()
data.describe()
# Flag rows that repeat an earlier (Country, Province/State, Date) combination
# and display them (notebook-style bare expression).
duplicate_rows = data.duplicated(subset=['Country', 'Province/State', 'Date'])
data[duplicate_rows]
# Report every distinct value in the 'Country' column and how many there are.
country_list = data['Country'].unique().tolist()
print(country_list)
print(len(country_list))
# Relabel 'Mainland China' as 'China' so both spellings count as one country.
data['Country'] = data['Country'].replace('Mainland China', 'China')
# Report the distinct raw 'Date' values and how many there are.
print(data['Date'].unique().tolist())
print(len(data['Date'].unique()))
# Case-counter columns used by every aggregation in this section. Aggregating
# only these avoids concatenating text columns and avoids the TypeError that
# recent pandas raises when asked to sum a datetime column.
count_columns = ['Confirmed', 'Deaths', 'Recovered']
# Parse the 'Date' strings into pandas timestamps.
data['Date'] = pd.to_datetime(data['Date'])
# Keep only the calendar day of each timestamp so rows can be grouped per day.
data['Date_date'] = data['Date'].dt.date
# Total cases per country. A plain per-country max() would return the largest
# single row (one province on one day), not the country total, so first add up
# all rows of a country for each day and then take the highest daily total.
# NOTE(review): assumes each province is reported at most once per calendar
# day and that the counters are cumulative - confirm against the dataset.
country_daily = data.groupby(['Country', 'Date_date'])[count_columns].sum()
df_country = country_daily.groupby(level='Country').max().reset_index()
print(df_country[['Country', 'Confirmed', 'Deaths', 'Recovered']])
# Time series: overall totals per day, plus the day-over-day change of each
# counter (the first day has no predecessor, so its differences are NaN).
df_by_date = data.groupby('Date_date')[count_columns].sum().reset_index()
df_by_date['daily_cases'] = df_by_date['Confirmed'].diff()
df_by_date['daily_deaths'] = df_by_date['Deaths'].diff()
df_by_date['daily_recoveries'] = df_by_date['Recovered'].diff()
print(df_by_date)
# Per-day totals of the case counters, computed once and shared by both charts.
totals_by_day = (
    data.groupby('Date_date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
# Bar chart of confirmed cases over time.
# set_style applies the theme globally; axes_style only returns a parameter
# dict, so calling it as a bare statement has no effect.
sns.set_style("whitegrid")
sns.barplot(x="Date_date", y="Confirmed", data=totals_by_day)
plt.xticks(rotation=60)
plt.ylabel('Number of confirmed cases', fontsize=15)
plt.xlabel('Dates', fontsize=15)
# Finish this figure so the line plots below get their own axes.
plt.show()
# Line plots of deaths (red) and recoveries (green) over time. The day column
# is 'Date_date'; the frame has no 'date_updated' column.
plt.plot('Date_date', 'Deaths', data=totals_by_day, color='red')
plt.plot('Date_date', 'Recovered', data=totals_by_day, color='green')
plt.xticks(rotation=60)
plt.ylabel('Number of cases', fontsize=15)
plt.xlabel('Dates', fontsize=15)
plt.legend()
plt.show()
# China is the most affected country by a large margin, so compare only the
# other countries in this bar chart.
# Enlarge the default figure size; this setting also applies to later figures.
plt.rcParams['figure.figsize'] = (15, 7)
# The ten countries other than China with the highest confirmed counts.
top_outside_china = df_country[df_country.Country != 'China'].nlargest(10, 'Confirmed')
sns.barplot(
    x="Country",
    y="Confirmed",
    data=top_outside_china,
    palette=sns.cubehelix_palette(15, reverse=True),
)
plt.ylabel('Number of cases', fontsize=15)
plt.xlabel('Countries', fontsize=15)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
# Render this figure now; otherwise the next plt.plot call would draw onto
# these same axes.
plt.show()
# The mortality rate at any point in time is roughly the number of deaths
# divided by the number of confirmed cases, expressed here as a percentage.
# Computed column-wise instead of with a row-by-row apply().
df_by_date['mrate'] = df_by_date['Deaths'] * 100 / df_by_date['Confirmed']
plt.plot('Date_date', 'mrate', data=df_by_date, color='red')
plt.show()
# Separate frame for Chinese provinces: the highest value each counter reached
# per province. Only the counter columns are aggregated, since no other column
# is used below.
china_rows = data[data['Country'] == 'China']
df_province = (
    china_rows.groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)
# Keep the 10 provinces with the most confirmed cases.
df_province = df_province.nlargest(10, 'Confirmed')
df_province = df_province[['Province/State', 'Deaths', 'Recovered']]
# For grouped bars in seaborn the frame must be in long form: melt it so the
# deaths and recovered values share one 'value' column, labelled by 'variable'.
df_province = df_province.melt(id_vars=['Province/State'])
sns.barplot(
    x='Province/State',
    y='value',
    hue='variable',
    data=df_province,
)
plt.xlabel('Provinces', fontsize=15)
plt.ylabel('Number of cases', fontsize=15)
# Render the final chart; without this a plain script never displays it.
plt.show()
自 1 月 28 日以来,每天报告的病例数量增加了近 250%。2 月 4 日报告的病例数为 3915 例。这表明该病毒具有高度的传染性,正在迅速传播。 在第一周,死亡率高于康复率。自 1 月 31 日以来,康复率迅速上升,并呈现出积极的趋势。2 月 4 日有 255 人康复,而死亡人数为 66 人。随着越来越多的人了解症状并及时寻求药物治疗,康复率将继续提高。 在地理位置上与中国相近的国家,如泰国、日本和新加坡,报告的病例比其他亚洲和欧洲国家多。德国是一个例外,其病例数在欧洲最多。 死亡率从未超过 3%,并正在逐渐下降到 2%。未来几周更多的康复病例可能会进一步降低这一数字。 中国湖北省是此次疫情的中心,报告的病例明显多于其他所有省份的总和。有些省份没有死亡病例,所有受感染的病人都康复了。
点击 阅读原文,查看:「肺炎 X 光病灶识别」挑战:面对疫情,开发者能做的还有很多!
最新评论
推荐文章
作者最新文章
你可能感兴趣的文章
Copyright Disclaimer: The copyright of the content (including text, images, videos and audio) posted above belongs to the User who shared it or to the third-party website from which the User shared it. If you find that your copyright has been infringed, please send a DMCA takedown notice to [email protected]. For more details about the source, please click the "Read Original Post" button below. For other communications, please write to [email protected].
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。