diff --git a/Youtube API + Data Analysis/yt.ipynb b/Youtube API + Data Analysis/yt.ipynb index 7ca49d1..ef091f2 100644 --- a/Youtube API + Data Analysis/yt.ipynb +++ b/Youtube API + Data Analysis/yt.ipynb @@ -43,7 +43,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### **Getting Channel ID's.**" + "### **I. Scraping Channel Statistics.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Getting Channel ID's." ] }, { @@ -64,7 +71,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### **Building YouTube API Service.**" + "#### Building YouTube API Service." ] }, { @@ -80,7 +87,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### **Function to obtain channel statistics.**" + "#### Function to obtain channel statistics." ] }, { @@ -104,6 +111,8 @@ " subscribers=response[\"items\"][i][\"statistics\"][\"subscriberCount\"],\n", " videos=response[\"items\"][i][\"statistics\"][\"videoCount\"],\n", " views=response[\"items\"][i][\"statistics\"][\"viewCount\"],\n", + " # --------\n", + " playlist_id=response[\"items\"][i][\"contentDetails\"][\"relatedPlaylists\"]['uploads'],\n", " )\n", "\n", " data.append(info)\n", @@ -115,35 +124,278 @@ "cell_type": "code", "execution_count": 6, "metadata": {}, + "outputs": [], + "source": [ + "channel_statistics = get_channel_statistics(youtube, channel_ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a dataframe. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
channel_namesubscribersvideosviewsplaylist_id
0Breaking Bad & Better Call Saul746000773593154876UUc0YbtMkRdhcqwhu3Oad-lw
1Harry Mack2640000498231600882UU59ZRYCHev_IqjUhremZ8Tg
2UFC17500000159267784942599UUvgfXK4nTYKudb0rFR6noLA
3MrBeast23300000077441694966373UUX6OQ3DkcsbYNE6H8uQQuVA
\n", + "
" + ], + "text/plain": [ + " channel_name subscribers videos views \\\n", + "0 Breaking Bad & Better Call Saul 746000 773 593154876 \n", + "1 Harry Mack 2640000 498 231600882 \n", + "2 UFC 17500000 15926 7784942599 \n", + "3 MrBeast 233000000 774 41694966373 \n", + "\n", + " playlist_id \n", + "0 UUc0YbtMkRdhcqwhu3Oad-lw \n", + "1 UU59ZRYCHev_IqjUhremZ8Tg \n", + "2 UUvgfXK4nTYKudb0rFR6noLA \n", + "3 UUX6OQ3DkcsbYNE6H8uQQuVA " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "channel_df = pd.DataFrame(channel_statistics)\n", + "channel_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Change datatype from object to integer." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "channel_name object\n", + "subscribers object\n", + "videos object\n", + "views object\n", + "playlist_id object\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "channel_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "exclude_cols = ['channel_name']\n", + "\n", + "cols_to_include = [col for col in channel_df.columns if col not in exclude_cols]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "channel_df[cols_to_include] = channel_df[cols_to_include].applymap(pd.to_numeric, errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "channel_name object\n", + "subscribers int64\n", + "videos int64\n", + "views int64\n", + "playlist_id float64\n", + "dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "channel_df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **II. Scraping Video Details.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Function to get video ID's" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def get_video_ids(youtube, playlist_id):\n", + " video_ids = []\n", + "\n", + " request = youtube.playlistItems().list(\n", + " part=\"contentDetails\", playlistId=playlist_id, maxResults=50\n", + " )\n", + "\n", + " response = request.execute()\n", + "\n", + " for i in range(len(response['items'])):\n", + " video_ids.append(response['items'][i]['contentDetails']['videoId'])\n", + "\n", + " next_page_token = response.get(\"nextPageToken\")\n", + "\n", + " while next_page_token is not None:\n", + " request = youtube.playlistItems().list(\n", + " part=\"contentDetails\",\n", + " playlistId=playlist_id,\n", + " maxResults=50,\n", + " pageToken=next_page_token,\n", + " )\n", + "\n", + " response = request.execute()\n", + "\n", + " for i in range(len(response['items'])):\n", + " video_ids.append(response['items'][i]['contentDetails']['videoId'])\n", + "\n", + " next_page_token = response.get(\"nextPageToken\")\n", + "\n", + " return video_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Breaking Bad & Better call Saul.\n", + "playlist_id = \"UUc0YbtMkRdhcqwhu3Oad-lw\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "video_ids = get_video_ids(youtube, playlist_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'channel_name': 'UFC',\n", - " 'subscribers': '17500000',\n", - " 'videos': '15926',\n", - " 'views': '7784942599'},\n", - " {'channel_name': 'MrBeast',\n", - " 'subscribers': '233000000',\n", - " 'videos': '774',\n", - " 'views': '41694966373'},\n", - " {'channel_name': 'Breaking Bad & Better Call Saul',\n", - " 'subscribers': '746000',\n", - " 'videos': '772',\n", - " 'views': '593154876'},\n", - " {'channel_name': 'Harry Mack',\n", - " 'subscribers': '2640000',\n", - " 'videos': '498',\n", - " 'views': '231600882'}]" + "774" ] }, - "execution_count": 6, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "get_channel_statistics(youtube, channel_ids)" + "len(video_ids)" ] } ],