Skip to content

Commit

Permalink
add: function to get video id's
Browse files Browse the repository at this point in the history
  • Loading branch information
faizanxmulla committed Jan 21, 2024
1 parent 702016f commit c6e70d3
Showing 1 changed file with 273 additions and 21 deletions.
294 changes: 273 additions & 21 deletions Youtube API + Data Analysis/yt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **Getting Channel ID's.**"
"### **I. Scraping Channel Statistics.**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Getting Channel IDs."
]
},
{
Expand All @@ -64,7 +71,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **Building YouTube API Service.**"
"#### Building YouTube API Service."
]
},
{
Expand All @@ -80,7 +87,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **Function to obtain channel statistics.**"
"#### Function to obtain channel statistics."
]
},
{
Expand All @@ -104,6 +111,8 @@
" subscribers=response[\"items\"][i][\"statistics\"][\"subscriberCount\"],\n",
" videos=response[\"items\"][i][\"statistics\"][\"videoCount\"],\n",
" views=response[\"items\"][i][\"statistics\"][\"viewCount\"],\n",
" # --------\n",
" playlist_id=response[\"items\"][i][\"contentDetails\"][\"relatedPlaylists\"]['uploads'],\n",
" )\n",
"\n",
" data.append(info)\n",
Expand All @@ -115,35 +124,278 @@
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"channel_statistics = get_channel_statistics(youtube, channel_ids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Create a DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>channel_name</th>\n",
" <th>subscribers</th>\n",
" <th>videos</th>\n",
" <th>views</th>\n",
" <th>playlist_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Breaking Bad &amp; Better Call Saul</td>\n",
" <td>746000</td>\n",
" <td>773</td>\n",
" <td>593154876</td>\n",
" <td>UUc0YbtMkRdhcqwhu3Oad-lw</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Harry Mack</td>\n",
" <td>2640000</td>\n",
" <td>498</td>\n",
" <td>231600882</td>\n",
" <td>UU59ZRYCHev_IqjUhremZ8Tg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>UFC</td>\n",
" <td>17500000</td>\n",
" <td>15926</td>\n",
" <td>7784942599</td>\n",
" <td>UUvgfXK4nTYKudb0rFR6noLA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>MrBeast</td>\n",
" <td>233000000</td>\n",
" <td>774</td>\n",
" <td>41694966373</td>\n",
" <td>UUX6OQ3DkcsbYNE6H8uQQuVA</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" channel_name subscribers videos views \\\n",
"0 Breaking Bad & Better Call Saul 746000 773 593154876 \n",
"1 Harry Mack 2640000 498 231600882 \n",
"2 UFC 17500000 15926 7784942599 \n",
"3 MrBeast 233000000 774 41694966373 \n",
"\n",
" playlist_id \n",
"0 UUc0YbtMkRdhcqwhu3Oad-lw \n",
"1 UU59ZRYCHev_IqjUhremZ8Tg \n",
"2 UUvgfXK4nTYKudb0rFR6noLA \n",
"3 UUX6OQ3DkcsbYNE6H8uQQuVA "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"channel_df = pd.DataFrame(channel_statistics)\n",
"channel_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Change datatype from object to integer."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"channel_name object\n",
"subscribers object\n",
"videos object\n",
"views object\n",
"playlist_id object\n",
"dtype: object"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"channel_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Text columns that must stay as strings: numeric coercion would turn them into NaN.\n",
"exclude_cols = ['channel_name', 'playlist_id']\n",
"\n",
"cols_to_include = [col for col in channel_df.columns if col not in exclude_cols]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Convert whole columns at once; DataFrame.applymap is deprecated since pandas 2.1.\n",
"# errors='coerce' turns any value that cannot be parsed as a number into NaN.\n",
"channel_df[cols_to_include] = channel_df[cols_to_include].apply(pd.to_numeric, errors='coerce')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"channel_name object\n",
"subscribers int64\n",
"videos int64\n",
"views int64\n",
"playlist_id float64\n",
"dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"channel_df.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### **II. Scraping Video Details.**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Function to get video IDs."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def get_video_ids(youtube, playlist_id):\n",
"    \"\"\"Collect the ID of every video in a playlist.\n",
"\n",
"    Requests the playlist's items 50 at a time and keeps following\n",
"    ``nextPageToken`` until a response no longer carries one.\n",
"\n",
"    Args:\n",
"        youtube: API service object exposing ``playlistItems().list(...).execute()``.\n",
"        playlist_id: ID of the playlist to read (e.g. a channel's uploads playlist).\n",
"\n",
"    Returns:\n",
"        list: the ``videoId`` of each playlist item, in the order received.\n",
"    \"\"\"\n",
"    video_ids = []\n",
"    next_page_token = None\n",
"\n",
"    while True:\n",
"        params = {\n",
"            \"part\": \"contentDetails\",\n",
"            \"playlistId\": playlist_id,\n",
"            \"maxResults\": 50,\n",
"        }\n",
"        # The first request carries no pageToken; later ones resume where the last page ended.\n",
"        if next_page_token is not None:\n",
"            params[\"pageToken\"] = next_page_token\n",
"\n",
"        response = youtube.playlistItems().list(**params).execute()\n",
"\n",
"        video_ids.extend(\n",
"            item['contentDetails']['videoId'] for item in response['items']\n",
"        )\n",
"\n",
"        next_page_token = response.get(\"nextPageToken\")\n",
"        if next_page_token is None:\n",
"            return video_ids"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Uploads playlist ID of the 'Breaking Bad & Better Call Saul' channel.\n",
"playlist_id = \"UUc0YbtMkRdhcqwhu3Oad-lw\""
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"video_ids = get_video_ids(youtube, playlist_id)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'channel_name': 'UFC',\n",
" 'subscribers': '17500000',\n",
" 'videos': '15926',\n",
" 'views': '7784942599'},\n",
" {'channel_name': 'MrBeast',\n",
" 'subscribers': '233000000',\n",
" 'videos': '774',\n",
" 'views': '41694966373'},\n",
" {'channel_name': 'Breaking Bad & Better Call Saul',\n",
" 'subscribers': '746000',\n",
" 'videos': '772',\n",
" 'views': '593154876'},\n",
" {'channel_name': 'Harry Mack',\n",
" 'subscribers': '2640000',\n",
" 'videos': '498',\n",
" 'views': '231600882'}]"
"774"
]
},
"execution_count": 6,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_channel_statistics(youtube, channel_ids)"
"len(video_ids)"
]
}
],
Expand Down

0 comments on commit c6e70d3

Please sign in to comment.