diff --git a/Youtube API + Data Analysis/yt.ipynb b/Youtube API + Data Analysis/yt.ipynb
index 7ca49d1..ef091f2 100644
--- a/Youtube API + Data Analysis/yt.ipynb
+++ b/Youtube API + Data Analysis/yt.ipynb
@@ -43,7 +43,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "#### **Getting Channel ID's.**"
+ "### **I. Scraping Channel Statistics.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Getting Channel IDs."
]
},
{
@@ -64,7 +71,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "#### **Building YouTube API Service.**"
+ "#### Building YouTube API Service."
]
},
{
@@ -80,7 +87,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "#### **Function to obtain channel statistics.**"
+ "#### Function to obtain channel statistics."
]
},
{
@@ -104,6 +111,8 @@
" subscribers=response[\"items\"][i][\"statistics\"][\"subscriberCount\"],\n",
" videos=response[\"items\"][i][\"statistics\"][\"videoCount\"],\n",
" views=response[\"items\"][i][\"statistics\"][\"viewCount\"],\n",
+ "            # ID of the playlist that holds the channel's uploaded videos.\n",
+ " playlist_id=response[\"items\"][i][\"contentDetails\"][\"relatedPlaylists\"]['uploads'],\n",
" )\n",
"\n",
" data.append(info)\n",
@@ -115,35 +124,278 @@
"cell_type": "code",
"execution_count": 6,
"metadata": {},
+ "outputs": [],
+ "source": [
+ "channel_statistics = get_channel_statistics(youtube, channel_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Create a DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " channel_name | \n",
+ " subscribers | \n",
+ " videos | \n",
+ " views | \n",
+ " playlist_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Breaking Bad & Better Call Saul | \n",
+ " 746000 | \n",
+ " 773 | \n",
+ " 593154876 | \n",
+ " UUc0YbtMkRdhcqwhu3Oad-lw | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Harry Mack | \n",
+ " 2640000 | \n",
+ " 498 | \n",
+ " 231600882 | \n",
+ " UU59ZRYCHev_IqjUhremZ8Tg | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " UFC | \n",
+ " 17500000 | \n",
+ " 15926 | \n",
+ " 7784942599 | \n",
+ " UUvgfXK4nTYKudb0rFR6noLA | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MrBeast | \n",
+ " 233000000 | \n",
+ " 774 | \n",
+ " 41694966373 | \n",
+ " UUX6OQ3DkcsbYNE6H8uQQuVA | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " channel_name subscribers videos views \\\n",
+ "0 Breaking Bad & Better Call Saul 746000 773 593154876 \n",
+ "1 Harry Mack 2640000 498 231600882 \n",
+ "2 UFC 17500000 15926 7784942599 \n",
+ "3 MrBeast 233000000 774 41694966373 \n",
+ "\n",
+ " playlist_id \n",
+ "0 UUc0YbtMkRdhcqwhu3Oad-lw \n",
+ "1 UU59ZRYCHev_IqjUhremZ8Tg \n",
+ "2 UUvgfXK4nTYKudb0rFR6noLA \n",
+ "3 UUX6OQ3DkcsbYNE6H8uQQuVA "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "channel_df = pd.DataFrame(channel_statistics)\n",
+ "channel_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Change datatype from object to integer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "channel_name object\n",
+ "subscribers object\n",
+ "videos object\n",
+ "views object\n",
+ "playlist_id object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "channel_df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# playlist_id is a text identifier (e.g. 'UUc0YbtMkRdhcqwhu3Oad-lw'), not a number:\n",
+ "# coercing it with pd.to_numeric(errors='coerce') would replace every value with NaN.\n",
+ "exclude_cols = ['channel_name', 'playlist_id']\n",
+ "\n",
+ "cols_to_include = [col for col in channel_df.columns if col not in exclude_cols]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert column by column; DataFrame.applymap is deprecated in recent pandas releases.\n",
+ "channel_df[cols_to_include] = channel_df[cols_to_include].apply(pd.to_numeric, errors='coerce')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "channel_name object\n",
+ "subscribers int64\n",
+ "videos int64\n",
+ "views int64\n",
+ "playlist_id float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "channel_df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **II. Scraping Video Details.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Function to get video IDs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_video_ids(youtube, playlist_id):\n",
+ "    \"\"\"Return the video IDs of every item in a playlist.\n",
+ "\n",
+ "    Requests the playlist's items page by page (``maxResults=50``) and keeps\n",
+ "    following ``nextPageToken`` until a response no longer contains one.\n",
+ "\n",
+ "    Args:\n",
+ "        youtube: API service object providing ``playlistItems().list(...)``.\n",
+ "        playlist_id: ID of the playlist to read.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list with the ``contentDetails.videoId`` of each item, in response order.\n",
+ "    \"\"\"\n",
+ "    video_ids = []\n",
+ "    next_page_token = None\n",
+ "\n",
+ "    while True:\n",
+ "        params = {\n",
+ "            \"part\": \"contentDetails\",\n",
+ "            \"playlistId\": playlist_id,\n",
+ "            \"maxResults\": 50,\n",
+ "        }\n",
+ "        # The first request carries no pageToken; later ones continue where the previous page ended.\n",
+ "        if next_page_token is not None:\n",
+ "            params[\"pageToken\"] = next_page_token\n",
+ "\n",
+ "        response = youtube.playlistItems().list(**params).execute()\n",
+ "        video_ids.extend(\n",
+ "            item[\"contentDetails\"][\"videoId\"] for item in response[\"items\"]\n",
+ "        )\n",
+ "\n",
+ "        next_page_token = response.get(\"nextPageToken\")\n",
+ "        if next_page_token is None:\n",
+ "            break\n",
+ "\n",
+ "    return video_ids"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Uploads playlist of the \"Breaking Bad & Better Call Saul\" channel (playlist_id column of channel_df).\n",
+ "playlist_id = \"UUc0YbtMkRdhcqwhu3Oad-lw\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "video_ids = get_video_ids(youtube, playlist_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[{'channel_name': 'UFC',\n",
- " 'subscribers': '17500000',\n",
- " 'videos': '15926',\n",
- " 'views': '7784942599'},\n",
- " {'channel_name': 'MrBeast',\n",
- " 'subscribers': '233000000',\n",
- " 'videos': '774',\n",
- " 'views': '41694966373'},\n",
- " {'channel_name': 'Breaking Bad & Better Call Saul',\n",
- " 'subscribers': '746000',\n",
- " 'videos': '772',\n",
- " 'views': '593154876'},\n",
- " {'channel_name': 'Harry Mack',\n",
- " 'subscribers': '2640000',\n",
- " 'videos': '498',\n",
- " 'views': '231600882'}]"
+ "774"
]
},
- "execution_count": 6,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "get_channel_statistics(youtube, channel_ids)"
+ "len(video_ids)"
]
}
],