From c4b129c1aed5b5da1dd7c1ff4c0afae530877ff6 Mon Sep 17 00:00:00 2001 From: Faizan Mulla Date: Wed, 24 Jan 2024 14:31:51 +0530 Subject: [PATCH] initial commit + add: function to get channel stats. --- Youtube API + Data Analysis/yt.ipynb | 216 +++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 Youtube API + Data Analysis/yt.ipynb diff --git a/Youtube API + Data Analysis/yt.ipynb b/Youtube API + Data Analysis/yt.ipynb new file mode 100644 index 0000000..160eb90 --- /dev/null +++ b/Youtube API + Data Analysis/yt.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Importing Libraries**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "from googleapiclient.discovery import build" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Setting up YouTube API.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_service_name = \"youtube\"\n", + "api_version = \"v3\"\n", + "\n", + "yt_api_key = os.environ[\"YT_API_KEY\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **I. Scraping Channel Statistics.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Getting Channel IDs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_ids = [\n", + " \"UCX6OQ3DkcsbYNE6H8uQQuVA\",\n", + " \"UC59ZRYCHev_IqjUhremZ8Tg\",\n", + " \"UCvgfXK4nTYKudb0rFR6noLA\",\n", + " \"UCc0YbtMkRdhcqwhu3Oad-lw\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Building YouTube API Service."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "youtube = build(api_service_name, api_version, developerKey=yt_api_key) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Function to obtain channel statistics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_channel_statistics(youtube, channel_ids):\n", + "    \"\"\"Return one dict of name, counts and uploads playlist id per channel.\"\"\"\n", + "    request = youtube.channels().list(\n", + "        part=\"snippet,contentDetails,statistics\", id=\",\".join(channel_ids)\n", + "    )\n", + "    response = request.execute()\n", + "\n", + "    data = []\n", + "    # Use .get(): the response may carry no \"items\" key when no id matches.\n", + "    for item in response.get(\"items\", []):\n", + "        stats = item[\"statistics\"]\n", + "        info = dict(\n", + "            channel_name=item[\"snippet\"][\"title\"],\n", + "            # subscriberCount may be missing when a channel hides it.\n", + "            subscribers=stats.get(\"subscriberCount\"),\n", + "            videos=stats[\"videoCount\"],\n", + "            views=stats[\"viewCount\"],\n", + "            playlist_id=item[\"contentDetails\"][\"relatedPlaylists\"][\"uploads\"],\n", + "        )\n", + "        data.append(info)\n", + "\n", + "    return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_statistics = get_channel_statistics(youtube, channel_ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a dataframe. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_df = pd.DataFrame(channel_statistics)\n", + "channel_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Change datatype from object to integer."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exclude_cols = ['channel_name', 'playlist_id']\n", + "# playlist_id is a non-numeric string id, so coercing it to numeric would yield NaN.\n", + "cols_to_include = [col for col in channel_df.columns if col not in exclude_cols]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_df[cols_to_include] = channel_df[cols_to_include].apply(pd.to_numeric, errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "channel_df.dtypes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}