diff --git a/extension_2_masters_project_occupational_group.ipynb b/extension_2_masters_project_occupational_group.ipynb new file mode 100644 index 0000000..f82af6a --- /dev/null +++ b/extension_2_masters_project_occupational_group.ipynb @@ -0,0 +1,43761 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Project imports #\n", + "import pandas as pd\n", + "import numpy as np\n", + "import psycopg2 as pg2\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "import csv\n", + "import re\n", + "from statsmodels.tsa.seasonal import STL\n", + "import statsmodels.api as sm\n", + "from datetime import datetime\n", + "import calendar\n", + "from calendar import monthrange" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extension of Project - Extension 1\n", + "#### Objective: To see how local labor demand has changed based on the jobads available on platsbanken between 2006-2020\n", + "\n", + "#### Project outline\n", + "- Step 1: Converting JSON files to CSV and extracting relevant data (+data cleaning).\n", + "- Step 2: Inserting data into a PostgreSQL database and setting up a query.\n", + "- Step 3: Visualization and discussion of results.\n", + "- Step 4: Time series analysis (in progress).\n", + "- Concluding remarks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: Converting JSON files to CSV and extracting relevant data (+data cleaning)\n", + "#### All job ads data used in this project is from [JobTechDev](https://jobtechdev.se/en/docs/apis/historical/), an initiative by the Swedish public employment service.\n", + "\n", + "- Variables to be extracted for this project are the following:\n", + " - **headline**: The ad headline.\n", + " - **number_of_vacancies**: The number of advertised jobs for a given ad, a majority only advertise 1 job (5M out of 6.4M ads).\n", + " - **publication_date**: Date the ad was published on Platsbanken, the job ad platform.\n", + " - **application_deadline**: The last date to apply for the job.\n", + " - **last_publication_date**: The last date the ad was public, used as substitutet for application_deadline for 2017 where application_deadline is missing.\n", + " - **municipality_code**: The unique code associated with each municipality in Sweden" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "valid ads for 2006: 209153 | erroneous ads for 2006: 36074\n", + "valid ads for 2007: 259423 | erroneous ads for 2007: 24682\n", + "valid ads for 2008: 240130 | erroneous ads for 2008: 25093\n", + "valid ads for 2009: 174451 | erroneous ads for 2009: 21673\n", + "valid ads for 2010: 263165 | erroneous ads for 2010: 26089\n", + "valid ads for 2011: 335272 | erroneous ads for 2011: 29416\n", + "valid ads for 2012: 346145 | erroneous ads for 2012: 33213\n", + "valid ads for 2013: 355452 | erroneous ads for 2013: 34601\n", + "valid ads for 2014: 419379 | erroneous ads for 2014: 32606\n", + "valid ads for 2015: 583994 | erroneous ads for 2015: 29519\n", + "valid ads for 2016: 693570 | erroneous ads for 2016: 24476\n", + "valid ads for 2017: 694039 | erroneous ads for 2017: 21252\n", + "valid ads for 2018: 659482 | erroneous ads for 2018: 23487\n", + "valid ads for 2019: 514672 | erroneous ads for 2019: 125585\n", + "valid ads for 2020: 449857 | erroneous ads for 2020: 40856\n", + "\n", + "\n", + "total valid ads: 6198184 | total erroneous ads: 528622\n" + ] + } + ], + "source": [ + "total_valid = 0; total_errors = 0\n", + "for file in range(2006,2021):\n", + " with open(f'/Users/Kevin/Desktop/project_dta/json_pb2006_2020/{file}.json') as f:\n", + " ads = json.load(f)\n", + " file_ads = []\n", + " error_rows = 0\n", + " \n", + " # For 2017 application_deadline is null, last_publication_date used as a proxy, vars 99.8% equivalent #\n", + " for ad in ads:\n", + " # Removal of special characters using regex, events of \\n causes errors in csv file #\n", + " head_line = re.sub('[!,*)@#%(&$_?.^\\\\\\\\\\n/]', '', str(ad['headline']))\n", + " if file != 2017:\n", + " try: \n", + " # Slicing of date variables is to ensure only dates of the format 'yyyy-mm-dd' are included, no time data needed #\n", + " ad_select = [head_line, int(ad['number_of_vacancies']), ad['publication_date'][:10], ad['application_deadline'][:10], ad['workplace_address']['municipality_code']]\n", + " except:\n", + " error_rows += 1\n", + " continue\n", + " else:\n", + " try:\n", + " ad_select = [head_line, int(ad['number_of_vacancies']), ad['publication_date'][:10], ad['last_publication_date'][:10], ad['workplace_address']['municipality_code']]\n", + " except:\n", + " error_rows += 1\n", + " continue\n", + " \n", + " # Jobs ads with no vacancies (=0) or missing values (None) in dates or vacancies are removed using all() #\n", + " if all(ad_select[1:]) and 'ALLA' not in ad_select:\n", + " file_ads.append(ad_select)\n", + " else:\n", + " error_rows += 1\n", + " continue\n", + " # We write the extracted data to a csv file for easy insertion into PostgreSQL # \n", + " with open(f'/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_city/{file}_municode.csv', mode='w', newline=\"\") as file_writer:\n", + " write = csv.writer(file_writer)\n", + " for row in file_ads:\n", + " write.writerow(row)\n", + " print(f'valid ads for {file}: ' + str(len(file_ads)), ' | ', f'erroneous ads for {file}: ' + str(error_rows))\n", + " total_valid += len(file_ads); total_errors += error_rows\n", + "print('\\n')\n", + "print('total valid ads: ' + str(total_valid), ' | ', 'total erroneous ads: '+ str(total_errors))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- More rows are filtered out now that restrictions are harder. Now, if the municipality code is NULL, row is discarded" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Inserting data into a postgreSQL database and setting up relevant query\n", + " - For efficient analysis and storage of the dataset we import the data into PostgreSQL." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "# DB created in pgAdmin4 GUI, login and connect cursor #\n", + "conn = pg2.connect(database='job_ads', user='postgres', password='World930920')\n", + "cur = conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Table creation # \n", + "# If data cleaning method in previous step missed any invalid ads, NOT NULL ascertains that no invalid ads are included #\n", + "cur.execute(\"\"\"\n", + "CREATE TABLE historic_ads_muni(\n", + " ad_id SERIAL PRIMARY KEY,\n", + " headline VARCHAR,\n", + " number_of_vacancies INTEGER NOT NULL,\n", + " publication_date DATE NOT NULL,\n", + " application_deadline DATE NOT NULL,\n", + " municipality_code INTEGER NOT NULL)\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# Load in keys for join #\n", + "cur.execute(\"\"\"\n", + "CREATE TABLE municipality_codes(\n", + " groups VARCHAR NOT NULL,\n", + " municipality_code INTEGER NOT NULL,\n", + " municipality_name VARCHAR NOT NULL,\n", + " main_group_code VARCHAR NOT NULL,\n", + " main_group_name VARCHAR NOT NULL,\n", + " municipality_group_code INTEGER NOT NULL,\n", + " municipality_group_name VARCHAR NOT NULL,\n", + " county_code INTEGER NOT NULL,\n", + " county_name VARCHAR NOT NULL)\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Importing csv files into PostgreSQL #\n", + "for file in range(2006, 2021):\n", + " with open(f'/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/{file}_municode.csv', 'r') as csv_file:\n", + " cur.copy_from(csv_file, 'historic_ads_muni', sep=',', columns=('headline', 'number_of_vacancies', 'publication_date', 'application_deadline', 'municipality_code'))\n", + " conn.commit()\n", + "# Importin keys for \n", + "with open('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/municipality_codes_int', 'r') as csv_file:\n", + " cur.copy_from(csv_file, 'municipality_codes', sep=',', columns=('groups', 'municipality_code', 'municipality_name', 'main_group_code', 'main_group_name', 'municipality_group_code', 'municipality_group_name', 'county_code', 'county_name'))\n", + " conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Full inner join #\n", + "cur.execute(\"\"\"\n", + " CREATE TABLE historic_ads_extension_1 AS \n", + " (SELECT ad_id, headline, number_of_vacancies, publication_date, application_deadline, historic_ads_muni.municipality_code, municipality_codes.municipality_name, municipality_codes.municipality_group_code, municipality_codes.municipality_group_name, municipality_codes.county_code, municipality_codes.county_name\n", + " FROM historic_ads_muni\n", + " INNER JOIN municipality_codes\n", + " ON historic_ads_muni.municipality_code = municipality_codes.municipality_code);\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- In the cell below we check, for each day between 2006-2020, how many ads were active for a given day and sum the vacancies of these ads. This is done by looping through each day in the time interval and checking how many ads have the given day (e.g. 2006-05-22) in between it's publication and deadline date (e.g. pub 2006-04-27 -- dead 2006-05-23). For all active ads, vacancies are summed and appended to a list as results." + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "# Analysis of labor demand by city size #\n", + "vacancies_per_day = []\n", + "vacancies_per_day.append([\"Date\", \"Rural Municipality\", \"Rural municipality with hospitality industry\", \"Low commuting municipality near major city\"\n", + ", \"Small town/urban area\", \"Commuting municipality near smaller urban area\", \"Commuting municipality near big city\", \"Commuting municipality near major city\"\n", + ",\"Metropolitan areas\", \"Big cities\"])\n", + "\n", + "for year in range(2006, 2021):\n", + " for month in range(1,13):\n", + " # We have to skip first month since some groups have values of zero #\n", + " if year == 2006 and month == 1:\n", + " continue\n", + " for day in range(1, calendar.monthrange(year, month)[1] + 1):\n", + " cur.execute(f\"\"\"\n", + " SELECT SUM(number_of_vacancies), municipality_group_name FROM historic_ads_extension_1 \n", + " WHERE '{year}-{month}-{day}' >= publication_date AND '{year}-{month}-{day}' <= application_deadline\n", + " GROUP BY municipality_group_name;\"\"\")\n", + " muni_ads = cur.fetchall()\n", + " vacancies_per_day.append([f'{year}-{month}-{day}', muni_ads[0][0], muni_ads[1][0], muni_ads[2][0], muni_ads[3][0],\n", + " muni_ads[4][0], muni_ads[5][0],muni_ads[6][0],muni_ads[7][0],muni_ads[8][0]])\n", + "\n", + "# We save the results to a csv file #\n", + "with open(f'/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/results.csv', mode='w', newline=\"\") as file_writer:\n", + " write = csv.writer(file_writer)\n", + " for row in vacancies_per_day:\n", + " write.writerow(row) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Visualization and discussion of results\n", + "#### The comparison data from Statistics Sweden and can be found [here](https://www.statistikdatabasen.scb.se/pxweb/en/ssd/START__AM__AM0701__AM0701A/KVLedigajobbSektor/).\n", + "- The comparison data is based on survey results that are published quarterly by Sweden's statistical agency, Statistics Sweden (SCB).\n", + "- The selected comparison data is between Q1 2006 - Q3 2020 which coincides with our job ads data." + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing results and comparison csv files and adding appropriate datetime indexing #\n", + "results = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020/results.csv', names=['date', 'ad_vacancies'])\n", + "results = results.set_index(pd.to_datetime(results['date'])).drop('date', axis=1)\n", + "comparison = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020/lediga_jobb.csv').drop('quarter', axis=1)\n", + "comparison = comparison.set_index(pd.date_range('2006-01-01','2020-12-01', freq='Q'))" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing csv files and adding appropriate datetime indexing #\n", + "muni_results = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/results.csv')\n", + "muni_results = muni_results.set_index(pd.to_datetime(muni_results['Date'])).drop('Date', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Creating range and labels for plot x-axis #\n", + "date_index = pd.date_range('2006','2022', freq='Y')\n", + "date_index = date_index.values.astype('datetime64[Y]')\n", + "date_range = np.arange(2006, 2022)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "plt.style.use('seaborn')\n", + "%config InlineBackend.figure_format = 'svg'\n", + "fig = plt.figure(figsize=(14,7))\n", + "\n", + "for cols in range(len(muni_results.columns)):\n", + " plt.plot(muni_results.index, muni_results.iloc[:,cols], label=muni_results.columns[cols])\n", + " \n", + "plt.plot(results.index, results, label='Daily labor demand index')\n", + "\n", + "plt.xticks(ticks=date_index, labels=date_range)\n", + "plt.xlim(datetime(2005,10,1), datetime(2021,3,1))\n", + "plt.legend()\n", + "\n", + "plt.savefig('/Users/Kevin/Desktop/project_dta/figure_results/results.svg', bbox_inches='tight', facecolor=fig.get_facecolor(), edgecolor='none')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discussion\n", + "- In the graph above we observe the daily labor demand index plotted together with Statistics Sweden's quarterly survey. Visually, the two time series show a clear co-movement. Thus, it is expect that much of the variation in the official survey could be explained by the daily index.\n", + "- It seems that changes in labor demand in the daily index based on job ads happen about one quarter before the change is seen in the quarterly survey index. \n", + "- Given that the daily labor demand index is behaving as a higher-frequency version of Statistics Sweden's quarterly survey, it could potentially be used as a cheap and timely complement." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Time series analysis (In progress).\n", + "- Planned substeps include:\n", + " - Time series decomposition using Loess (LOcally Estimated Scatterplot Smooting)\n", + " - Regressions analysis\n", + " - Postestimation tests: Engel-Granger test (i.e. unit-root and co-integration test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time series decomposition" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "# Decomposition of the daily index # Results == res #\n", + "res_stl = STL(results, period =365).fit()\n", + "res_seasonal, res_trend, res_resid = res_stl.seasonal, res_stl.trend, res_stl.resid\n", + "\n", + "# Decomposition of the comparison index # Comparison == comp # \n", + "comp_stl = STL(comparison, period =4).fit()\n", + "comp_seasonal, comp_trend, comp_resid = comp_stl.seasonal, comp_stl.trend, comp_stl.resid\n", + "\n", + "# All components gatherd in a dictionary for ease of use #\n", + "all_components = {('Original indices', 1):(results, comparison), ('Trend', 2):(res_trend, comp_trend), ('Seasonal', 3):(res_seasonal, comp_seasonal), ('Residual', 4):(res_resid, comp_resid)}" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "plt.style.use('seaborn')\n", + "%config InlineBackend.figure_format = 'svg'\n", + "plt.figure(figsize=(12,12))\n", + "for plots, component in all_components.items():\n", + " plt.subplot(4,1, plots[-1])\n", + " plt.plot(component[0], label='Daily labor demand index', color='steelblue')\n", + " plt.plot(component[1], label='Quarterly vacancy survey index', color='indianred')\n", + " plt.title(plots[0], fontsize=16)\n", + " plt.xticks(ticks=date_index, labels=date_range)\n", + " plt.xlim(datetime(2005,10,1), datetime(2021,3,1))\n", + " if plots[-1] == 1:\n", + " plt.legend()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression - Indices" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_vacanciessurvey_vacancies
date
2006-03-3136269.88888949486
2006-06-3036032.84615447554
2006-09-3030377.69565244060
2020-03-31164516.571429132367
2020-06-30113403.63736386775
2020-09-3062550.40217467046
\n", + "
" + ], + "text/plain": [ + " ad_vacancies survey_vacancies\n", + "date \n", + "2006-03-31 36269.888889 49486\n", + "2006-06-30 36032.846154 47554\n", + "2006-09-30 30377.695652 44060\n", + "2020-03-31 164516.571429 132367\n", + "2020-06-30 113403.637363 86775\n", + "2020-09-30 62550.402174 67046" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# We use the mean job ads vacancies for each quarter #\n", + "results_resampled = results.resample('Q').mean().iloc[:-1,:]\n", + "res_comp = results_resampled.join(comparison).rename(columns={'Total':'survey_vacancies'})\n", + "# First and last three observations #\n", + "display(res_comp.iloc[[0, 1, 2, -3, -2, -1],:])" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "=======================================================================================\n", + "Dep. Variable: survey_vacancies R-squared (uncentered): 0.980\n", + "Model: OLS Adj. R-squared (uncentered): 0.979\n", + "Method: Least Squares F-statistic: 2774.\n", + "Date: Wed, 27 Jan 2021 Prob (F-statistic): 1.12e-50\n", + "Time: 10:14:30 Log-Likelihood: -635.50\n", + "No. Observations: 59 AIC: 1273.\n", + "Df Residuals: 58 BIC: 1275.\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "--------------------------------------------------------------------------------\n", + "ad_vacancies 0.8994 0.017 52.673 0.000 0.865 0.934\n", + "==============================================================================\n", + "Omnibus: 4.837 Durbin-Watson: 0.692\n", + "Prob(Omnibus): 0.089 Jarque-Bera (JB): 4.640\n", + "Skew: -0.683 Prob(JB): 0.0983\n", + "Kurtosis: 2.853 Cond. No. 1.00\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n", + "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "ols = sm.OLS(res_comp.iloc[:,1], res_comp.iloc[:,0]).fit()\n", + "print(ols.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression - Trend and Seasonality Components" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "# In progress #" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Concluding remarks\n", + "#### Thank you for examining my project! Pleas contact me if you have any feedback on possible improvement or other comments on the project.\n", + "\n", + "Contact: [kevin.dee.boman@gmail.com](kevin.dee.boman@gmail.com)\n", + "\n", + "#### Future project plans:\n", + "\n", + " - In a future project, a current labor demand index will be constructed using JobTechDevs API service in combination with ads scraped from supplementary sources that systematically are not posted on platsbanken (The public employment service job ads platform).\n", + " - The index may also be disaggregated by county/city and occupational group using [SSYK classification](https://www.scb.se/dokumentation/klassifikationer-och-standarder/standard-for-svensk-yrkesklassificering-ssyk/) (Swedens official classification system for occupational groups, only available in Swedish). This would extend the use of the index as regional labor demand and labor demand among different occupational groups could be estimated." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/extension_3_masters_project_muni_and_occupation.ipynb b/extension_3_masters_project_muni_and_occupation.ipynb new file mode 100644 index 0000000..8fc32f2 --- /dev/null +++ b/extension_3_masters_project_muni_and_occupation.ipynb @@ -0,0 +1,43769 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Project imports #\n", + "import pandas as pd\n", + "import numpy as np\n", + "import psycopg2 as pg2\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "import csv\n", + "import re\n", + "from statsmodels.tsa.seasonal import STL\n", + "import statsmodels.api as sm\n", + "from arch.unitroot import engle_granger\n", + "from datetime import datetime\n", + "import calendar\n", + "from calendar import monthrange\n", + "# Matplotlib settings #\n", + "%matplotlib inline\n", + "plt.style.use('seaborn')\n", + "%config InlineBackend.figure_format = 'svg'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extension of Project - Extension 3\n", + "#### Objective: To see how local labor demand has changed based on the jobads available on platsbanken between 2006-2020\n", + "#### Observe; because of restrictions on data quality for 2006 and 2007, the timespan will be limited to 2008-2020\n", + "\n", + "#### Project outline\n", + "- Step 1: Converting JSON files to CSV and extracting relevant data (+data cleaning).\n", + "- Step 2: Inserting data into a PostgreSQL database and setting up a query.\n", + "- Step 3: Visualization and discussion of results.\n", + "- Step 4: Time series analysis (in progress).\n", + "- Concluding remarks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: Converting JSON files to CSV and extracting relevant data (+data cleaning)\n", + "#### All job ads data used in this project is from [JobTechDev](https://jobtechdev.se/en/docs/apis/historical/), an initiative by the Swedish public employment service.\n", + "\n", + "- Variables to be extracted for this project are the following:\n", + " - **headline**: The ad headline.\n", + " - **number_of_vacancies**: The number of advertised jobs for a given ad, a majority only advertise 1 job (5M out of 6.4M ads).\n", + " - **publication_date**: Date the ad was published on Platsbanken, the job ad platform.\n", + " - **application_deadline**: The last date to apply for the job.\n", + " - **last_publication_date**: The last date the ad was public, used as substitutet for application_deadline for 2017 where application_deadline is missing.\n", + " - **municipality_code**: The unique code associated with each municipality in Sweden\n", + " - **occupation_group**: Occupation group on the 4th level of SSYK" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "valid ads for 2006: 21 | erroneous ads for 2006: 245206\n", + "valid ads for 2007: 33591 | erroneous ads for 2007: 250514\n", + "valid ads for 2008: 177998 | erroneous ads for 2008: 87225\n", + "valid ads for 2009: 137174 | erroneous ads for 2009: 58950\n", + "valid ads for 2010: 214008 | erroneous ads for 2010: 75246\n", + "valid ads for 2011: 296940 | erroneous ads for 2011: 67748\n", + "valid ads for 2012: 309110 | erroneous ads for 2012: 70248\n", + "valid ads for 2013: 316092 | erroneous ads for 2013: 73961\n", + "valid ads for 2014: 388634 | erroneous ads for 2014: 63351\n", + "valid ads for 2015: 574481 | erroneous ads for 2015: 39032\n", + "valid ads for 2016: 690854 | erroneous ads for 2016: 27192\n", + "valid ads for 2017: 692449 | erroneous ads for 2017: 22842\n", + "valid ads for 2018: 658276 | erroneous ads for 2018: 24693\n", + "valid ads for 2019: 514061 | erroneous ads for 2019: 126196\n", + "valid ads for 2020: 449788 | erroneous ads for 2020: 40925\n", + "\n", + "\n", + "total valid ads: 5453477 | total erroneous ads: 1273329\n" + ] + } + ], + "source": [ + "total_valid = 0; total_errors = 0\n", + "for file in range(2006,2021):\n", + " with open(f'/Users/Kevin/Desktop/project_dta/new_pb_2006_2020/{file}.json') as f:\n", + " ads = json.load(f)\n", + " file_ads = []\n", + " error_rows = 0\n", + " \n", + " # For 2017 application_deadline is null, last_publication_date used as a proxy, vars 99.8% equivalent #\n", + " for ad in ads:\n", + " # Removal of special characters using regex, events of \\n causes errors in csv file #\n", + " head_line = re.sub('[!,*)@#%(&$_?.^\\\\\\\\\\n/]', '', str(ad['headline']))\n", + " if file != 2017:\n", + " try: \n", + " # Slicing of date variables is to ensure only dates of the format 'yyyy-mm-dd' are included, no time data needed #\n", + " ad_select = [head_line, int(ad['number_of_vacancies']), ad['publication_date'][:10], ad['application_deadline'][:10], int(ad['workplace_address']['municipality_code']), int(ad['occupation_group']['legacy_ams_taxonomy_id'])]\n", + " except:\n", + " error_rows += 1\n", + " continue\n", + " else:\n", + " try:\n", + " ad_select = [head_line, int(ad['number_of_vacancies']), ad['publication_date'][:10], ad['last_publication_date'][:10], int(ad['workplace_address']['municipality_code']), int(ad['occupation_group']['legacy_ams_taxonomy_id'])]\n", + " except:\n", + " error_rows += 1\n", + " continue\n", + " \n", + " # Jobs ads with no vacancies (=0) or missing values (None) in dates or vacancies are removed using all() #\n", + " if all(ad_select[1:]):\n", + " file_ads.append(ad_select)\n", + " else:\n", + " error_rows += 1\n", + " continue\n", + " # We write the extracted data to a csv file for easy insertion into PostgreSQL # \n", + " with open(f'/Users/Kevin/Desktop/project_dta/extension_3/{file}_municode_occupation.csv', mode='w', newline=\"\") as file_writer:\n", + " write = csv.writer(file_writer)\n", + " for row in file_ads:\n", + " write.writerow(row)\n", + " print(f'valid ads for {file}: ' + str(len(file_ads)), ' | ', f'erroneous ads for {file}: ' + str(error_rows))\n", + " total_valid += len(file_ads); total_errors += error_rows\n", + "print('\\n')\n", + "print('total valid ads: ' + str(total_valid), ' | ', 'total erroneous ads: '+ str(total_errors))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- More rows are filtered out now that restrictions are harder. Now, if the municipality code is NULL, row is discarded" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Inserting data into a postgreSQL database and setting up relevant query\n", + " - For efficient analysis and storage of the dataset we import the data into PostgreSQL." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "# DB created in pgAdmin4 GUI, login and connect cursor #\n", + "conn = pg2.connect(database='job_ads', user='postgres', password='World930920')\n", + "cur = conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Table creation # \n", + "# If data cleaning method in previous step missed any invalid ads, NOT NULL ascertains that no invalid ads are included #\n", + "cur.execute(\"\"\"\n", + "CREATE TABLE historic_ads_muni(\n", + " ad_id SERIAL PRIMARY KEY,\n", + " headline VARCHAR,\n", + " number_of_vacancies INTEGER NOT NULL,\n", + " publication_date DATE NOT NULL,\n", + " application_deadline DATE NOT NULL,\n", + " municipality_code INTEGER NOT NULL\n", + " occupation_id INTEGER NOT NULL)\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# Load in keys for join #\n", + "cur.execute(\"\"\"\n", + "CREATE TABLE municipality_codes(\n", + " groups VARCHAR NOT NULL,\n", + " municipality_code INTEGER NOT NULL,\n", + " municipality_name VARCHAR NOT NULL,\n", + " main_group_code VARCHAR NOT NULL,\n", + " main_group_name VARCHAR NOT NULL,\n", + " municipality_group_code INTEGER NOT NULL,\n", + " municipality_group_name VARCHAR NOT NULL,\n", + " county_code INTEGER NOT NULL,\n", + " county_name VARCHAR NOT NULL)\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Importing csv files into PostgreSQL #\n", + "for file in range(2006, 2021):\n", + " with open(f'/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/{file}_municode.csv', 'r') as csv_file:\n", + " cur.copy_from(csv_file, 'historic_ads_muni', sep=',', columns=('headline', 'number_of_vacancies', 'publication_date', 'application_deadline', 'municipality_code'))\n", + " conn.commit()\n", + "# Importin keys for \n", + "with open('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/municipality_codes_int', 'r') as csv_file:\n", + " cur.copy_from(csv_file, 'municipality_codes', sep=',', columns=('groups', 'municipality_code', 'municipality_name', 'main_group_code', 'main_group_name', 'municipality_group_code', 'municipality_group_name', 'county_code', 'county_name'))\n", + " conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Full inner join #\n", + "cur.execute(\"\"\"\n", + " CREATE TABLE historic_ads_extension_1 AS \n", + " (SELECT ad_id, headline, number_of_vacancies, publication_date, application_deadline, historic_ads_muni.municipality_code, municipality_codes.municipality_name, municipality_codes.municipality_group_code, municipality_codes.municipality_group_name, municipality_codes.county_code, municipality_codes.county_name\n", + " FROM historic_ads_muni\n", + " INNER JOIN municipality_codes\n", + " ON historic_ads_muni.municipality_code = municipality_codes.municipality_code);\n", + " ;\"\"\")\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- In the cell below we check, for each day between 2006-2020, how many ads were active for a given day and sum the vacancies of these ads. This is done by looping through each day in the time interval and checking how many ads have the given day (e.g. 2006-05-22) in between it's publication and deadline date (e.g. pub 2006-04-27 -- dead 2006-05-23). For all active ads, vacancies are summed and appended to a list as results." + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "# Analysis of labor demand by city size #\n", + "vacancies_per_day = []\n", + "vacancies_per_day.append([\"Date\", \"Rural Municipality\", \"Rural municipality with hospitality industry\", \"Low commuting municipality near major city\"\n", + ", \"Small town/urban area\", \"Commuting municipality near smaller urban area\", \"Commuting municipality near big city\", \"Commuting municipality near major city\"\n", + ",\"Metropolitan areas\", \"Big cities\"])\n", + "\n", + "for year in range(2006, 2021):\n", + " for month in range(1,13):\n", + " # We have to skip first month since some groups have values of zero #\n", + " if year == 2006 and month == 1:\n", + " continue\n", + " for day in range(1, calendar.monthrange(year, month)[1] + 1):\n", + " cur.execute(f\"\"\"\n", + " SELECT SUM(number_of_vacancies), municipality_group_name FROM historic_ads_extension_1 \n", + " WHERE '{year}-{month}-{day}' >= publication_date AND '{year}-{month}-{day}' <= application_deadline\n", + " GROUP BY municipality_group_name;\"\"\")\n", + " muni_ads = cur.fetchall()\n", + " vacancies_per_day.append([f'{year}-{month}-{day}', muni_ads[0][0], muni_ads[1][0], muni_ads[2][0], muni_ads[3][0],\n", + " muni_ads[4][0], muni_ads[5][0],muni_ads[6][0],muni_ads[7][0],muni_ads[8][0]])\n", + "\n", + "# We save the results to a csv file #\n", + "with open(f'/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/results.csv', mode='w', newline=\"\") as file_writer:\n", + " write = csv.writer(file_writer)\n", + " for row in vacancies_per_day:\n", + " write.writerow(row) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Visualization and discussion of results\n", + "#### The comparison data from Statistics Sweden and can be found [here](https://www.statistikdatabasen.scb.se/pxweb/en/ssd/START__AM__AM0701__AM0701A/KVLedigajobbSektor/).\n", + "- The comparison data is based on survey results that are published quarterly by Sweden's statistical agency, Statistics Sweden (SCB).\n", + "- The selected comparison data is between Q1 2006 - Q3 2020 which coincides with our job ads data." + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing results and comparison csv files and adding appropriate datetime indexing #\n", + "results = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020/results.csv', names=['date', 'ad_vacancies'])\n", + "results = results.set_index(pd.to_datetime(results['date'])).drop('date', axis=1)\n", + "comparison = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020/lediga_jobb.csv').drop('quarter', axis=1)\n", + "comparison = comparison.set_index(pd.date_range('2006-01-01','2020-12-01', freq='Q'))" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing csv files and adding appropriate datetime indexing #\n", + "muni_results = pd.read_csv('/Users/Kevin/Desktop/project_dta/csv_pd2006_2020_muni_int/results.csv')\n", + "muni_results = muni_results.set_index(pd.to_datetime(muni_results['Date'])).drop('Date', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Creating range and labels for plot x-axis #\n", + "date_index = pd.date_range('2006','2022', freq='Y')\n", + "date_index = date_index.values.astype('datetime64[Y]')\n", + "date_range = np.arange(2006, 2022)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "plt.style.use('seaborn')\n", + "%config InlineBackend.figure_format = 'svg'\n", + "fig = plt.figure(figsize=(14,7))\n", + "\n", + "for cols in range(len(muni_results.columns)):\n", + " plt.plot(muni_results.index, muni_results.iloc[:,cols], label=muni_results.columns[cols])\n", + " \n", + "plt.plot(results.index, results, label='Daily labor demand index')\n", + "\n", + "plt.xticks(ticks=date_index, labels=date_range)\n", + "plt.xlim(datetime(2005,10,1), datetime(2021,3,1))\n", + "plt.legend()\n", + "\n", + "plt.savefig('/Users/Kevin/Desktop/project_dta/figure_results/results.svg', bbox_inches='tight', facecolor=fig.get_facecolor(), edgecolor='none')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discussion\n", + "- In the graph above we observe the daily labor demand index plotted together with Statistics Sweden's quarterly survey. Visually, the two time series show a clear co-movement. Thus, it is expect that much of the variation in the official survey could be explained by the daily index.\n", + "- It seems that changes in labor demand in the daily index based on job ads happen about one quarter before the change is seen in the quarterly survey index. \n", + "- Given that the daily labor demand index is behaving as a higher-frequency version of Statistics Sweden's quarterly survey, it could potentially be used as a cheap and timely complement." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Time series analysis (In progress).\n", + "- Planned substeps include:\n", + " - Time series decomposition using Loess (LOcally Estimated Scatterplot Smooting)\n", + " - Regressions analysis\n", + " - Postestimation tests: Engel-Granger test (i.e. unit-root and co-integration test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time series decomposition" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "# Decomposition of the daily index # Results == res #\n", + "res_stl = STL(results, period =365).fit()\n", + "res_seasonal, res_trend, res_resid = res_stl.seasonal, res_stl.trend, res_stl.resid\n", + "\n", + "# Decomposition of the comparison index # Comparison == comp # \n", + "comp_stl = STL(comparison, period =4).fit()\n", + "comp_seasonal, comp_trend, comp_resid = comp_stl.seasonal, comp_stl.trend, comp_stl.resid\n", + "\n", + "# All components gatherd in a dictionary for ease of use #\n", + "all_components = {('Original indices', 1):(results, comparison), ('Trend', 2):(res_trend, comp_trend), ('Seasonal', 3):(res_seasonal, comp_seasonal), ('Residual', 4):(res_resid, comp_resid)}" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "plt.style.use('seaborn')\n", + "%config InlineBackend.figure_format = 'svg'\n", + "plt.figure(figsize=(12,12))\n", + "for plots, component in all_components.items():\n", + " plt.subplot(4,1, plots[-1])\n", + " plt.plot(component[0], label='Daily labor demand index', color='steelblue')\n", + " plt.plot(component[1], label='Quarterly vacancy survey index', color='indianred')\n", + " plt.title(plots[0], fontsize=16)\n", + " plt.xticks(ticks=date_index, labels=date_range)\n", + " plt.xlim(datetime(2005,10,1), datetime(2021,3,1))\n", + " if plots[-1] == 1:\n", + " plt.legend()\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression - Indices" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_vacanciessurvey_vacancies
date
2006-03-3136269.88888949486
2006-06-3036032.84615447554
2006-09-3030377.69565244060
2020-03-31164516.571429132367
2020-06-30113403.63736386775
2020-09-3062550.40217467046
\n", + "
" + ], + "text/plain": [ + " ad_vacancies survey_vacancies\n", + "date \n", + "2006-03-31 36269.888889 49486\n", + "2006-06-30 36032.846154 47554\n", + "2006-09-30 30377.695652 44060\n", + "2020-03-31 164516.571429 132367\n", + "2020-06-30 113403.637363 86775\n", + "2020-09-30 62550.402174 67046" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# We use the mean job ads vacancies for each quarter #\n", + "results_resampled = results.resample('Q').mean().iloc[:-1,:]\n", + "res_comp = results_resampled.join(comparison).rename(columns={'Total':'survey_vacancies'})\n", + "# First and last three observations #\n", + "display(res_comp.iloc[[0, 1, 2, -3, -2, -1],:])" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "=======================================================================================\n", + "Dep. Variable: survey_vacancies R-squared (uncentered): 0.980\n", + "Model: OLS Adj. R-squared (uncentered): 0.979\n", + "Method: Least Squares F-statistic: 2774.\n", + "Date: Wed, 27 Jan 2021 Prob (F-statistic): 1.12e-50\n", + "Time: 10:14:30 Log-Likelihood: -635.50\n", + "No. Observations: 59 AIC: 1273.\n", + "Df Residuals: 58 BIC: 1275.\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "--------------------------------------------------------------------------------\n", + "ad_vacancies 0.8994 0.017 52.673 0.000 0.865 0.934\n", + "==============================================================================\n", + "Omnibus: 4.837 Durbin-Watson: 0.692\n", + "Prob(Omnibus): 0.089 Jarque-Bera (JB): 4.640\n", + "Skew: -0.683 Prob(JB): 0.0983\n", + "Kurtosis: 2.853 Cond. No. 1.00\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n", + "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "ols = sm.OLS(res_comp.iloc[:,1], res_comp.iloc[:,0]).fit()\n", + "print(ols.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regression - Trend and Seasonality Components" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "# In progress #" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Concluding remarks\n", + "#### Thank you for examining my project! Pleas contact me if you have any feedback on possible improvement or other comments on the project.\n", + "\n", + "Contact: [kevin.dee.boman@gmail.com](kevin.dee.boman@gmail.com)\n", + "\n", + "#### Future project plans:\n", + "\n", + " - In a future project, a current labor demand index will be constructed using JobTechDevs API service in combination with ads scraped from supplementary sources that systematically are not posted on platsbanken (The public employment service job ads platform).\n", + " - The index may also be disaggregated by county/city and occupational group using [SSYK classification](https://www.scb.se/dokumentation/klassifikationer-och-standarder/standard-for-svensk-yrkesklassificering-ssyk/) (Swedens official classification system for occupational groups, only available in Swedish). This would extend the use of the index as regional labor demand and labor demand among different occupational groups could be estimated." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}