diff --git a/DESCRIPTION b/DESCRIPTION index 71f0bc0..234e8cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,8 @@ Suggests: testthat, purrr, tidyr, - magrittr + magrittr, + ggplot2 VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/NEWS.md b/NEWS.md index 85e4ff6..654f1fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,9 @@ * Now using the `snakecase` package to implement name cleaning, providing a broader range of naming styles. + +* `nomis_get_metadata()` now makes existence of time concept explicit in the + tibble returned by `nomis_get_metadata({id})`. # nomisr 0.4.1 diff --git a/R/metadata.R b/R/metadata.R index d8b8ed3..60939cb 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -45,7 +45,7 @@ #' \donttest{ #' a <- nomis_get_metadata("NM_1_1") #' -#' tibble::glimpse(a) +#' print(a) #' #' b <- nomis_get_metadata("NM_1_1", "geography") #' @@ -89,9 +89,16 @@ nomis_get_metadata <- function(id, concept = NULL, type = NULL, search = NULL, if (is.null(concept)) { no_code_q <- nomis_data_info(id) - df <- tibble::as_tibble( + df1 <- tibble::as_tibble( as.data.frame(no_code_q$components.dimension) ) + + names(no_code_q) <- gsub("components.timedimension.", "", + names(no_code_q), fixed = TRUE) + + no_code_q <- no_code_q[c("codelist", "conceptref")] + + df <- bind_rows(df1, no_code_q) df$isfrequencydimension[is.na(df$isfrequencydimension)] <- "false" } else { diff --git a/README.Rmd b/README.Rmd index 5434c9b..0c6d42c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -51,7 +51,7 @@ devtools::install_github("ropensci/nomisr") `nomisr` contains functions to search for datasets, identify the query options for different datasets and retrieve data from queries, all done with [`tibbles`](https://tibble.tidyverse.org/), to take advantage of how `tibble` manages list-columns. The use of metadata queries, rather than simply downloading all available data, is useful to avoid overwhelming the rate limits of the API. 
For full details on all available functions and demonstrations of their use, please see the package [vignette](https://docs.evanodell.com/nomisr/articles/introduction.html). -The example below gets the latest data on Jobseeker's Allowance with rates and proportions, on a national level, with all male claimants and workforce. +The example below demonstrates a workflow to retrieve the latest data on Jobseeker's Allowance with rates and proportions, on a national level, with all male claimants and workforce. ```{r example} library(nomisr) @@ -86,11 +86,11 @@ Bug reports, suggestions, and code contributions are all welcome. Please see [CO Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. -Please note that this project is not affiliated with the Office for National Statistics or the University of Durham. +Please note that this project is not affiliated with the Office for National Statistics or the University of Durham (who run Nomis on behalf of the Office for National Statistics). -Get citation information for `nomisr` in R with `citation(package = 'nomisr')` +Please use the reference below when citing `nomisr`, which is the same as `citation(package = 'nomisr')`. -Odell, (2018). nomisr: Access 'Nomis' UK Labour Market Data. Journal of Open Source Software, 3(27), 859, https://doi.org/10.21105/joss.00859. +Odell, (2018). nomisr: Access 'Nomis' UK Labour Market Data. _Journal of Open Source Software_, 3(27), 859, https://doi.org/10.21105/joss.00859. A BibTeX entry for LaTeX users is ``` diff --git a/README.md b/README.md index 4ca92ca..b773173 100644 --- a/README.md +++ b/README.md @@ -65,9 +65,9 @@ available functions and demonstrations of their use, please see the package [vignette](https://docs.evanodell.com/nomisr/articles/introduction.html). 
-The example below gets the latest data on Jobseeker’s Allowance with -rates and proportions, on a national level, with all male claimants and -workforce. +The example below demonstrates a workflow to retrieve the latest data on +Jobseeker’s Allowance with rates and proportions, on a national level, +with all male claimants and workforce. ``` r library(nomisr) @@ -76,20 +76,20 @@ workforce. tibble::glimpse(jobseekers_search) #> Observations: 17 #> Variables: 14 -#> $ agencyid "NOMIS", "NOMIS", "NOMIS", "NOMI… -#> $ id "NM_1_1", "NM_4_1", "NM_8_1", "N… -#> $ uri "Nm-1d1", "Nm-4d1", "Nm-8d1", "N… -#> $ version 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… -#> $ annotations.annotation [, $ components.attribute [, $ components.dimension [, $ components.primarymeasure.conceptref "OBS_VALUE", "OBS_VALUE", "OBS_V… -#> $ components.timedimension.codelist "CL_1_1_TIME", "CL_4_1_TIME", "C… -#> $ components.timedimension.conceptref "TIME", "TIME", "TIME", "TIME", … -#> $ description.value "Records the number of people cl… -#> $ description.lang "en", "en", NA, "en", "en", "en"… -#> $ name.value "Jobseeker's Allowance with rate… -#> $ name.lang "en", "en", "en", "en", "en", "e… +#> $ agencyid "NOMIS", "NOMIS", "NOMIS", "NO... +#> $ id "NM_1_1", "NM_4_1", "NM_8_1", ... +#> $ uri "Nm-1d1", "Nm-4d1", "Nm-8d1", ... +#> $ version 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... +#> $ annotations.annotation [, $ components.attribute [, $ components.dimension [, $ components.primarymeasure.conceptref "OBS_VALUE", "OBS_VALUE", "OBS... +#> $ components.timedimension.codelist "CL_1_1_TIME", "CL_4_1_TIME", ... +#> $ components.timedimension.conceptref "TIME", "TIME", "TIME", "TIME"... +#> $ description.value "Records the number of people ... +#> $ description.lang "en", "en", NA, "en", "en", "e... +#> $ name.value "Jobseeker's Allowance with ra... +#> $ name.lang "en", "en", "en", "en", "en", ... jobseekers_measures <- nomis_get_metadata("NM_1_1", "measures") @@ -106,11 +106,11 @@ workforce. 
#> # A tibble: 6 x 3 #> id label.en description.en #> -#> 1 TYPE490 government office regions tec / … government office regions tec / lec… -#> 2 TYPE491 government office regions (forme… government office regions (former i… +#> 1 TYPE490 government office regions tec / ~ government office regions tec / lec~ +#> 2 TYPE491 government office regions (forme~ government office regions (former i~ #> 3 TYPE492 standard statistical regions standard statistical regions -#> 4 TYPE496 pre-1996 local authority distric… pre-1996 local authority districts -#> 5 TYPE498 pre-1996 counties / scottish reg… pre-1996 counties / scottish regions +#> 4 TYPE496 pre-1996 local authority distric~ pre-1996 local authority districts +#> 5 TYPE498 pre-1996 counties / scottish reg~ pre-1996 counties / scottish regions #> 6 TYPE499 countries countries jobseekers_sex <- nomis_get_metadata("NM_1_1", "sex", "TYPE") @@ -151,40 +151,40 @@ workforce. tibble::glimpse(z) #> Observations: 70 #> Variables: 34 -#> $ DATE "2019-12", "2019-12", "2019-12", "2019-12", "2019… -#> $ DATE_NAME "December 2019", "December 2019", "December 2019"… -#> $ DATE_CODE "2019-12", "2019-12", "2019-12", "2019-12", "2019… -#> $ DATE_TYPE "date", "date", "date", "date", "date", "date", "… -#> $ DATE_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… -#> $ DATE_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… -#> $ GEOGRAPHY 2092957697, 2092957697, 2092957697, 2092957697, 2… -#> $ GEOGRAPHY_NAME "United Kingdom", "United Kingdom", "United Kingd… -#> $ GEOGRAPHY_CODE "K02000001", "K02000001", "K02000001", "K02000001… -#> $ GEOGRAPHY_TYPE "countries", "countries", "countries", "countries… -#> $ GEOGRAPHY_TYPECODE 499, 499, 499, 499, 499, 499, 499, 499, 499, 499,… -#> $ GEOGRAPHY_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1… -#> $ SEX 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5… -#> $ SEX_NAME "Male", "Male", "Male", "Male", "Male", "Male", "… -#> $ SEX_CODE 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5… -#> $ SEX_TYPE "sex", "sex", "sex", "sex", "sex", "sex", "sex", … -#> $ SEX_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… -#> $ SEX_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… -#> $ ITEM 1, 1, 2, 2, 3, 3, 4, 4, 9, 9, 1, 1, 2, 2, 3, 3, 4… -#> $ ITEM_NAME "Total claimants", "Total claimants", "Students o… -#> $ ITEM_CODE 1, 1, 2, 2, 3, 3, 4, 4, 9, 9, 1, 1, 2, 2, 3, 3, 4… -#> $ ITEM_TYPE "item", "item", "item", "item", "item", "item", "… -#> $ ITEM_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… -#> $ ITEM_SORTORDER 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3… -#> $ MEASURES 20100, 20201, 20100, 20201, 20100, 20201, 20100, … -#> $ MEASURES_NAME "Persons claiming JSA", "Workplace-based estimate… -#> $ OBS_VALUE 106180.0, 0.6, NA, NA, NA, NA, NA, NA, NA, NA, 98… -#> $ OBS_STATUS "A", "A", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q",… -#> $ OBS_STATUS_NAME "Normal Value", "Normal Value", "These figures ar… -#> $ OBS_CONF FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, … -#> $ OBS_CONF_NAME "Free (free for publication)", "Free (free for pu… -#> $ URN "Nm-1d1d32316e0d2092957697d5d1d20100", "Nm-1d1d32… -#> $ RECORD_OFFSET 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,… -#> $ RECORD_COUNT 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 7… +#> $ DATE "2020-01", "2020-01", "2020-01", "2020-01", "20... +#> $ DATE_NAME "January 2020", "January 2020", "January 2020",... +#> $ DATE_CODE "2020-01", "2020-01", "2020-01", "2020-01", "20... +#> $ DATE_TYPE "date", "date", "date", "date", "date", "date",... +#> $ DATE_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... +#> $ DATE_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... +#> $ GEOGRAPHY 2092957697, 2092957697, 2092957697, 2092957697,... +#> $ GEOGRAPHY_NAME "United Kingdom", "United Kingdom", "United Kin... +#> $ GEOGRAPHY_CODE "K02000001", "K02000001", "K02000001", "K020000... 
+#> $ GEOGRAPHY_TYPE "countries", "countries", "countries", "countri... +#> $ GEOGRAPHY_TYPECODE 499, 499, 499, 499, 499, 499, 499, 499, 499, 49... +#> $ GEOGRAPHY_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,... +#> $ SEX 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,... +#> $ SEX_NAME "Male", "Male", "Male", "Male", "Male", "Male",... +#> $ SEX_CODE 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,... +#> $ SEX_TYPE "sex", "sex", "sex", "sex", "sex", "sex", "sex"... +#> $ SEX_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... +#> $ SEX_SORTORDER 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... +#> $ ITEM 1, 1, 2, 2, 3, 3, 4, 4, 9, 9, 1, 1, 2, 2, 3, 3,... +#> $ ITEM_NAME "Total claimants", "Total claimants", "Students... +#> $ ITEM_CODE 1, 1, 2, 2, 3, 3, 4, 4, 9, 9, 1, 1, 2, 2, 3, 3,... +#> $ ITEM_TYPE "item", "item", "item", "item", "item", "item",... +#> $ ITEM_TYPECODE 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... +#> $ ITEM_SORTORDER 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 1, 1, 2, 2,... +#> $ MEASURES 20100, 20201, 20100, 20201, 20100, 20201, 20100... +#> $ MEASURES_NAME "Persons claiming JSA", "Workplace-based estima... +#> $ OBS_VALUE 105592.0, 0.6, NA, NA, NA, NA, NA, NA, NA, NA, ... +#> $ OBS_STATUS "A", "A", "Q", "Q", "Q", "Q", "Q", "Q", "Q", "Q... +#> $ OBS_STATUS_NAME "Normal Value", "Normal Value", "These figures ... +#> $ OBS_CONF FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE... +#> $ OBS_CONF_NAME "Free (free for publication)", "Free (free for ... +#> $ URN "Nm-1d1d32321e0d2092957697d5d1d20100", "Nm-1d1d... +#> $ RECORD_OFFSET 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1... +#> $ RECORD_COUNT 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,... ``` There is a lot of data available through Nomis, and there are some @@ -204,13 +204,15 @@ Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. 
Please note that this project is not affiliated with the Office for -National Statistics or the University of Durham. +National Statistics or the University of Durham (who run Nomis on behalf +of the Office for National Statistics). -Get citation information for `nomisr` in R with `citation(package = -'nomisr')` +Please use the reference below when citing `nomisr`, which is the same +as `citation(package = 'nomisr')`. -Odell, (2018). nomisr: Access ‘Nomis’ UK Labour Market Data. Journal of -Open Source Software, 3(27), 859, <https://doi.org/10.21105/joss.00859>. +Odell, (2018). nomisr: Access ‘Nomis’ UK Labour Market Data. *Journal of +Open Source Software*, 3(27), 859, +<https://doi.org/10.21105/joss.00859>. A BibTeX entry for LaTeX users is diff --git a/man/nomis_get_metadata.Rd b/man/nomis_get_metadata.Rd index d0e7d1e..710068a 100644 --- a/man/nomis_get_metadata.Rd +++ b/man/nomis_get_metadata.Rd @@ -56,7 +56,7 @@ metadata for a given dataset, see \code{\link[=nomis_overview]{nomis_overview()} \donttest{ a <- nomis_get_metadata("NM_1_1") -tibble::glimpse(a) +print(a) b <- nomis_get_metadata("NM_1_1", "geography") diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 871c4a2..97d7e41 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -29,7 +29,7 @@ The `nomis_data_info()` function is focused on the structure and coverage of the Use the `nomis_data_info()` function without any parameters to get a tibble with metadata for all available datasets: -```{r, echo=TRUE} +```{r all-datasets, echo=TRUE} library(nomisr) x <- nomis_data_info() head(x) @@ -38,7 +38,7 @@ head(x) `nomis_data_info()` can also be used to query metadata from a specific dataset, using its ID. The example below uses the "LC4408EW - Tenure by number of persons per bedroom in household by household type" dataset from the 2011 census, which has the ID "NM_893_1". 
-```{r, echo=TRUE} +```{r specific-dataset, echo=TRUE} y <- nomis_data_info("NM_893_1") tibble::glimpse(y) @@ -48,7 +48,7 @@ When a tibble with metadata for all datasets or a specific dataset is returned, The example below shows how to access data stored in list columns returned from the Nomis API. In the case of requests for metadata from a single dataset, the three columns are all lists with a length of 1. If requesting all dataset information with `nomis_data_info()`, each row is a list of length 1. Each list contains a data.frame, of varrying dimensions depending on the column and dataset. You can unnest individual list-columns to display their data in the same row as data from the rest of the tibble. Due to the differing lengths of the list-columns returned by `nomis_data_info()`, only one list-column can be unnested at a time. -```{r, echo=TRUE} +```{r specific-dataset-exam, echo=TRUE} library(dplyr, warn.conflicts = F) y$annotations.annotation %>% class() @@ -71,7 +71,7 @@ y %>% tidyr::unnest(annotations.annotation) %>% glimpse() `nomisr` also contains the `nomis_search()` function to search for datasets on particular topics. `nomis_search()` can be used to search in one or more of dataset names, descriptions, keywords, content type and units. If using multiple parameters, `nomis_search()` will return information on all datasets that match one or more parameters. Character vectors of strings can be used in searches, and likewise `nomis_search()` will return information on datasets that match one or more queries. The * is used as a wildcard symbol. `nomis_search()` returns metadata in the same format as `nomis_data_info()`, including using list-columns. The `nomis_content_type()` function can assist in identifying content type IDs for `nomis_search()`. 
-```{r, echo=TRUE} +```{r data-searching, echo=TRUE} a <- nomis_search(name = '*jobseekers*', keywords = 'Claimants') tibble::glimpse(a) @@ -92,7 +92,7 @@ b %>% tidyr::unnest(components.attribute) %>% glimpse() `nomis_overview()` returns a tibble with a generalised overview of a given dataset. -```{r, echo=TRUE} +```{r overview, echo=TRUE} q <- nomis_overview("NM_1650_1") q %>% tidyr::unnest(name) %>% glimpse() @@ -101,10 +101,10 @@ q %>% tidyr::unnest(name) %>% glimpse() `nomis_overview()` has a `select` parameter that can be used to select only particular elements of the overview to return. -```{r, echo=TRUE} - s <- nomis_overview("NM_1650_1", select = c("units", "keywords")) +```{r overview-select, echo=TRUE} +s <- nomis_overview("NM_1650_1", select = c("units", "keywords")) - s %>% tidyr::unnest(name) %>% glimpse() +s %>% tidyr::unnest(name) %>% glimpse() ``` @@ -118,39 +118,38 @@ The example below queries some of the metadata available through the API for the If provided with just a dataset ID, `nomis_get_metadata()` will return the concepts available for the given dataset. -```{r, echo=TRUE} +```{r get-metadata, echo=TRUE} +a <- nomis_get_metadata(id = "NM_893_1") - a <- nomis_get_metadata(id = "NM_893_1") - - print(a) +print(a) ``` ### Concept Values If provided with a concept name it returns the available values for that concept. However, in some cases, espescially with the geography concept, there are multiple options available, which Nomis labels types. In that case `nomis_get_metadata()` returns the values of the lowest indexed type available. -```{r, echo=TRUE} - b <- nomis_get_metadata(id = "NM_893_1", concept = "GEOGRAPHY") +```{r concepts, echo=TRUE} +b <- nomis_get_metadata(id = "NM_893_1", concept = "GEOGRAPHY") - print(b) +print(b) ``` We can now pass a generic "type" string to the `type` parameter in `nomis_get_metadata()`, which returns all available geography types for dataset "NM_893_1". 
-```{r, echo=TRUE} - c <- nomis_get_metadata(id = "NM_893_1", concept = "geography", type = "type") +```{r concept-types, echo=TRUE} +c <- nomis_get_metadata(id = "NM_893_1", concept = "geography", type = "type") - print(c) +print(c) ``` Passing a specific type to the `type` parameter, in this case "TYPE460" for all post-2010 parliamentary constituencies, returns a tibble with geographic codes for those specific constituencies, which can be used to filter queries. -```{r, echo=TRUE} - d <- nomis_get_metadata(id = "NM_893_1", concept = "geography", type = "TYPE460") +```{r concept-type460, echo=TRUE} +d <- nomis_get_metadata(id = "NM_893_1", concept = "geography", type = "TYPE460") - print(d) +print(d) ``` @@ -160,7 +159,7 @@ The vast majority (98% as of February 2018) of Nomis datasets include a geograph Using the information above, we can now query the latest data on bedroom occupancy per household type in different NHS clinical commissioning groups. -```{r, echo=TRUE} +```{r download-data, echo=TRUE} z <- nomis_get_data(id = "NM_893_1", time = "latest", geography = "TYPE266") print(z) @@ -169,14 +168,43 @@ print(z) We can also query bedroom occupancy per household type in the Manchester, Gorton and Manchester, Withington parliamentary constituencies. -```{r, echo=TRUE} - x <- nomis_get_data(id = "NM_893_1", time = "latest", geography = c("1929380119", "1929380120")) +```{r download-specific-area, echo=TRUE} +x <- nomis_get_data(id = "NM_893_1", time = "latest", geography = c("1929380119", "1929380120")) print(x) ``` +`nomisr` also allows for time series queries. The example below shows how to retrieve the percentage of the workforce claiming Jobseeker's Allowance from January 2015 to January 2020, inclusive, for each region of the UK, divided by male and female claimants, with an accompanying graph. 
+ + +```{r jsa-claimaints} +library(ggplot2) + +jsa <- nomis_get_data(id = "NM_1_1", time = "2015-01-2020-01", + geography = "TYPE480", measures=20201, + sex=c(5,6), item = 1, tidy = TRUE) + +jsa <- jsa %>% + mutate(date = as.Date(paste0(date, "-01")), + obs_value = obs_value/100) +theme_set(theme_bw()) + +p_jsa <- ggplot(jsa, aes(x = date, y = obs_value, colour = sex_name)) + + geom_line(size = 1.15) + + scale_colour_viridis_d(end = 0.75, begin = 0.1, name = "Gender") + + scale_x_date(breaks = "6 months", date_labels = "%b %Y") + + scale_y_continuous(labels = scales::percent) + + theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 8), + legend.position = "bottom") + + labs(x = "Date", y= "JSA Claimants (Percentage of Workforce)") + + facet_wrap(~geography_name, scales = "free_y") + + +p_jsa + +```