- Crossref API: https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md
- Crossref metadata search API (http://search.labs.crossref.org/)
- CrossRef DOI Content Negotiation
- Text and Data Mining TDM
Stable version from CRAN
install.packages("rcrossref")
Or development version from GitHub
install.packages("devtools")
devtools::install_github("ropensci/rcrossref")
Load rcrossref
library('rcrossref')
Use CrossRef's DOI Content Negotiation service, where you can citations back in various formats, including apa
cr_cn(dois = "10.1126/science.169.3946.635", format = "text", style = "apa")
#> [1] "Frank, H. S. (1970). The Structure of Ordinary Water: New data and interpretations are yielding new insights into this fascinating substance. Science, 169(3946), 635–641. doi:10.1126/science.169.3946.635"
bibtex
cat(cr_cn(dois = "10.1126/science.169.3946.635", format = "bibtex"))
#> @article{Frank_1970,
#> doi = {10.1126/science.169.3946.635},
#> url = {https://doi.org/10.1126%2Fscience.169.3946.635},
#> year = 1970,
#> month = {aug},
#> publisher = {American Association for the Advancement of Science ({AAAS})},
#> volume = {169},
#> number = {3946},
#> pages = {635--641},
#> author = {H. S. Frank},
#> title = {The Structure of Ordinary Water: New data and interpretations are yielding new insights into this fascinating substance},
#> journal = {Science}
#> }
bibentry
cr_cn(dois = "10.6084/m9.figshare.97218", format = "bibentry")
Citation count, using OpenURL
cr_citation_count(doi = "10.1371/journal.pone.0042793")
#> [1] 21
The following functions all use the CrossRef API.
cr_funders(query = "NSF")
#> $meta
#> total_results search_terms start_index items_per_page
#> 1 10 NSF 0 20
#>
#> $data
#> # A tibble: 10 x 6
#> id location name alt.names uri tokens
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 50110… Norway Norsk Sykepl… NSF, Norwegian … http://d… norsk, sykep…
#> 2 10000… United S… Center for H… CHM, NSF, Unive… http://d… center, for,…
#> 3 10000… United S… National Sle… NSF http://d… national, sl…
#> 4 50110… Sri Lanka National Sci… National Scienc… http://d… national, sc…
#> 5 10000… Denmark Statens Natu… Danish National… http://d… statens, nat…
#> 6 10000… United S… Office of th… NSF Office of t… http://d… office, of, …
#> 7 50110… Australia National Str… NSF http://d… national, st…
#> 8 10000… United S… National Sci… NSF http://d… national, sc…
#> 9 50110… China National Nat… NSFC-Yunnan Joi… http://d… national, na…
#> 10 50110… China National Nat… Natural Science… http://d… national, na…
#>
#> $facets
#> NULL
cr_agency(dois = '10.13039/100000001')
#> $DOI
#> [1] "10.13039/100000001"
#>
#> $agency
#> $agency$id
#> [1] "crossref"
#>
#> $agency$label
#> [1] "Crossref"
cr_works(filter = c(has_orcid = TRUE, from_pub_date = '2004-04-04'), limit = 1)
#> $meta
#> total_results search_terms start_index items_per_page
#> 1 1614119 NA 0 1
#>
#> $data
#> # A tibble: 1 x 26
#> alternative.id container.title created deposited doi indexed issn
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 S200529011400… Journal of Acu… 2014-0… 2017-06-… 10.1… 2018-0… 2005…
#> # ... with 19 more variables: issue <chr>, issued <chr>, member <chr>,
#> # page <chr>, prefix <chr>, publisher <chr>, reference.count <chr>,
#> # score <chr>, source <chr>, title <chr>, type <chr>,
#> # update.policy <chr>, url <chr>, volume <chr>, assertion <list>,
#> # author <list>, funder <list>, link <list>, license <list>
#>
#> $facets
#> NULL
cr_journals(issn = c('1803-2427','2326-4225'))
#> $data
#> # A tibble: 2 x 53
#> title publisher issn last_status_che… deposits_abstra… deposits_orcids…
#> <chr> <chr> <chr> <date> <lgl> <lgl>
#> 1 Jour… "De Gruy… 1805… 2018-08-06 TRUE FALSE
#> 2 Jour… American… 2326… 2018-08-06 FALSE FALSE
#> # ... with 47 more variables: deposits <lgl>,
#> # deposits_affiliations_backfile <lgl>,
#> # deposits_update_policies_backfile <lgl>,
#> # deposits_similarity_checking_backfile <lgl>,
#> # deposits_award_numbers_current <lgl>,
#> # deposits_resource_links_current <lgl>, deposits_articles <lgl>,
#> # deposits_affiliations_current <lgl>, deposits_funders_current <lgl>,
#> # deposits_references_backfile <lgl>, deposits_abstracts_backfile <lgl>,
#> # deposits_licenses_backfile <lgl>,
#> # deposits_award_numbers_backfile <lgl>,
#> # deposits_open_references_backfile <lgl>,
#> # deposits_open_references_current <lgl>,
#> # deposits_references_current <lgl>,
#> # deposits_resource_links_backfile <lgl>,
#> # deposits_orcids_backfile <lgl>, deposits_funders_backfile <lgl>,
#> # deposits_update_policies_current <lgl>,
#> # deposits_similarity_checking_current <lgl>,
#> # deposits_licenses_current <lgl>, affiliations_current <dbl>,
#> # similarity_checking_current <dbl>, funders_backfile <dbl>,
#> # licenses_backfile <dbl>, funders_current <dbl>,
#> # affiliations_backfile <dbl>, resource_links_backfile <dbl>,
#> # orcids_backfile <dbl>, update_policies_current <dbl>,
#> # open_references_backfile <dbl>, orcids_current <dbl>,
#> # similarity_checking_backfile <dbl>, references_backfile <dbl>,
#> # award_numbers_backfile <dbl>, update_policies_backfile <dbl>,
#> # licenses_current <dbl>, award_numbers_current <dbl>,
#> # abstracts_backfile <dbl>, resource_links_current <dbl>,
#> # abstracts_current <dbl>, open_references_current <dbl>,
#> # references_current <dbl>, total_dois <int>, current_dois <int>,
#> # backfile_dois <int>
#>
#> $facets
#> NULL
cr_licenses(query = 'elsevier')
#> $meta
#> total_results search_terms start_index items_per_page
#> 1 25 elsevier 0 20
#>
#> $data
#> # A tibble: 25 x 2
#> URL work.count
#> <chr> <int>
#> 1 http://aspb.org/publications/aspb-journals/open-articles 1
#> 2 http://creativecommons.org/licenses/by-nc-nd/3.0/ 13
#> 3 http://creativecommons.org/licenses/by-nc-nd/4.0/ 8
#> 4 http://creativecommons.org/licenses/by-nc/4.0/ 2
#> 5 http://creativecommons.org/licenses/by/3.0/ 1
#> 6 http://creativecommons.org/licenses/by/4.0 2
#> 7 http://creativecommons.org/licenses/by/4.0/ 2
#> 8 http://doi.wiley.com/10.1002/tdm_license_1 157
#> 9 http://doi.wiley.com/10.1002/tdm_license_1.1 2175
#> 10 http://journals.iucr.org/services/copyrightpolicy.html 10
#> # ... with 15 more rows
cr_prefixes(prefixes = c('10.1016','10.1371','10.1023','10.4176','10.1093'))
#> $meta
#> NULL
#>
#> $data
#> member name
#> 1 http://id.crossref.org/member/78 Elsevier BV
#> 2 http://id.crossref.org/member/340 Public Library of Science (PLoS)
#> 3 http://id.crossref.org/member/297 Springer Nature America, Inc
#> 4 http://id.crossref.org/member/1989 Co-Action Publishing
#> 5 http://id.crossref.org/member/286 Oxford University Press (OUP)
#> prefix
#> 1 http://id.crossref.org/prefix/10.1016
#> 2 http://id.crossref.org/prefix/10.1371
#> 3 http://id.crossref.org/prefix/10.1023
#> 4 http://id.crossref.org/prefix/10.4176
#> 5 http://id.crossref.org/prefix/10.1093
#>
#> $facets
#> list()
cr_members(query = 'ecology', limit = 5)
#> $meta
#> total_results search_terms start_index items_per_page
#> 1 18 ecology 0 5
#>
#> $data
#> # A tibble: 5 x 56
#> id primary_name location last_status_che… total.dois current.dois
#> <int> <chr> <chr> <date> <chr> <chr>
#> 1 336 Japanese So… 5-3 Yon… 2018-08-06 1167 157
#> 2 1950 Journal of … Suite 8… 2018-08-06 27 0
#> 3 2080 The Japan S… 5-3 Yon… 2018-08-06 685 35
#> 4 2151 Ecology and… 5-3 Yon… 2018-08-06 385 53
#> 5 2169 Italian Soc… Diparti… 2018-08-06 1217 367
#> # ... with 50 more variables: backfile.dois <chr>, prefixes <chr>,
#> # coverge.affiliations.current <chr>,
#> # coverge.similarity.checking.current <chr>,
#> # coverge.funders.backfile <chr>, coverge.licenses.backfile <chr>,
#> # coverge.funders.current <chr>, coverge.affiliations.backfile <chr>,
#> # coverge.resource.links.backfile <chr>, coverge.orcids.backfile <chr>,
#> # coverge.update.policies.current <chr>,
#> # coverge.open.references.backfile <chr>, coverge.orcids.current <chr>,
#> # coverge.similarity.checking.backfile <chr>,
#> # coverge.references.backfile <chr>,
#> # coverge.award.numbers.backfile <chr>,
#> # coverge.update.policies.backfile <chr>,
#> # coverge.licenses.current <chr>, coverge.award.numbers.current <chr>,
#> # coverge.abstracts.backfile <chr>,
#> # coverge.resource.links.current <chr>, coverge.abstracts.current <chr>,
#> # coverge.open.references.current <chr>,
#> # coverge.references.current <chr>,
#> # flags.deposits.abstracts.current <chr>,
#> # flags.deposits.orcids.current <chr>, flags.deposits <chr>,
#> # flags.deposits.affiliations.backfile <chr>,
#> # flags.deposits.update.policies.backfile <chr>,
#> # flags.deposits.similarity.checking.backfile <chr>,
#> # flags.deposits.award.numbers.current <chr>,
#> # flags.deposits.resource.links.current <chr>,
#> # flags.deposits.articles <chr>,
#> # flags.deposits.affiliations.current <chr>,
#> # flags.deposits.funders.current <chr>,
#> # flags.deposits.references.backfile <chr>,
#> # flags.deposits.abstracts.backfile <chr>,
#> # flags.deposits.licenses.backfile <chr>,
#> # flags.deposits.award.numbers.backfile <chr>,
#> # flags.deposits.open.references.backfile <chr>,
#> # flags.deposits.open.references.current <chr>,
#> # flags.deposits.references.current <chr>,
#> # flags.deposits.resource.links.backfile <chr>,
#> # flags.deposits.orcids.backfile <chr>,
#> # flags.deposits.funders.backfile <chr>,
#> # flags.deposits.update.policies.current <chr>,
#> # flags.deposits.similarity.checking.current <chr>,
#> # flags.deposits.licenses.current <chr>, names <chr>, tokens <chr>
#>
#> $facets
#> NULL
cr_r()
uses the function cr_works()
internally.
cr_r()
#> [1] "10.18472/cvt.18n1.2018.1488"
#> [2] "10.1016/0161-5890(76)90161-9"
#> [3] "10.3109/09546634.2010.521811"
#> [4] "10.1007/978-3-642-86520-6_3"
#> [5] "10.1093/benz/9780199773787.article.b00100239"
#> [6] "10.1007/s10100-006-0016-5"
#> [7] "10.1016/j.egypro.2011.10.348"
#> [8] "10.1097/01241398-199709000-00001"
#> [9] "10.1111/j.1445-5994.2009.02018.x"
#> [10] "10.7215/ds_ip_20101029"
You can pass in the number of DOIs you want back (default is 10)
cr_r(2)
#> [1] "10.7868/s0869565217150221" "10.2307/442387"
Publishers can optionally provide links in the metadata they provide to Crossref for full text of the work, but that data is often missing. Find out more about it at http://tdmsupport.crossref.org/.
Get some DOIs for articles that provide full text, and that have CC-BY 3.0
licenses (i.e., more likely to actually be open)
out <-
cr_works(filter = list(has_full_text = TRUE,
license_url = "http://creativecommons.org/licenses/by/3.0/"))
(dois <- out$data$doi)
#> [1] "10.5194/acpd-14-24183-2014" "10.5194/bgd-11-13343-2014"
#> [3] "10.5194/bgd-11-13455-2014" "10.1155/2014/128505"
#> [5] "10.1155/2014/124592" "10.1155/2014/154204"
#> [7] "10.1155/2014/718415" "10.1155/2014/727135"
#> [9] "10.1155/2014/264217" "10.1155/2014/484656"
#> [11] "10.1155/2014/490386" "10.1155/2014/528696"
#> [13] "10.1155/2014/934510" "10.1155/2014/913510"
#> [15] "10.1155/2014/907584" "10.1155/2014/936748"
#> [17] "10.5194/amtd-7-9453-2014" "10.1088/1742-6596/536/1/012003"
#> [19] "10.1088/1742-6596/536/1/012001" "10.1088/1742-6596/536/1/012016"
From the output of cr_works
we can get full text links if we know where to look:
do.call("rbind", out$data$link)
#> # A tibble: 58 x 4
#> URL content.type content.version intended.applica…
#> <chr> <chr> <chr> <chr>
#> 1 http://www.atmos-chem-p… unspecified vor similarity-check…
#> 2 http://www.biogeoscienc… unspecified vor similarity-check…
#> 3 http://www.biogeoscienc… unspecified vor similarity-check…
#> 4 http://downloads.hindaw… application… vor text-mining
#> 5 http://downloads.hindaw… application… vor text-mining
#> 6 http://downloads.hindaw… unspecified vor similarity-check…
#> 7 http://downloads.hindaw… application… vor text-mining
#> 8 http://downloads.hindaw… application… vor text-mining
#> 9 http://downloads.hindaw… unspecified vor similarity-check…
#> 10 http://downloads.hindaw… application… vor text-mining
#> # ... with 48 more rows
From there, you can grab your full text, but because most links require
authentication, enter another package: crminer
.
You'll need package crminer
for the rest of the work.
Onc we have DOIs, get URLs to full text content
if (!requireNamespace("crminer")) {
install.packages("crminer")
}
library(crminer)
(links <- crm_links("10.1155/2014/128505"))
#> $pdf
#> <url> http://downloads.hindawi.com/archive/2014/128505.pdf
#>
#> $xml
#> <url> http://downloads.hindawi.com/archive/2014/128505.xml
#>
#> $unspecified
#> <url> http://downloads.hindawi.com/archive/2014/128505.pdf
Then use those URLs to get full text
crm_pdf(links)
#> <document>/Users/sckott/Library/Caches/R/crminer/128505.pdf
#> Pages: 1
#> No. characters: 1565
#> Created: 2014-09-15
See also fulltext for getting scholarly text for text mining.
- Please report any issues or bugs.
- License: MIT
- Get citation information for
rcrossref
in R doingcitation(package = 'rcrossref')
- Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
This package is part of a richer suite called fulltext, along with several other packages, that provides the ability to search for and retrieve full text of open access scholarly articles.