-
Notifications
You must be signed in to change notification settings - Fork 13
/
fetch-docs.py
executable file
·55 lines (45 loc) · 2.21 KB
/
fetch-docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
import concurrent.futures
import requests
from pathlib import Path
# Upstream branches to pull documentation from.
augur_branch = 'master'
auspice_branch = 'master'

# Raw-content base URLs for each upstream repository.
augur_url = f'https://raw.githubusercontent.com/nextstrain/augur/{augur_branch}/docs/'
auspice_base_url = f'https://raw.githubusercontent.com/nextstrain/auspice/{auspice_branch}/'
auspice_url = auspice_base_url + 'docs/'

# Maps each remote source URL to the path the document is written to.
# Destination paths are relative, so they resolve against the directory the
# script is run from.
docs = {
    # Auspice: narratives
    auspice_url + 'narratives/create-pdf.md': 'guides/communicate/create-pdf.md',
    auspice_url + 'narratives/introduction.md': 'guides/communicate/narratives-intro.md',

    # Augur: usage
    augur_url + 'usage/augur_snakemake.md': 'guides/bioinformatics/augur_snakemake.md',

    # Augur: FAQ
    augur_url + 'faq/translate_ref.md': 'guides/bioinformatics/translate_ref.md',
    augur_url + 'faq/import-beast.md': 'guides/bioinformatics/import-beast.md',
    augur_url + 'faq/colors.md': 'guides/bioinformatics/colors.md',
    augur_url + 'faq/lat_longs.md': 'guides/bioinformatics/lat_longs.md',
    augur_url + 'faq/vcf_input.md': 'guides/bioinformatics/vcf_input.md',
    augur_url + 'faq/fasta_input.md': 'guides/bioinformatics/fasta_input.md',
    augur_url + 'faq/seq_traits.md': 'guides/bioinformatics/seq_traits.md',

    # Augur: examples
    augur_url + 'examples/examples.rst': 'guides/bioinformatics/examples.rst',
}
if __name__ == '__main__':
    # Seconds to wait for the server to connect/respond before giving up.
    # Without a timeout, requests waits forever and one stalled connection
    # would hang the whole script.
    request_timeout = 30

    # Use a Session for connection pooling
    session = requests.Session()

    class RemoteDoc:
        """A remote document to download and the local path to save it to.

        Instances are callable so they can be submitted directly to an
        executor; calling one performs the download and returns the
        instance itself so the caller can report what was fetched.
        """

        def __init__(self, source_url, dest_path):
            self.source_url = source_url
            self.dest_path = Path(dest_path)

        def __call__(self):
            """Download ``source_url`` and write its bytes to ``dest_path``.

            Raises whatever ``requests`` raises for connection failures,
            timeouts, or (via ``raise_for_status``) HTTP error responses.
            """
            response = session.get(self.source_url, timeout=request_timeout)
            response.raise_for_status()

            # parents=True so a missing intermediate directory (e.g. a fresh
            # checkout without "guides/") is created instead of raising
            # FileNotFoundError.
            self.dest_path.parent.mkdir(parents=True, exist_ok=True)
            self.dest_path.write_bytes(response.content)
            return self

    # Fetch up to 5 docs at a time.  Thread-based concurrency (as opposed to
    # process-based) is appropriate because this is an I/O-bound operation.
    #
    # Context managers exit in reverse order: the pool is drained first, then
    # the session's pooled connections are closed.
    with session, concurrent.futures.ThreadPoolExecutor(5) as pool:
        futures = [
            pool.submit(RemoteDoc(src, dst))
            for src, dst in docs.items()
        ]

        for future in concurrent.futures.as_completed(futures):
            # result() re-raises any exception from the worker, so a failed
            # download aborts the script with a traceback.
            doc = future.result()
            print(f"Fetched {doc.dest_path} from {doc.source_url}")