#!/usr/bin/env python3
"""Fetch methods."""
import os
import tarfile
import urllib.request
import zlib
from typing import Union
import requests
from tqdm import tqdm
from .tofile import write_file
class TqdmUpTo(tqdm):
    """Progress bar that can be driven by absolute transfer counts.

    Adds `update_to(n)`, which is implemented on top of
    `tqdm.update(delta_n)`. Adapted from the tqdm documentation.
    """

    def update_to(self, b=1, bsize=1, tsize=None):
        """
        Move the bar to an absolute position given as a block count.

        The signature matches the ``reporthook`` callback of
        ``urllib.request.urlretrieve``.

        Args:
            b (int, optional): Number of blocks transferred so far [default: 1].
            bsize (int, optional): Size of each block (in tqdm units) [default: 1].
            tsize (int, optional): Total size (in tqdm units). The current
                total is left untouched when this is None.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # update() expects an increment, so hand it the distance between the
        # reported absolute position and the bar's current count.
        self.update(transferred - self.n)
def fetch_tmp_file(url):
    """
    Download a remote URL to a temporary file while showing a progress bar.

    Args:
        url (str): Remote URL to fetch.

    Returns:
        str: Temporary filename.
    """
    # Label the bar with the last path segment of the URL.
    label = "Fetch %s" % url.rsplit("/", 1)[-1]
    bar_options = {
        "unit": "B",
        "unit_scale": True,
        "unit_divisor": 1024,
        "miniters": 1,
        "desc": label,
    }
    with TqdmUpTo(**bar_options) as bar:
        # filename=None lets urlretrieve pick the local file location itself;
        # it returns a (filename, headers) tuple.
        local_name, _headers = urllib.request.urlretrieve(
            url, filename=None, reporthook=bar.update_to, data=None
        )
        # Pin the total to what was actually received so the bar ends full.
        bar.total = bar.n
    return local_name
def fetch_tar(url, path):
    """
    Download a tar archive and unpack it.

    The archive is first downloaded with `fetch_tmp_file`, then handed to
    `extract_tar` together with the destination path.

    Args:
        url (str): Remote URL to fetch.
        path (str): Path to extract tar file.

    Returns:
        int: Count of file members extracted from tar archive.
    """
    return extract_tar(fetch_tmp_file(url), path)
def fetch_stream(url, *, decode=True, show_progress=True):
    """
    Stream a remote URL in chunks, optionally decompressing it on the fly.

    Args:
        url (str): Remote URL to fetch.
        decode (bool, optional): Determines whether to unzip content. If the
            payload cannot be decompressed, decompression is switched off and
            the chunks are passed through unchanged. Defaults to True.
        show_progress (bool, optional): Show a progress bar to indicate file
            streaming progress. Defaults to True.

    Yields:
        bytes: Successive chunks of the (optionally decompressed) content.
            The remote content is read in 1024 byte blocks; a decompressed
            chunk may be larger, smaller or empty.
    """
    block_size = 1024
    # 32 + MAX_WBITS makes zlib auto-detect a zlib or gzip header.
    dec = zlib.decompressobj(32 + zlib.MAX_WBITS)
    progress = None
    # Using the response as a context manager releases the connection even
    # when the consumer stops iterating before the stream is exhausted.
    with requests.get(url, stream=True) as res:
        if show_progress:
            total_size = int(res.headers.get("content-length", 0))
            progress = tqdm(total=total_size, unit="iB", unit_scale=True)
        try:
            for data in res.iter_content(block_size):
                if decode:
                    try:
                        data = dec.decompress(data)
                    except zlib.error:
                        # Not compressed data: `data` still holds the raw
                        # chunk, so fall back to plain pass-through from
                        # here on.
                        decode = False
                if progress is not None:
                    progress.update(len(data))
                # Exactly one yield per received chunk, decoded or not.
                yield data
            if decode:
                # Emit whatever the decompressor is still buffering.
                tail = dec.flush()
                if tail:
                    if progress is not None:
                        progress.update(len(tail))
                    yield tail
        finally:
            # Close the bar on normal completion, errors and early exit alike.
            if progress is not None:
                progress.close()
def fetch_file(url, path, decode=True):
    """Fetch a remote file and write its content to `path`.

    Args:
        url (str): Remote URL to fetch.
        path (str): Destination path, passed on to `write_file`.
        decode (bool, optional): Determines whether to unzip content. When
            True the downloaded bytes are also decoded as UTF-8 and a str is
            handed to `write_file`; otherwise the raw bytes are handed over.
            Defaults to True.

    Raises:
        UnicodeDecodeError: If `decode` is True and the content is not valid
            UTF-8.
    """
    # `decode` is keyword-only on fetch_stream; passing it positionally
    # raises TypeError before any data is fetched.
    raw = b"".join(fetch_stream(url, decode=decode))
    # Decode once over the whole payload: decoding chunk by chunk fails when
    # a multi-byte UTF-8 character straddles a chunk boundary.
    data = raw.decode("utf-8") if decode else raw
    write_file(path, data)
def fetch_url(url):
    """Download a URL and return its body as text.

    Args:
        url (str): Remote URL to fetch.

    Returns:
        str: Content of file as a string, decoded as UTF-8. Will return None
            if response is not OK.
    """
    response = requests.get(url)
    if not response.ok:
        return None
    body = response.content
    return body.decode("utf-8")