"""
The module for getting datasets from the data repository of that project.
"""

# Reconstructed from the two-part patch series "Added function for data
# downloading" targeting computage/data_library/data_repository.py.

import os
import pickle
from typing import Any


def download_data(url: str,
                  out_dir: str,
                  file_name: str,
                  force: bool = False) -> None:
    """
    Downloads data from a remote repository. Downloaded file is saved to out_dir/file_name

    Parameters:
        url (str): Repository web link
        out_dir (str): Output directory containing downloaded data
        file_name (str): Output file name
        force (bool): If True, enables overwriting of an existing file

    Returns:
        None

    Raises:
        RuntimeError: If the server answers with an error status, or if the
            number of bytes received does not match the announced size.
    """
    out_path = os.path.join(out_dir, file_name)

    # Decide whether to download *before* touching the network, so that an
    # already present file costs no HTTP request at all.
    if os.path.exists(out_path) and not force:
        print('File exists, overwriting disabled. \nTo enable, set "force" = True')
        return

    # Imported here rather than at module level so that the skip path above
    # and import_dataset() work without the download-only dependencies.
    import requests
    from tqdm.notebook import tqdm

    # Create output directory if it doesn't exist
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    block_size = 1024  # bytes per streamed chunk
    failure = 'Failed to download file. Please check if it exists in the remote repository.'

    # The payload is streamed into a temporary sibling file and only moved
    # into place once complete; a broken download therefore never leaves a
    # truncated file that a later call (force=False) would mistake for data.
    part_path = out_path + '.part'

    # Streaming, so we can iterate over the response; the context manager
    # releases the connection even if the download fails midway.
    with requests.get(url, stream=True) as response:
        if not response.ok:
            raise RuntimeError(f'{failure} (HTTP {response.status_code})')

        # Size in bytes as announced by the server; 0 means "unknown".
        total_size = int(response.headers.get('content-length', 0))
        # iter_content() yields *decoded* bytes, so the announced length is
        # only comparable when the server applied no content encoding.
        size_is_comparable = total_size != 0 and not response.headers.get('content-encoding')

        written = 0
        try:
            with tqdm(total=total_size or None, desc=f'Loading {file_name}',
                      unit='B', unit_scale=True) as progress_bar:
                with open(part_path, 'wb') as f:
                    for chunk in response.iter_content(block_size):
                        # Write the raw bytes; the file must be an exact copy
                        # of the remote payload, not a pickle of each chunk.
                        f.write(chunk)
                        written += len(chunk)
                        progress_bar.update(len(chunk))

            if size_is_comparable and written != total_size:
                raise RuntimeError(failure)

            os.replace(part_path, out_path)
        finally:
            # After a successful os.replace() the temporary file is gone;
            # otherwise remove the incomplete download.
            if os.path.exists(part_path):
                os.remove(part_path)


def import_dataset(dataset_path: str) -> tuple[Any, Any]:
    """
    Loads a pickled dataset from disk.

    Parameters:
        dataset_path (str): Path to the pickle file

    Returns:
        The unpickled object stored in the file
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load datasets that come from a trusted repository.
    with open(dataset_path, 'rb') as f:
        dataset = pickle.load(f)
    return dataset