# Copyright 2026 IPSL / CNRS / Sorbonne University
# Authors: Kishanthan Kingston
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/
import argparse
import os
import cdsapi
from tqdm import tqdm
import calendar
from IPSL_AID.logger import Logger
# python download_ERA5_cds.py --year_start 2015 --year_end 2015 --variable 2m_temperature
def parse_args(argv=None):
    """
    Parse command-line arguments.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. When ``None`` (the default) argparse
        reads ``sys.argv[1:]``, so calling ``parse_args()`` with no
        argument behaves as a normal command-line entry point.

    Returns
    -------
    argparse.Namespace
        Parsed command line arguments as a namespace object with attributes:

        - ``year_start`` (int): first year to process.
        - ``year_end`` (int): last year to process (inclusive).
        - ``variable`` (list of str): one or more ERA5 variable names.
        - ``pressure_level`` (list of str or None): pressure levels, or
          ``None`` when the option is not given.

    Raises
    ------
    SystemExit
        Raised by argparse when a required argument is missing or invalid,
        or when ``year_start`` is greater than ``year_end``.

    Notes
    -----
    ERA5 variable names must match those defined in the
    Copernicus Climate Data Store catalogue.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--year_start", type=int, required=True, help="First year to process (2015)."
    )
    parser.add_argument(
        "--year_end", type=int, required=True, help="Last year to process (inclusive)."
    )
    parser.add_argument(
        "--variable",
        type=str,
        nargs="+",
        required=True,
        help="ERA5 variable names (example: 2m_temperature)",
    )
    parser.add_argument(
        "--pressure_level",
        type=str,
        nargs="+",
        required=False,
        help="Pressure levels in hPa (500 750 850)",
    )
    args = parser.parse_args(argv)

    # An inverted range would make the caller loop over nothing and still
    # report success, so reject it up front with a regular usage error.
    if args.year_start > args.year_end:
        parser.error(
            f"--year_start ({args.year_start}) must not be greater than "
            f"--year_end ({args.year_end})"
        )
    return args
def main(
    logger,
    base_output_dir="/leonardo_work/EUHPC_D27_095/kkingston/IPSL-AID/data/",
):
    """
    Download ERA5 data from the Copernicus Climate Data Store.

    The function follows a structured workflow:

    1. Parse command-line arguments.
    2. Create output directories.
    3. Loop over requested years, variables, and months.
    4. Submit download requests to the CDS API.
    5. Save results as NetCDF files.

    Files are skipped if they already exist. Each file is first downloaded
    under a temporary ``<target>.part`` name and only renamed to its final
    name once the download has returned, so an interrupted run does not
    leave a truncated file that later runs would skip.

    Parameters
    ----------
    logger : object
        Logger used to report completion; only its ``success`` method is
        called here.
    base_output_dir : str, optional
        Root directory under which ``data_<VARIABLE>/<year>/`` folders are
        created. Defaults to the path originally hard-coded in this script.

    Notes
    -----
    Data are downloaded at hourly resolution for all days of each month.
    The CDS API client requires a valid configuration file.
    Visit: https://cds.climate.copernicus.eu/

    The dataset used depends on whether pressure levels are requested:

    - reanalysis-era5-single-levels
    - reanalysis-era5-pressure-levels

    Any exception raised by the CDS client propagates to the caller after
    the temporary file for the failed month has been removed.
    """
    args = parse_args()
    variables = args.variable
    pressure_levels = args.pressure_level

    # Dataset name and file-name suffix depend only on the CLI arguments,
    # so they are resolved once instead of on every month.
    if pressure_levels:
        dataset = "reanalysis-era5-pressure-levels"
        level_suffix = "_" + "_".join(pressure_levels)
    else:
        dataset = "reanalysis-era5-single-levels"
        level_suffix = ""

    # Initialize CDS API client
    # Requires ~/.cdsapirc to be configured
    client = cdsapi.Client()

    # Loop over years
    for year in range(args.year_start, args.year_end + 1):
        tqdm.write(f"\n=== Processing year {year} ===")
        yyyy = str(year)

        for variable in variables:
            tqdm.write(f"\n--- Variable: {variable} ---")

            # Create output directory for this variable and year
            year_dir = os.path.join(
                base_output_dir, f"data_{variable.upper()}", yyyy
            )
            os.makedirs(year_dir, exist_ok=True)

            # Loop over all 12 months
            for month in tqdm(range(1, 13), desc=f"{variable} {year}", unit="month"):
                mm = f"{month:02d}"

                # Output file name: variable[_levels]_YYYYMM.nc
                target = os.path.join(
                    year_dir, f"{variable}{level_suffix}_{yyyy}{mm}.nc"
                )

                # Skip download if file already exists
                if os.path.exists(target):
                    continue

                # number of days in month
                n_days = calendar.monthrange(year, month)[1]

                # ERA5 monthly request
                request = {
                    "product_type": "reanalysis",
                    "variable": variable,
                    "year": yyyy,
                    "month": mm,
                    "day": [f"{d:02d}" for d in range(1, n_days + 1)],
                    "time": [f"{h:02d}:00" for h in range(24)],
                    "format": "netcdf",
                }
                if pressure_levels:
                    request["pressure_level"] = pressure_levels

                # Download under a temporary name; the final name must only
                # ever refer to a complete file because its existence is the
                # skip criterion above.
                partial = f"{target}.part"
                try:
                    client.retrieve(dataset, request, partial)
                except BaseException:
                    # Also covers Ctrl-C: drop the incomplete file, then let
                    # the original error continue to the caller.
                    if os.path.exists(partial):
                        os.remove(partial)
                    raise
                os.replace(partial, target)

    logger.success("ERA5 download completed successfully.")
if __name__ == "__main__":
    # Script entry point: build a console logger, print the banner,
    # then hand the logger to the download workflow.
    logger = Logger(console_output=True)
    logger.show_header("Download ERA5 from CDS")
    main(logger)