import os
import numpy as np
import pandas as pd
import requests
from datetime import datetime
from datetime import timedelta
from loguru import logger
from exonamd.utils import ROOT
@logger.catch
def download_nasa_confirmed_planets(
    min_sy_pnum=1,
    from_scratch=False,
):
    """
    Download the NASA Exoplanet Archive confirmed planets table.

    The data are fetched from the ``ps`` table through the archive's
    synchronous TAP endpoint and returned as a DataFrame.

    Parameters
    ----------
    min_sy_pnum : int, optional
        Minimum number of planets in the system to consider. Defaults to 1.
    from_scratch : bool, optional
        If True, downloads the entire table (every row updated after
        1990-01-01). If False, reads the existing ``data/exo.csv`` under
        ``ROOT`` and downloads only the rows whose ``rowupdate`` is later
        than one day before the most recent ``rowupdate`` stored there.
        Defaults to False.

    Returns
    -------
    df : pandas.DataFrame
        The downloaded table, with ``None`` and empty strings replaced
        by NaN.
    df_old : pandas.DataFrame
        The previous table, if from_scratch is False. Otherwise, None.

    Notes
    -----
    A ``ValueError`` is raised internally when the archive answers with a
    status code other than 200. Because the function is wrapped in
    ``logger.catch``, that error (like any other raised here) is logged;
    with loguru's default settings it is not re-raised and the call
    returns None instead of the ``(df, df_old)`` tuple.
    """
    logger.info("Downloading NASA Exoplanet Archive confirmed planets")

    date_format = "%Y-%m-%d"
    if from_scratch:
        df_old = None
        latest = datetime.strptime("1990-01-01", date_format)
    else:
        df_old = pd.read_csv(os.path.join(ROOT, "data", "exo.csv"))
        latest = datetime.strptime(df_old["rowupdate"].max(), date_format)
        # Step back one day so that rows updated on the most recent stored
        # date are fetched again rather than excluded by the strict '>'.
        latest = latest - timedelta(days=1)
    # Always hand the query a plain YYYY-MM-DD string, whichever branch ran.
    latest = latest.strftime(date_format)

    logger.debug("Defining the SQL query to retrieve the required data")
    query = f"""
    SELECT
        hostname,
        pl_name,
        default_flag,
        rowupdate,
        sy_pnum,
        st_rad,
        st_mass,
        pl_orbper,
        pl_orbsmax,
        pl_orbsmaxerr1,
        pl_orbsmaxerr2,
        pl_rade,
        pl_radeerr1,
        pl_radeerr2,
        pl_bmasse,
        pl_bmasseerr1,
        pl_bmasseerr2,
        pl_orbeccen,
        pl_orbeccenerr1,
        pl_orbeccenerr2,
        pl_orbincl,
        pl_orbinclerr1,
        pl_orbinclerr2,
        pl_trueobliq,
        pl_trueobliqerr1,
        pl_trueobliqerr2,
        pl_ratdor,
        pl_ratror
    FROM ps
    WHERE
        sy_pnum >= '{min_sy_pnum}'
        AND rowupdate > '{latest}'
    """

    logger.debug("Making the request to the API")
    url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
    params = {
        "query": query,
        "format": "json",
    }
    # (connect, read) timeouts in seconds: without them a stalled server
    # would block forever. The read timeout is generous because a
    # from-scratch query can take a while before the first byte arrives.
    response = requests.get(url, params=params, timeout=(30, 600))
    if response.status_code != 200:
        logger.error(f"Error: {response.status_code} in fetching data")
        raise ValueError(f"Error: {response.status_code} in fetching data")

    data = response.json()
    df = pd.DataFrame(data)
    # Normalise missing values so downstream numeric code sees NaN.
    df = df.replace({None: np.nan, "": np.nan})

    logger.info("Data fetched")
    return df, df_old