Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ mast
Service fixes and enhancements
------------------------------

heasarc
^^^^^^^
- Add ``query_by_column`` to allow querying of different catalog columns. [#3403]
- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403]
- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403]
- Add automatic guessing for the data host in ``download_data``. [#3403]

alma
^^^^

Expand Down
261 changes: 227 additions & 34 deletions astroquery/heasarc/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

import os

import shutil
import requests
import tarfile
Expand Down Expand Up @@ -261,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True,
cols = [col.upper() for col in cols['name'] if '__' not in col]
return cols

def query_tap(self, query, *, maxrec=None):
def query_tap(self, query, *, maxrec=None, uploads=None):
"""
Send query to HEASARC's Xamin TAP using ADQL.
Results in `~pyvo.dal.TAPResults` format.
Expand All @@ -273,6 +271,10 @@ def query_tap(self, query, *, maxrec=None):
ADQL query to be executed
maxrec : int
maximum number of records to return
uploads : dict
a mapping from table names used in the query to file like
objects containing a votable
(e.g. a file path or `~astropy.table.Table`).

Returns
-------
Expand All @@ -286,7 +288,85 @@ def query_tap(self, query, *, maxrec=None):
"""
log.debug(f'TAP query: {query}')
self._saved_query = query
return self.tap.search(query, language='ADQL', maxrec=maxrec)
return self.tap.search(
query, language='ADQL', maxrec=maxrec, uploads=uploads)

def _query_execute(self, catalog=None, where=None, *,
get_query_payload=False, columns=None,
verbose=False, maxrec=None):
"""Queries some catalog using the HEASARC TAP server based on the
where condition and returns an `~astropy.table.Table`.

Parameters
----------
catalog : str
The catalog to query. To list the available catalogs, use
:meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`.
where : str
The WHERE condition to be used in the query. It must
include the 'WHERE' keyword or be empty.
get_query_payload : bool, optional
If `True` then returns the generated ADQL query as str.
Defaults to `False`.
columns : str, optional
Target column list with value separated by a comma(,).
Use * for all the columns. The default is to return a subset
of the columns that are generally the most useful.
verbose : bool, optional
If False, suppress vo warnings.
maxrec : int, optional
Maximum number of records


Returns
-------
table : A `~astropy.table.Table` object.
"""
# if verbose is False then suppress any VOTable related warnings
if not verbose:
commons.suppress_vo_warnings()

if catalog is None:
raise InvalidQueryError("catalog name is required! Use 'xray' "
"to search the master X-ray catalog")

if where is None:
where = ''

# __row is needed for locate_data; we add it if not already present
# and remove it afterwards only if the user requested specific
# columns. keep_row tracks that.
keep_row = (
columns in (None, '*')
or isinstance(columns, str) and '__row' in columns
)

if columns is None:
columns = ', '.join(self._get_default_columns(catalog))

if '__row' not in columns and columns != '*':
columns += ', __row'

if where != '' and not where.startswith(' '):
where = ' ' + where.strip()
adql = f'SELECT {columns} FROM {catalog}{where}'

# if maxrec is more than the server limit, we set a higher limit
if maxrec is not None and maxrec > 100000:
adql = adql.replace('SELECT ', f'SELECT TOP {maxrec*4} ')

if get_query_payload:
return adql
response = self.query_tap(query=adql, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response
self._last_catalog_name = catalog

table = response.to_table()
if not keep_row and '__row' in table.colnames:
table.remove_column('__row')
return table

@deprecated_renamed_argument(
('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox',
Expand Down Expand Up @@ -356,18 +436,6 @@ def query_region(self, position=None, catalog=None, radius=None, *,
-------
table : A `~astropy.table.Table` object.
"""
# if verbose is False then suppress any VOTable related warnings
if not verbose:
commons.suppress_vo_warnings()

if catalog is None:
raise InvalidQueryError("catalog name is required! Use 'xray' "
"to search the master X-ray catalog")

if columns is None:
columns = ', '.join(self._get_default_columns(catalog))
if '__row' not in columns:
columns += ',__row'

if spatial.lower() == 'all-sky':
where = ''
Expand All @@ -390,7 +458,7 @@ def query_region(self, position=None, catalog=None, radius=None, *,

coords_str = [f'{coord.ra.deg},{coord.dec.deg}'
for coord in coords_list]
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"POLYGON('ICRS',{','.join(coords_str)}))=1")
else:
coords_icrs = parse_coordinates(position).icrs
Expand All @@ -401,7 +469,7 @@ def query_region(self, position=None, catalog=None, radius=None, *,
radius = self.get_default_radius(catalog)
elif isinstance(radius, str):
radius = coordinates.Angle(radius)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1")
# add search_offset for the case of cone
if add_offset:
Expand All @@ -410,24 +478,23 @@ def query_region(self, position=None, catalog=None, radius=None, *,
elif spatial.lower() == 'box':
if isinstance(width, str):
width = coordinates.Angle(width)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value},"
f"{width.to(u.deg).value}))=1")
else:
raise ValueError("Unrecognized spatial query type. Must be one"
" of 'cone', 'box', 'polygon', or 'all-sky'.")

adql = f'SELECT {columns} FROM {catalog}{where}'

table_or_query = self._query_execute(
catalog=catalog, where=where,
get_query_payload=get_query_payload,
columns=columns, verbose=verbose,
maxrec=maxrec
)
if get_query_payload:
return adql
response = self.query_tap(query=adql, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response
self._last_catalog_name = catalog
return table_or_query
table = table_or_query

table = response.to_table()
if add_offset:
table['search_offset'].unit = u.arcmin
if len(table) == 0:
Expand Down Expand Up @@ -464,6 +531,96 @@ def query_object(self, object_name, mission, *,
return self.query_region(pos, catalog=mission, spatial='cone',
get_query_payload=get_query_payload)

def query_by_column(self, catalog, params, *,
get_query_payload=False, columns=None,
verbose=False, maxrec=None):
"""Query the HEASARC TAP server using a constraints on the columns.

This is a simple wrapper around
`~astroquery.heasarc.HeasarcClass.query_tap`
that constructs an ADQL query from a dictionary of parameters.

Parameters
----------
catalog : str
The catalog to query. To list the available catalogs, use
:meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`.
params : dict
A dictionary of column constraint parameters to include in the query.
Each key-value pair will be translated into an ADQL condition.
- For a range query, use a tuple of two values (min, max).
e.g. ``{'flux': (1e-12, 1e-10)}`` translates to
``flux BETWEEN 1e-12 AND 1e-10``.
- For list values, use a list of values.
e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to
``object_type IN ('QSO', 'GALAXY')``.
- For comparison queries, use a tuple of (operator, value),
where operator is one of '=', '!=', '<', '>', '<=', '>='.
e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``.
- For exact matches, use a single value (str, int, float).
e.g. ``{'object_type': 'QSO'}`` translates to
``object_type = 'QSO'``.
The keys should correspond to valid column names in the catalog.
Use `list_columns` to see the available columns.
get_query_payload : bool, optional
If `True` then returns the generated ADQL query as str.
Defaults to `False`.
columns : str, optional
Target column list with value separated by a comma(,).
Use * for all the columns. The default is to return a subset
of the columns that are generally the most useful.
verbose : bool, optional
If False, suppress vo warnings.
maxrec : int, optional
Maximum number of records

"""

if not isinstance(params, dict):
raise ValueError('params must be a dictionary of key-value pairs')

conditions = []
for key, value in params.items():
if isinstance(value, tuple):
if (
len(value) == 2
and all(isinstance(v, (int, float)) for v in value)
):
conditions.append(
f"{key} BETWEEN {value[0]} AND {value[1]}"
)
elif (
len(value) == 2
and value[0] in (">", "<", ">=", "<=")
):
conditions.append(f"{key} {value[0]} {value[1]}")
elif isinstance(value, list):
# handle list values: key IN (...)
formatted = []
for v in value:
if isinstance(v, str):
formatted.append(f"'{v}'")
else:
formatted.append(str(v))
conditions.append(f"{key} IN ({', '.join(formatted)})")
else:
conditions.append(
f"{key} = '{value}'"
if isinstance(value, str) else f"{key} = {value}"
)
if len(conditions) == 0:
where = ""
else:
where = "WHERE " + (" AND ".join(conditions))

table_or_query = self._query_execute(
catalog=catalog, where=where,
get_query_payload=get_query_payload,
columns=columns, verbose=verbose,
maxrec=maxrec
)
return table_or_query

def locate_data(self, query_result=None, catalog_name=None):
"""Get links to data products
Use vo/datalinks to query the data products for some query_results.
Expand Down Expand Up @@ -505,7 +662,8 @@ def locate_data(self, query_result=None, catalog_name=None):
if '__row' not in query_result.colnames:
raise ValueError('No __row column found in query_result. '
'query_result needs to be the output of '
'query_region or a subset.')
'query_region or a subset. try adding '
'__row to the requested columns')

if catalog_name is None:
catalog_name = self._last_catalog_name
Expand Down Expand Up @@ -591,7 +749,41 @@ def enable_cloud(self, provider='aws', profile=None):

self.s3_client = self.s3_resource.meta.client

def download_data(self, links, host='heasarc', location='.'):
def _guess_host(self, host):
"""Guess the host to use for downloading data

Parameters
----------
host : str
The host provided by the user

Returns
-------
host : str
The guessed host

"""
if host in ['heasarc', 'sciserver', 'aws']:
return host
elif host is not None:
raise ValueError(
'host has to be one of heasarc, sciserver, aws or None')

# host is None, so we guess
if (
'HOME' in os.environ
and os.environ['HOME'] == '/home/idies'
and os.path.exists('/FTP/')
):
# we are on idies, so we can use sciserver
return 'sciserver'

for var in ['AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN']:
if var in os.environ:
return 'aws'
return 'heasarc'

def download_data(self, links, host=None, location='.'):
"""Download data products in links with a choice of getting the
data from either the heasarc server, sciserver, or the cloud in AWS.

Expand All @@ -600,8 +792,9 @@ def download_data(self, links, host='heasarc', location='.'):
----------
links : `astropy.table.Table` or `astropy.table.Row`
The result from locate_data
host : str
The data host. The options are: heasarc (default), sciserver, aws.
host : str or None
The data host. The options are: None (default), heasarc, sciserver, aws.
If None, the host is guessed based on the environment.
If host == 'sciserver', data is copied from the local mounted
data drive.
If host == 'aws', data is downloaded from Amazon S3 Open
Expand All @@ -622,8 +815,8 @@ def download_data(self, links, host='heasarc', location='.'):
if isinstance(links, Row):
links = links.table[[links.index]]

if host not in ['heasarc', 'sciserver', 'aws']:
raise ValueError('host has to be one of heasarc, sciserver, aws')
# guess the host if not provided
host = self._guess_host(host)

host_column = 'access_url' if host == 'heasarc' else host
if host_column not in links.colnames:
Expand Down
Loading
Loading