#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Routines to read galaxy catalogs from disk.
Updated with lognormal scripts!
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from os.path import dirname, abspath, splitext, exists as file_exists,\
join as pjoin
import numpy as np
import struct
try:
import pandas as pd
except ImportError:
pd = None
__all__ = ('read_fastfood_catalog', 'read_ascii_catalog', 'read_catalog', 'read_fortran_catalog', 'read_lognormal_catalog')
[docs]def read_fastfood_catalog(filename, return_dtype=None, need_header=None):
"""
Read a galaxy catalog from a fast-food binary file.
Parameters
-----------
filename: string
Filename containing the galaxy positions
return_dtype: numpy dtype for returned arrays. Default ``numpy.float``
Specifies the datatype for the returned arrays. Must be in
{np.float, np.float32}
need_header: boolean, default None.
Returns the header found in the fast-food file in addition to the
X/Y/Z arrays.
Returns
--------
X, Y, Z: numpy arrays
Returns the triplet of X/Y/Z positions as separate numpy arrays.
If need_header is set, then the header is also returned
Example
--------
>>> import numpy as np
>>> from os.path import dirname, abspath, join as pjoin
>>> import Corrfunc
>>> from Corrfunc.io import read_fastfood_catalog
>>> filename = pjoin(dirname(abspath(Corrfunc.__file__)),
... "../theory/tests/data/",
... "gals_Mr19.ff")
>>> X, Y, Z = read_fastfood_catalog(filename)
>>> N = 20
>>> for x,y,z in zip(X[0:N], Y[0:N], Z[0:]):
... print("{0:10.5f} {1:10.5f} {2:10.5f}".format(x, y, z))
... # doctest: +NORMALIZE_WHITESPACE
419.94550 1.96340 0.01610
419.88272 1.79736 0.11960
0.32880 10.63620 4.16550
0.15314 10.68723 4.06529
0.46400 8.91150 6.97090
6.30690 9.77090 8.61080
5.87160 9.65870 9.29810
8.06210 0.42350 4.89410
11.92830 4.38660 4.54410
11.95543 4.32622 4.51485
11.65676 4.34665 4.53181
11.75739 4.26262 4.31666
11.81329 4.27530 4.49183
11.80406 4.54737 4.26824
12.61570 4.14470 3.70140
13.23640 4.34750 5.26450
13.19833 4.33196 5.29435
13.21249 4.35695 5.37418
13.06805 4.24275 5.35126
13.19693 4.37618 5.28772
"""
if return_dtype is None:
return_dtype = np.float
if return_dtype not in [np.float32, np.float]:
msg = "Return data-type must be set and a valid numpy float"
raise ValueError(msg)
if not file_exists(filename):
msg = "Could not find file = {0}".format(filename)
raise IOError(msg)
import struct
try:
from future.utils import bytes_to_native_str
except ImportError:
print("\n\tPlease run python -m pip install Corrfunc before using "
"the 'Corrfunc' package\n")
raise
with open(filename, "rb") as f:
skip1 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
idat = struct.unpack(bytes_to_native_str(b'@iiiii'),
f.read(20))[0:5]
skip2 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
assert skip1 == 20 and skip2 == 20,\
"fast-food file seems to be incorrect (reading idat)"
ngal = idat[1]
if need_header is not None:
# now read fdat
skip1 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
fdat = struct.unpack(bytes_to_native_str(b'@fffffffff'),
f.read(36))[0:9]
skip2 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
assert skip1 == 36 and skip2 == 36,\
"fast-food file seems to be incorrect (reading fdat )"
skip1 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
znow = struct.unpack(bytes_to_native_str(b'@f'), f.read(4))[0]
skip2 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
assert skip1 == 4 and skip2 == 4,\
"fast-food file seems to be incorrect (reading redshift)"
else:
fdat_bytes = 4 + 36 + 4
znow_bytes = 4 + 4 + 4
# seek over the fdat + znow fields + padding bytes
# from current position
f.seek(fdat_bytes + znow_bytes, 1)
# read the padding bytes for the x-positions
skip1 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
assert skip1 == ngal * 4 or skip1 == ngal * 8, \
"fast-food file seems to be corrupt (padding bytes)"
# seek back 4 bytes from current position
f.seek(-4, 1)
pos = {}
for field in 'xyz':
skip1 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
assert skip1 == ngal * 4 or skip1 == ngal * 8, \
"fast-food file seems to be corrupt (padding bytes a)"
# the next division must be the integer division
input_dtype = np.float32 if skip1 // ngal == 4 else np.float
array = np.fromfile(f, input_dtype, ngal)
skip2 = struct.unpack(bytes_to_native_str(b'@i'), f.read(4))[0]
if return_dtype == input_dtype:
pos[field] = array
else:
pos[field] = [return_dtype(a) for a in array]
x = np.array(pos['x'])
y = np.array(pos['y'])
z = np.array(pos['z'])
if need_header is not None:
return idat, fdat, znow, x, y, z
else:
return x, y, z
[docs]def read_ascii_catalog(filename, return_dtype=None):
"""
Read a galaxy catalog from an ascii file.
Parameters
-----------
filename: string
Filename containing the galaxy positions
return_dtype: numpy dtype for returned arrays. Default ``numpy.float``
Specifies the datatype for the returned arrays. Must be in
{np.float, np.float32}
Returns
--------
X, Y, Z: numpy arrays
Returns the triplet of X/Y/Z positions as separate numpy arrays.
Example
--------
>>> from __future__ import print_function
>>> from os.path import dirname, abspath, join as pjoin
>>> import Corrfunc
>>> from Corrfunc.io import read_ascii_catalog
>>> filename = pjoin(dirname(abspath(Corrfunc.__file__)),
... "../mocks/tests/data/", "Mr19_mock_northonly.rdcz.dat")
>>> ra, dec, cz = read_ascii_catalog(filename)
>>> N = 20
>>> for r,d,c in zip(ra[0:N], dec[0:N], cz[0:]):
... print("{0:10.5f} {1:10.5f} {2:10.5f}".format(r, d, c))
... # doctest: +NORMALIZE_WHITESPACE
178.45087 67.01112 19905.28514
178.83495 67.72519 19824.02285
179.50132 67.67628 19831.21553
182.75497 67.13004 19659.79825
186.29853 68.64099 20030.64412
186.32346 68.65879 19763.38137
187.36173 68.15151 19942.66996
187.20613 68.56189 19996.36607
185.56358 67.97724 19729.32308
183.27930 67.11318 19609.71345
183.86498 67.82823 19500.44130
184.07771 67.43429 19440.53790
185.13370 67.15382 19390.60304
189.15907 68.28252 19858.85853
190.12209 68.55062 20044.29744
193.65245 68.36878 19445.62469
194.93514 68.34870 19158.93155
180.36897 67.50058 18671.40780
179.63278 67.51318 18657.59191
180.75742 67.95530 18586.88913
"""
if return_dtype is None:
return_dtype = np.float
if not file_exists(filename):
msg = "Could not find file = {0}".format(filename)
raise IOError(msg)
# check if pandas is available - much faster to read in the data
# using pandas
if pd is not None:
df = pd.read_csv(filename, header=None,
engine="c",
dtype={"x": return_dtype,
"y": return_dtype,
"z": return_dtype},
delim_whitespace=True)
x = np.asarray(df[0], dtype=return_dtype)
y = np.asarray(df[1], dtype=return_dtype)
z = np.asarray(df[2], dtype=return_dtype)
else:
x, y, z, _ = np.genfromtxt(filename, dtype=return_dtype, unpack=True)
return x, y, z
[docs]def read_fortran_catalog(filename, return_dtype=None):
if return_dtype is None:
return_dtype = np.float
with open(filename, mode='rb') as file: # b is important -> binary
fileContent = file.read()
nleading = 3*8+1*4
header = struct.unpack("dddi", fileContent[:nleading])
Lx, Ly, Lz, N = header
data = struct.unpack("f" * ((len(fileContent) -nleading) // 4), fileContent[nleading:])
data = np.array(data, dtype=return_dtype)
data = data.reshape((-1, 6))
x, y, z, *_ = data.T
return x, y, z
[docs]def read_lognormal_catalog(n='2e-4'):
filedir = '../theory/tests/data'
filepath = pjoin(dirname(abspath(__file__)), filedir, f'cat_L750_n{n}_z057_patchy_lognormal_rlz0.bin')
return read_fortran_catalog(filepath)
[docs]def read_catalog(filebase=None, return_dtype=np.float):
"""
Reads a galaxy/randoms catalog and returns 3 XYZ arrays.
Parameters
-----------
filebase: string (optional)
The fully qualified path to the file. If omitted, reads the
theory galaxy catalog under ../theory/tests/data/
return_dtype: numpy dtype for returned arrays. Default ``numpy.float``
Specifies the datatype for the returned arrays. Must be in
{np.float, np.float32}
Returns
--------
``x y z`` - Unpacked numpy arrays compatible with the installed
version of ``Corrfunc``.
.. note:: If the filename is omitted, then first the fast-food file
is searched for, and then the ascii file. End-users should always
supply the full filename.
"""
if filebase is None:
filename = pjoin(dirname(abspath(__file__)),
"../theory/tests/data/", "gals_Mr19")
allowed_exts = {'.ff': read_fastfood_catalog,
'.txt': read_ascii_catalog,
'.dat': read_ascii_catalog,
'.csv': read_ascii_catalog,
'.bin': read_fortran_catalog
}
for e in allowed_exts:
if file_exists(filename + e):
f = allowed_exts[e]
x, y, z = f(filename + e, return_dtype)
return x, y, z
raise IOError("Could not locate {0} with any of these extensions \
= {1}".format(filename, allowed_exts.keys()))
else:
# Likely an user-supplied value
if file_exists(filebase):
extension = splitext(filebase)[1]
if '.ff' in extension:
f = read_fastfood_catalog
elif '.bin' in extension:
f = read_fortran_catalog
else:
f = read_ascii_catalog
# default return is double
x, y, z = f(filebase, return_dtype)
return x, y, z
raise IOError("Could not locate file {0}".format(filebase))
if __name__ == '__main__':
import doctest
doctest.testmod(verbose=True)