diff --git a/Kernel/Applications/Makefile.am b/Kernel/Applications/Makefile.am index 3e39fb8c..bea9e349 100644 --- a/Kernel/Applications/Makefile.am +++ b/Kernel/Applications/Makefile.am @@ -1,11 +1,12 @@ -bin_PROGRAMS = load_bits +bin_PROGRAMS = load_bits digihdr check_PROGRAMS = test_Input test_Unpack load_bits_SOURCES = load_bits.C test_Input_SOURCES = test_Input.C test_Unpack_SOURCES = test_Unpack.C +digihdr_SOURCES = digihdr.C if HAVE_sigproc check_PROGRAMS += sigproc_header diff --git a/Kernel/Applications/digihdr.C b/Kernel/Applications/digihdr.C index e69f76f8..0517c411 100644 --- a/Kernel/Applications/digihdr.C +++ b/Kernel/Applications/digihdr.C @@ -6,6 +6,7 @@ ***************************************************************************/ #include +#include #include "dsp/File.h" #include "TextInterface.h" diff --git a/Kernel/Classes/BitUnpacker.C b/Kernel/Classes/BitUnpacker.C index fda7a788..df3b1dcc 100644 --- a/Kernel/Classes/BitUnpacker.C +++ b/Kernel/Classes/BitUnpacker.C @@ -59,7 +59,7 @@ void dsp::BitUnpacker::unpack () // Step through the array in small block sizes so that the matrix // transpose (for nchan>1 case) remains cache-friendly. - const unsigned blockdat = npol*nchan*ndim > 32 ? npol*nchan*ndim : 32; + const unsigned blockdat = npol*nchan*ndim*8/nbit > 32 ? npol*nchan*ndim*8/nbit : 32; const unsigned blockbytes = blockdat*nbit/8; const unsigned char* iptr = input->get_rawptr(); @@ -67,25 +67,22 @@ void dsp::BitUnpacker::unpack () for (uint64_t idat=0; idat(ndat-idat)) ? ndat-idat : blockdat; + const unsigned ndatblock = (blockdat>(ndat-idat)) ? ndat-idat : blockdat/nskip; for (unsigned ichan=0; ichanget_rawptr() + offset; const unsigned char* from = iptr + offset; float* into = output->get_datptr (ichan, ipol) + ndim*idat + idim; unsigned long* hist = get_histogram (offset); - + #ifdef _DEBUG cerr << "c=" << ichan << " p=" << ipol << " d=" << idim << endl; #endif unpack (ndatblock, from, nskip, into, fskip, hist); - //unpack (ndat, from, nskip, into, fskip, hist); offset ++; } } diff --git a/Kernel/Classes/DummyFile.C b/Kernel/Classes/DummyFile.C index db7367e1..1dd22ba5 100644 --- a/Kernel/Classes/DummyFile.C +++ b/Kernel/Classes/DummyFile.C @@ -56,6 +56,12 @@ void dsp::DummyFile::open_file (const char* filename) // Read obs info from ASCII file info = new ASCIIObservation(header); + + // cannot load less than a byte. set the time sample resolution accordingly + unsigned bits_per_byte = 8; + resolution = bits_per_byte / get_info()->get_nbit(); + if (resolution == 0) + resolution = 1; } void dsp::DummyFile::close () diff --git a/Kernel/Classes/MultiFile.C b/Kernel/Classes/MultiFile.C index 45071354..cea5e115 100644 --- a/Kernel/Classes/MultiFile.C +++ b/Kernel/Classes/MultiFile.C @@ -17,7 +17,7 @@ using namespace std; -dsp::MultiFile::MultiFile () : File ("MultiFile") +dsp::MultiFile::MultiFile (const char* name) : File (name) { test_contiguity = true; current_index = 0; diff --git a/Kernel/Classes/Multiplex.C b/Kernel/Classes/Multiplex.C index ea694cf2..7b777377 100644 --- a/Kernel/Classes/Multiplex.C +++ b/Kernel/Classes/Multiplex.C @@ -1,7 +1,7 @@ //-*-C++-*- /*************************************************************************** * - * Copyright (C) 2009 + * Copyright (C) 2009 by Jonathon Kocz * Licensed under the Academic Free License version 2.1 * ***************************************************************************/ @@ -18,7 +18,7 @@ using namespace std; -dsp::Multiplex::Multiplex () : MultiFile () +dsp::Multiplex::Multiplex () : MultiFile ( "Multiplex" ) { //current_index = 0; } diff --git a/Kernel/Classes/ObservationInterface.C b/Kernel/Classes/ObservationInterface.C index c9edb6bf..dc2e6b0d 100644 --- a/Kernel/Classes/ObservationInterface.C +++ b/Kernel/Classes/ObservationInterface.C @@ -13,6 +13,7 @@ dsp::Observation::Interface::Interface( Observation *c ) add( &Observation::get_nchan, "nchan", "Number of frequency channels" ); add( &Observation::get_npol, "npol", "Number of polarizations" ); add( &Observation::get_ndim, "ndim", "Number of data dimensions" ); + add( &Observation::get_nbit, "nbit", "Number of bits per datum" ); add( &Observation::get_type, &Observation::set_type, diff --git a/Kernel/Classes/dsp/MultiFile.h b/Kernel/Classes/dsp/MultiFile.h index 97809f31..a3667a66 100644 --- a/Kernel/Classes/dsp/MultiFile.h +++ b/Kernel/Classes/dsp/MultiFile.h @@ -27,7 +27,7 @@ namespace dsp { public: //! Constructor - MultiFile (); + MultiFile (const char* name = "MultiFile"); //! Destructor virtual ~MultiFile (); diff --git a/Kernel/Classes/dsp/Multiplex.h b/Kernel/Classes/dsp/Multiplex.h index 285565ef..f0a89e9f 100644 --- a/Kernel/Classes/dsp/Multiplex.h +++ b/Kernel/Classes/dsp/Multiplex.h @@ -1,16 +1,12 @@ //-*-C++-*- /*************************************************************************** * - * Copyright (C) 2002 by Willem van Straten + * Copyright (C) 2002 by Jonathon Kocz * Licensed under the Academic Free License version 2.1 * ***************************************************************************/ -/* $Source: /cvsroot/dspsr/dspsr/Kernel/Classes/dsp/Multiplex.h,v $ - $Revision: 1.1 $ - $Date: 2009/09/10 01:02:34 $ - $Author: tcaotiaafoc $ */ - +// dspsr/Kernel/Classes/dsp/Multiplex.h #ifndef __Multiplex_h #define __Multiplex_h diff --git a/Kernel/Formats/Unpacker_registry.C b/Kernel/Formats/Unpacker_registry.C index 9f7fa3e4..d0d3d921 100644 --- a/Kernel/Formats/Unpacker_registry.C +++ b/Kernel/Formats/Unpacker_registry.C @@ -102,6 +102,11 @@ static dsp::Unpacker::Register::Enter guppi4; static dsp::Unpacker::Register::Enter guppi2; #endif +#if HAVE_kat +#include "dsp/KAT7Unpacker.h" +static dsp::Unpacker::Register::Enter kat7; +#endif + #if HAVE_lofar_dal #include "dsp/LOFAR_DALUnpacker.h" static dsp::Unpacker::Register::Enter lofar_dal; diff --git a/Kernel/Formats/fits/FITSFile.C b/Kernel/Formats/fits/FITSFile.C index 66f59f43..10b690bc 100644 --- a/Kernel/Formats/fits/FITSFile.C +++ b/Kernel/Formats/fits/FITSFile.C @@ -154,6 +154,8 @@ void dsp::FITSFile::open_file(const char* filename) std::string backend_name = archive->get()->get_name(); if (backend_name == "GUPPI" || backend_name == "PUPPI") get_info()->set_machine("GUPPIFITS"); + else if (backend_name == "COBALT") + get_info()->set_machine("COBALT"); else get_info()->set_machine("FITS"); get_info()->set_telescope(archive->get_telescope()); diff --git a/Kernel/Formats/fits/Makefile.am b/Kernel/Formats/fits/Makefile.am index bb2e3cb9..f1eb1505 100644 --- a/Kernel/Formats/fits/Makefile.am +++ b/Kernel/Formats/fits/Makefile.am @@ -1,12 +1,12 @@ noinst_LTLIBRARIES = libfits.la nobase_include_HEADERS = dsp/FITSUnpacker.h dsp/FITSFile.h \ - dsp/GUPPIFITSUnpacker.h dsp/FITSDigitizer.h \ - dsp/FITSOutputFile.h + dsp/GUPPIFITSUnpacker.h dsp/FITSDigitizer.h + #dsp/FITSOutputFile.h libfits_la_SOURCES = FITSUnpacker.C GUPPIFITSUnpacker.C \ - FITSFile.C fits_params.h FITSDigitizer.C\ - FITSOutputFile.C + FITSFile.C fits_params.h FITSDigitizer.C + #FITSOutputFile.C ############################################################################# # diff --git a/Kernel/Formats/gmrt/GMRTFile.C b/Kernel/Formats/gmrt/GMRTFile.C index fcf1d27f..d3c202b6 100644 --- a/Kernel/Formats/gmrt/GMRTFile.C +++ b/Kernel/Formats/gmrt/GMRTFile.C @@ -133,9 +133,11 @@ void dsp::GMRTFile::open_file (const char* filename) fd = open_read_header (filename, &header, &block); - cerr << "n_ds = " << header.n_ds << endl; - cerr << "n_chan = " << header.n_chan << endl; - cerr << "i_chan = " << block.FreqChanNo << endl; + if (verbose) { + cerr << "n_ds = " << header.n_ds << endl; + cerr << "n_chan = " << header.n_chan << endl; + cerr << "i_chan = " << block.FreqChanNo << endl; + } get_info()->set_nbit (8); @@ -143,8 +145,10 @@ void dsp::GMRTFile::open_file (const char* filename) get_info()->set_bandwidth (bw); get_info()->set_centre_frequency (header.rf + (block.FreqChanNo + 0.5) * bw); - cerr << "cf = " << get_info()->get_centre_frequency() << endl; - cerr << "bw = " << bw << endl; + if (verbose) { + cerr << "cf = " << get_info()->get_centre_frequency() << endl; + cerr << "bw = " << bw << endl; + } get_info()->set_npol(2); get_info()->set_state (Signal::Analytic); @@ -154,18 +158,22 @@ void dsp::GMRTFile::open_file (const char* filename) epoch += block.ipts1 / get_info()->get_rate(); get_info()->set_start_time( epoch ); - cerr << "MJD = " << get_info()->get_start_time() << endl; - cerr << "telescope = " << header.telescope << endl; + if (verbose) { + cerr << "MJD = " << get_info()->get_start_time() << endl; + cerr << "telescope = " << header.telescope << endl; + } get_info()->set_telescope (header.telescope); get_info()->set_source (header.psr_name); header_bytes = sizeof(struct gmrt_params); - cerr << "totalsize=" << block.totalsize << endl; - cerr << "NPtsSend=" << block.NPtsSend << endl; - cerr << "overlap=" << header.overlap << endl; - cerr << "n_samp_dump=" << header.n_samp_dump << endl; + if (verbose) { + cerr << "totalsize=" << block.totalsize << endl; + cerr << "NPtsSend=" << block.NPtsSend << endl; + cerr << "overlap=" << header.overlap << endl; + cerr << "n_samp_dump=" << header.n_samp_dump << endl; + } block_header_bytes = sizeof(struct data2rcv); //block_tailer_bytes = header.overlap * 4; diff --git a/Kernel/Formats/kat/KAT7Unpacker.C b/Kernel/Formats/kat/KAT7Unpacker.C new file mode 100644 index 00000000..5a7ac12a --- /dev/null +++ b/Kernel/Formats/kat/KAT7Unpacker.C @@ -0,0 +1,179 @@ +//-*-C++-*- +/*************************************************************************** + * + * Copyright (C) 2015 Andrew Jameson + * Licensed under the Academic Free License version 2.1 + * + ***************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "dsp/KAT7Unpacker.h" +#include "dsp/BitTable.h" + +#include "Error.h" + +#if HAVE_CUDA +#include "dsp/MemoryCUDA.h" +#include "dsp/KAT7UnpackerCUDA.h" +#include +#endif + +#include + +using namespace std; + +static void* const undefined_stream = (void *) -1; + +dsp::KAT7Unpacker::KAT7Unpacker (const char* _name) : HistUnpacker (_name) +{ + if (verbose) + cerr << "dsp::KAT7Unpacker ctor" << endl; + + set_nstate (256); + gpu_stream = undefined_stream; + + table = new BitTable (8, BitTable::TwosComplement); + + device_prepared = false; +} + +dsp::KAT7Unpacker::~KAT7Unpacker () +{ +} + +dsp::KAT7Unpacker * dsp::KAT7Unpacker::clone () const +{ + return new KAT7Unpacker (*this); +} + +//! Return true if the unpacker can operate on the specified device +bool dsp::KAT7Unpacker::get_device_supported (Memory* memory) const +{ +#if HAVE_CUDA + if (verbose) + cerr << "dsp::KAT7Unpacker::get_device_supported HAVE_CUDA" << endl; + return dynamic_cast< CUDA::DeviceMemory*> ( memory ); +#else + return false; +#endif +} + +//! Set the device on which the unpacker will operate +void dsp::KAT7Unpacker::set_device (Memory* memory) +{ +#if HAVE_CUDA + CUDA::DeviceMemory * gpu_mem = dynamic_cast< CUDA::DeviceMemory*>( memory ); + if (gpu_mem) + { + gpu_stream = (void *) gpu_mem->get_stream(); + if (verbose) + cerr << "dsp::KAT7Unpacker::set_device using gpu memory" << endl; + staging.set_memory( memory ); + } + else + { + if (verbose) + cerr << "dsp::KAT7Unpacker::set_device using cpu memory" << endl; + gpu_stream = undefined_stream; + } +#else + Unpacker::set_device (memory); +#endif + device_prepared = true; +} + + +bool dsp::KAT7Unpacker::matches (const Observation* observation) +{ + return observation->get_machine() == "KPSR" + && observation->get_ndim() == 2 + && observation->get_nbit() == 8; +} + +void dsp::KAT7Unpacker::unpack () +{ +#if HAVE_CUDA + if (gpu_stream != undefined_stream) + { + unpack_on_gpu (); + return; + } +#endif + + // some programs (digifil) do not call set_device + if ( ! device_prepared ) + set_device ( Memory::get_manager ()); + + const uint64_t ndat = input->get_ndat(); + const int8_t * from = (int8_t *) input->get_rawptr(); + float * into; + const unsigned nchan = input->get_nchan(); + const unsigned npol = 1; + + const float* lookup = table->get_values (); + + // data is stored as 128 sample blocks of FT ordered data + const uint64_t nblocks = ndat / 128; + + // cerr << "dsp::KAT7Unpacker::unpack ndat="<get_scale(), (int16_t *) d_staging, into); +} + +#endif + diff --git a/Kernel/Formats/kat/KAT7UnpackerCUDA.cu b/Kernel/Formats/kat/KAT7UnpackerCUDA.cu new file mode 100644 index 00000000..5c3ffe88 --- /dev/null +++ b/Kernel/Formats/kat/KAT7UnpackerCUDA.cu @@ -0,0 +1,91 @@ +//-*-C++-*- +/*************************************************************************** + * + * Copyright (C) 2010 by Willem van Straten + * Licensed under the Academic Free License version 2.1 + * + ***************************************************************************/ + +#include "dsp/KAT7UnpackerCUDA.h" +#include "dsp/Operation.h" + +#include "Error.h" + +#include + +//#define _DEBUG + +using namespace std; + +void check_error (const char*); + +// each thread unpacks samples so that 1 warp does 128 contiguous samples +__global__ void kat7_unpack_fpt_kernel (const uint64_t ndat, float scale, const int16_t * input, cuFloatComplex * output) +{ + const int warp_idx = threadIdx.x & 0x1F; // threadIDx.x % 32 + const int warp_num = threadIdx.x / 32; + + const unsigned ichan = blockIdx.y; + const unsigned nchan = gridDim.y; + const unsigned ichan_offset = (ichan * 128); + + // first sample for the start of the warp + unsigned isamp = (blockIdx.x * blockDim.x * 4) + (warp_num * 128) + warp_idx; + unsigned idx = (blockIdx.x * blockDim.x * 4 * nchan) + (warp_num * nchan * 128) + ichan_offset + warp_idx; + unsigned odx = (ichan * ndat) + isamp; + + int16_t val16; + int8_t * val8 = (int8_t *) &val16; + cuFloatComplex val64; + + for (unsigned ival=0; ival<4; ival++) + { + if (isamp < ndat) + { + val16 = input[idx]; + val64.x = ((float) val8[0] + 0.5) * scale; + val64.y = ((float) val8[1] + 0.5) * scale; + output[odx] = val64; + + idx += 32; + odx += 32; + isamp += 32; + } + } + +} + +void kat7_unpack (cudaStream_t stream, const uint64_t ndat, unsigned nchan, unsigned npol, + float scale, const int16_t * input, float * output) +{ + int nthread = 1024; + + const unsigned ndat_per_block = 4 * nthread; + + // each thread will unpack 4 time samples + dim3 blocks = dim3 (ndat / ndat_per_block, nchan); + + if (ndat % ndat_per_block != 0) + blocks.x++; + +#ifdef _DEBUG + cerr << "kat7_unpack ndat=" << ndat << " scale=" << scale + << " input=" << (void*) input << " nblock=(" << blocks.x << "," << blocks.y << ")" + << " nthread=" << nthread << endl; +#endif + + kat7_unpack_fpt_kernel<<>> (ndat, scale, input, (cuFloatComplex *) output); + + // AJ's theory... + // If there are no stream synchronises on the input then the CPU pinned memory load from the + // input class might be able to get ahead of a whole sequence of GPU operations, and even exceed + // one I/O loop. Therefore this should be a reuqirement to have a stream synchronize some time + // after the data are loaded from pinned memory to GPU ram and the next Input copy to pinned memory + + if (dsp::Operation::record_time || dsp::Operation::verbose) + check_error ("kat7_unpack_fpt_kernel"); + + // put it here for now + cudaStreamSynchronize(stream); + +} diff --git a/Kernel/Formats/kat/Makefile.am b/Kernel/Formats/kat/Makefile.am new file mode 100644 index 00000000..5ec339b8 --- /dev/null +++ b/Kernel/Formats/kat/Makefile.am @@ -0,0 +1,21 @@ + +noinst_LTLIBRARIES = libkat.la + +nobase_include_HEADERS = dsp/KAT7Unpacker.h + +libkat_la_SOURCES = KAT7Unpacker.C + +if HAVE_CUDA + +nobase_include_HEADERS += dsp/KAT7UnpackerCUDA.h +libkat_la_SOURCES += KAT7UnpackerCUDA.cu + +endif + +############################################################################# +# + +include $(top_srcdir)/config/Makefile.include +include $(top_srcdir)/config/Makefile.cuda + +AM_CPPFLAGS += @CUDA_CFLAGS@ diff --git a/Kernel/Formats/kat/dsp/KAT7Unpacker.h b/Kernel/Formats/kat/dsp/KAT7Unpacker.h new file mode 100644 index 00000000..b13091cb --- /dev/null +++ b/Kernel/Formats/kat/dsp/KAT7Unpacker.h @@ -0,0 +1,55 @@ +/* + + */ + +#ifndef __dsp_KAT7Unpacker_h +#define __dsp_KAT7Unpacker_h + +#include "dsp/EightBitUnpacker.h" + +namespace dsp { + + class KAT7Unpacker : public HistUnpacker + { + public: + + //! Constructor + KAT7Unpacker (const char* name = "KAT7Unpacker"); + ~KAT7Unpacker (); + + //! Cloner (calls new) + virtual KAT7Unpacker * clone () const; + + //! Return true if the unpacker can operate on the specified device + bool get_device_supported (Memory*) const; + + //! Set the device on which the unpacker will operate + void set_device (Memory*); + + protected: + + Reference::To table; + + //! Return true if we can convert the Observation + bool matches (const Observation* observation); + + void unpack (); + + void unpack (uint64_t ndat, const unsigned char* from, + float* into, const unsigned fskip, + unsigned long* hist); + + BitSeries staging; + void * gpu_stream; + void unpack_on_gpu (); + + unsigned get_resolution ()const ; + + private: + + bool device_prepared; + + }; +} + +#endif diff --git a/Kernel/Formats/kat/dsp/KAT7UnpackerCUDA.h b/Kernel/Formats/kat/dsp/KAT7UnpackerCUDA.h new file mode 100644 index 00000000..8e2a9169 --- /dev/null +++ b/Kernel/Formats/kat/dsp/KAT7UnpackerCUDA.h @@ -0,0 +1,14 @@ +/* + + */ + +#ifndef __dsp_KAT7UnpackerCUDA_h +#define __dsp_KAT7UnpackerCUDA_h + +#include +#include + +void kat7_unpack (cudaStream_t stream, const uint64_t ndat, unsigned nchan, unsigned npol, + float scale, const int16_t * input, float * output); + +#endif diff --git a/Kernel/Formats/lofar_dal/LOFAR_DALFile.C b/Kernel/Formats/lofar_dal/LOFAR_DALFile.C index 78fd9180..4441f607 100644 --- a/Kernel/Formats/lofar_dal/LOFAR_DALFile.C +++ b/Kernel/Formats/lofar_dal/LOFAR_DALFile.C @@ -411,8 +411,6 @@ void dsp::LOFAR_DALFile::open_file (const char* filename) } } - get_info()->set_machine( "LOFAR" ); - // OPEN ALL FILES handle = new Handle; diff --git a/Kernel/Formats/lofar_dal/LOFAR_DALUnpacker.C b/Kernel/Formats/lofar_dal/LOFAR_DALUnpacker.C index 28068eef..18190087 100644 --- a/Kernel/Formats/lofar_dal/LOFAR_DALUnpacker.C +++ b/Kernel/Formats/lofar_dal/LOFAR_DALUnpacker.C @@ -27,7 +27,7 @@ bool dsp::LOFAR_DALUnpacker::matches (const Observation* observation) { return observation->get_nbit() == 32 && - observation->get_machine() == "LOFAR"; + observation->get_machine() == "COBALT"; } //! Return true if the unpacker support the specified output order diff --git a/Kernel/Formats/sigproc/SigProcDigitizer.C b/Kernel/Formats/sigproc/SigProcDigitizer.C index b1c4e13e..db66f024 100644 --- a/Kernel/Formats/sigproc/SigProcDigitizer.C +++ b/Kernel/Formats/sigproc/SigProcDigitizer.C @@ -53,12 +53,15 @@ public: inline unsigned operator () (unsigned out_chan) { unsigned in_chan = out_chan; - if (flip_band) - in_chan = (nchan-in_chan-1); + //if (flip_band) + // in_chan = (nchan-in_chan-1); if (input->get_nsub_swap() > 1) in_chan = input->get_unswapped_ichan(out_chan); else if (swap_band) in_chan = (in_chan+half_chan)%nchan; + // moved from the start of the block + if (flip_band) + in_chan = (nchan-in_chan-1); return in_chan; } }; diff --git a/Kernel/Formats/sigproc/SigProcObservation.C b/Kernel/Formats/sigproc/SigProcObservation.C index b36d5738..409a68bf 100644 --- a/Kernel/Formats/sigproc/SigProcObservation.C +++ b/Kernel/Formats/sigproc/SigProcObservation.C @@ -86,6 +86,8 @@ static std::string get_sigproc_telescope_name (int _id) return "GMRT"; case 8: return "Effelsberg"; + case 11: + return "LOFAR"; default: return "unknown"; break; @@ -119,6 +121,7 @@ static int get_sigproc_telescope_id (string name) else if (itoa == "GB") return 6; else if (itoa == "GM") return 7; else if (itoa == "EF") return 8; + else if (itoa == "LF") return 11; else return 0; } catch (Error &error) @@ -129,6 +132,41 @@ static int get_sigproc_telescope_id (string name) return 0; } +static std::string get_sigproc_machine_name (int _id) +{ + // Info from sigproc's aliases.c + switch (_id) { + case 0: + return "FAKE"; + case 1: + return "PSPM"; + case 2: + return "WAPP"; + case 3: + return "AOFTM"; + case 4: + return "BPP"; + case 5: + return "OOTY"; + case 6: + return "SCAMP"; + case 7: + return "GMRTFB"; + case 8: + return "PULSAR2000"; + case 10: + return "ARTEMIS"; + case 11: + return "COBALT"; + default: + return "?????"; + break; + } + + return "?????"; +} + + void dsp::SigProcObservation::load_global () { // set_receiver (buffer); @@ -163,7 +201,8 @@ void dsp::SigProcObservation::load_global () coord.dec().setDegMS (src_dej); set_coordinates (coord); - set_machine ("SigProc"); + // set_machine ("SigProc"); + set_machine ( get_sigproc_machine_name(machine_id) ); set_telescope ( get_sigproc_telescope_name(telescope_id) ); } @@ -193,6 +232,7 @@ void dsp::SigProcObservation::unload_global () */ if(get_machine().compare("BPSR")==0)machine_id=10; else if(get_machine().compare("SCAMP")==0)machine_id=6; + else if(get_machine().compare("COBALT")==0)machine_id=11; // This is the 'rawfilename' parameter in the header. // inpfile is possibly uninitialized here so avoid setting diff --git a/Kernel/Formats/sigproc/read_header.c b/Kernel/Formats/sigproc/read_header.c index 28a3b53d..54cdbda8 100644 --- a/Kernel/Formats/sigproc/read_header.c +++ b/Kernel/Formats/sigproc/read_header.c @@ -17,9 +17,9 @@ void get_string(FILE *inputfile, int *nbytes, char string[]) /* includefile */ int nchar; strcpy(string,"ERROR"); fread(&nchar, sizeof(int), 1, inputfile); + *nbytes=sizeof(int); if (feof(inputfile)) exit(0); if (nchar>80 || nchar<1) return; - *nbytes=sizeof(int); fread(string, nchar, 1, inputfile); string[nchar]='\0'; *nbytes+=nchar; diff --git a/Signal/General/Detection.C b/Signal/General/Detection.C index f6849c26..9e79e02a 100644 --- a/Signal/General/Detection.C +++ b/Signal/General/Detection.C @@ -189,6 +189,7 @@ void dsp::Detection::resize_output () get_output()->set_npol( output_npol ); get_output()->set_ndim( output_ndim ); get_output()->resize( get_input()->get_ndat() ); + get_output()->set_zeroed_data (input->get_zeroed_data()); } else if (reshape) { @@ -222,9 +223,12 @@ void dsp::Detection::square_law () cerr << "dsp::Detection::square_law" << endl; if (engine) - throw Error (InvalidState, "dsp::Detection::square_law", - "square law detection not yet implemented for the GPU"); - + { + if (verbose) + cerr << "dsp::Detection::square_law using Engine engine=" << (void *) engine << endl; + engine->square_law (input, output); + return; + } const unsigned nchan = input->get_nchan(); const unsigned npol = input->get_npol(); diff --git a/Signal/General/DetectionCUDA.cu b/Signal/General/DetectionCUDA.cu index 3b305026..985484ed 100644 --- a/Signal/General/DetectionCUDA.cu +++ b/Signal/General/DetectionCUDA.cu @@ -22,6 +22,7 @@ using namespace std; void check_error (const char*); +void check_error_stream (const char*, cudaStream_t); /* PP = p^* p @@ -172,3 +173,116 @@ void CUDA::DetectionEngine::polarimetry (unsigned ndim, check_error ("CUDA::DetectionEngine::polarimetry"); } +// dubiuous about the correctness here... TODO AJ +__global__ void sqld_tfp (float2 *base_in, unsigned stride_in, + float * base_out, unsigned stride_out, unsigned ndat) +{ + // input and output pointers for channel (y dim) + float2 * in = base_in + (blockIdx.y * stride_in); + float * out = base_out + (blockIdx.y * stride_out); + + unsigned i = blockIdx.x * blockDim.x + threadIdx.x; + + out[i] = in[i].x * in[i].x + in[i].y * in[i].y; +} + +__global__ void sqld_fpt (float2 *base_in, float *base_out, uint64_t ndat) +{ + // set base pointer for ichan [blockIdx.y], input complex, output detected, npol 1 + float2 * in = base_in + (blockIdx.y * ndat); + float * out = base_out + (blockIdx.y * ndat); + + // the sample for the channel + unsigned i = blockIdx.x * blockDim.x + threadIdx.x; + + out[i] = in[i].x * in[i].x + in[i].y * in[i].y; +} + +void CUDA::DetectionEngine::square_law (const dsp::TimeSeries* input, + dsp::TimeSeries* output) +{ + uint64_t ndat = input->get_ndat (); + unsigned nchan = input->get_nchan (); + unsigned ndim = input->get_ndim(); + unsigned npol = input->get_npol(); + + if (ndim != 2) + throw Error (InvalidParam, "CUDA::DetectionEngine::square_law", + "cannot handle ndim=%u != 2", ndim); + + if (npol != 1) + throw Error (InvalidParam, "CUDA::DetectionEngine::square_law", + "cannot handle npol=%u != 1", ndim); + + if (input == output) + throw Error (InvalidParam, "CUDA::DetectionEngine::square_law" + "cannot handle in-place data"); + +/* + if (input->get_order() == dsp::TimeSeries::OrderTFP) + cerr << "CUDA::DetectionEngine::square_law input->get_order=TFP" << endl; + if (output->get_order() == dsp::TimeSeries::OrderTFP) + cerr << "CUDA::DetectionEngine::square_law output->get_order=TFP" << endl; +*/ + + switch (input->get_order()) + { + case dsp::TimeSeries::OrderTFP: + { + dim3 threads (512); + dim3 blocks (ndat/threads.x, nchan); + + if (ndat % threads.x) + blocks.x ++; + + float2* base_in = (float2*) input->get_dattfp (); + float* base_out = output->get_dattfp(); + + unsigned stride_in = nchan * npol; + unsigned stride_out = nchan * npol; + + if (dsp::Operation::verbose) + cerr << "CUDA::DetectionEngine::square_law sqld_tfp ndat=" << ndat + << " stride_in=" << stride_in << " stride_out=" << stride_out << endl; + + sqld_tfp<<>> (base_in, stride_in, base_out, stride_out, ndat); + + if (dsp::Operation::record_time || dsp::Operation::verbose) + check_error_stream ("CUDA::DetectionEngine::square_law sqld_tfp", stream); + + break; + } + + case dsp::TimeSeries::OrderFPT: + { + dim3 threads (512); + dim3 blocks (ndat/threads.x, nchan); + + if (ndat % threads.x) + blocks.x ++; + + unsigned ichan = 0; + unsigned ipol = 0; + float2* base_in = (float2*) input->get_datptr(ichan, ipol); + float* base_out = output->get_datptr(ichan, ipol); + + if (dsp::Operation::verbose) + cerr << "CUDA::DetectionEngine::square_law <<>> " + << " base_in=" << (void *) base_in + << " base_out=" << (void *) base_out + << " ndat=" << ndat << endl; + + sqld_fpt<<>> (base_in, base_out, ndat); + + if (dsp::Operation::record_time || dsp::Operation::verbose) + check_error_stream ("CUDA::DetectionEngine::square_law sqld_fpt", stream); + + break; + } + + default: + { + throw Error (InvalidState, "CUDA::DetectionEngine::square_law", "unrecognized order"); + } + } +} diff --git a/Signal/General/Filterbank.C b/Signal/General/Filterbank.C index d9935173..3e751af8 100644 --- a/Signal/General/Filterbank.C +++ b/Signal/General/Filterbank.C @@ -205,21 +205,6 @@ void dsp::Filterbank::make_preparations () "matrix convolution and input.npol != 2"); } - if (passband) - { - if (response) - passband -> match (response); - - unsigned passband_npol = input->get_npol(); - if (matrix_convolution) - passband_npol = 4; - - passband->resize (passband_npol, input->get_nchan(), n_fft, 1); - - if (!response) - passband->match (input); - } - if (has_buffering_policy()) { if (verbose) @@ -239,6 +224,22 @@ void dsp::Filterbank::make_preparations () return; } + // the engine should delete the passband if it doesn't support this feature + if (passband) + { + if (response) + passband -> match (response); + + unsigned passband_npol = input->get_npol(); + if (matrix_convolution) + passband_npol = 4; + + passband->resize (passband_npol, input->get_nchan(), n_fft, 1); + + if (!response) + passband->match (input); + } + using namespace FTransform; OptimalFFT* optimal = 0; diff --git a/Signal/General/FilterbankCUDA.cu b/Signal/General/FilterbankCUDA.cu index 4fab005c..9986105e 100644 --- a/Signal/General/FilterbankCUDA.cu +++ b/Signal/General/FilterbankCUDA.cu @@ -77,6 +77,9 @@ void CUDA::FilterbankEngine::setup (dsp::Filterbank* filterbank) float2** d_kernel_ptr = reinterpret_cast(filterbank->get_d_kernel_gpu_ptr()); d_kernel = *d_kernel_ptr; + // the CUDA engine does not maintain/compute the passband + filterbank->set_passband (NULL); + freq_res = filterbank->get_freq_res (); nchan_subband = filterbank->get_nchan_subband(); diff --git a/Signal/General/SingleThread.C b/Signal/General/SingleThread.C index 6f74de32..6af57a36 100644 --- a/Signal/General/SingleThread.C +++ b/Signal/General/SingleThread.C @@ -284,16 +284,16 @@ void dsp::SingleThread::construct () try unpacked->set_memory (new CUDA::PinnedMemory); - TransferCUDA* transfer; + TransferCUDA* transfer = new TransferCUDA (stream); if (config->use_input_stream) { + if (Operation::verbose) + cerr << "SingleThread: setting input stream" << endl; // Create an event that signals the completion of the CUDA transfer cudaEventCreate( reinterpret_cast(&input_event) ); - transfer = new TransferCUDA (stream, - static_cast(input_stream), static_cast(input_event)); + transfer->set_input_stream(static_cast(input_stream), + static_cast(input_event)); } - else - transfer = new TransferCUDA (stream); transfer->set_kind( cudaMemcpyHostToDevice ); transfer->set_input( unpacked ); diff --git a/Signal/General/TransferCUDA.C b/Signal/General/TransferCUDA.C index c5de21c2..98023b68 100644 --- a/Signal/General/TransferCUDA.C +++ b/Signal/General/TransferCUDA.C @@ -19,17 +19,7 @@ dsp::TransferCUDA::TransferCUDA(cudaStream_t _stream) stream = _stream; input_stream = _stream; kind = cudaMemcpyHostToDevice; -} - -//! Associate cudaEvent with the transfer -dsp::TransferCUDA::TransferCUDA(cudaStream_t _stream, - cudaStream_t _input_stream, cudaEvent_t _event) - : Transformation ("CUDA::Transfer", outofplace) -{ - stream = _stream; - input_stream = _input_stream; - kind = cudaMemcpyHostToDevice; - event = _event; + event = 0; } //! Do stuff @@ -62,7 +52,8 @@ void dsp::TransferCUDA::transformation () input->internal_get_size(), kind, input_stream); - cudaEventRecord(event, input_stream); + if (event) + cudaEventRecord(event, input_stream); } else error = cudaMemcpy (output->internal_get_buffer(), @@ -89,3 +80,9 @@ void dsp::TransferCUDA::prepare () output->internal_match( input ); output->copy_configuration( input ); } + +void dsp::TransferCUDA::set_input_stream (cudaStream_t _input_stream, cudaEvent_t _event) +{ + input_stream = _input_stream; + event = _event; +} diff --git a/Signal/General/dsp/Detection.h b/Signal/General/dsp/Detection.h index 092b410b..c5d09e46 100644 --- a/Signal/General/dsp/Detection.h +++ b/Signal/General/dsp/Detection.h @@ -103,6 +103,9 @@ namespace dsp { public: virtual void polarimetry (unsigned ndim, const TimeSeries* in, TimeSeries* out) = 0; + + virtual void square_law (const dsp::TimeSeries* input, + dsp::TimeSeries* output) = 0; }; } diff --git a/Signal/General/dsp/DetectionCUDA.h b/Signal/General/dsp/DetectionCUDA.h index 8f1a53d1..d2d8894a 100644 --- a/Signal/General/dsp/DetectionCUDA.h +++ b/Signal/General/dsp/DetectionCUDA.h @@ -28,6 +28,8 @@ namespace CUDA void polarimetry (unsigned ndim, const dsp::TimeSeries* in, dsp::TimeSeries* out); + void square_law (const dsp::TimeSeries* in, dsp::TimeSeries* out); + protected: cudaStream_t stream; diff --git a/Signal/General/dsp/TransferCUDA.h b/Signal/General/dsp/TransferCUDA.h index bdc3506c..67f8a59b 100644 --- a/Signal/General/dsp/TransferCUDA.h +++ b/Signal/General/dsp/TransferCUDA.h @@ -23,13 +23,13 @@ namespace dsp { //! Default constructor - always out of place TransferCUDA(cudaStream_t _stream); - //! Constructor with input stream and completion event - TransferCUDA(cudaStream_t _stream, cudaStream_t _input_stream, - cudaEvent_t _event); - void set_kind (cudaMemcpyKind k) { kind = k; } void prepare (); + // If transferring all input in its own stream, need the stream and an event + // signaling transfer completion + void set_input_stream (cudaStream_t _input_stream, cudaEvent_t _event); + Operation::Function get_function () const { return Operation::Structural; } protected: diff --git a/Signal/Pulsar/FoldCUDA.cu b/Signal/Pulsar/FoldCUDA.cu index 23d36628..342b8551 100644 --- a/Signal/Pulsar/FoldCUDA.cu +++ b/Signal/Pulsar/FoldCUDA.cu @@ -177,8 +177,11 @@ void CUDA::FoldEngine::send_binplan () cudaError error; if (stream) + { error = cudaMemcpyAsync (d_bin, binplan, mem_size, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + } else error = cudaMemcpy (d_bin, binplan, mem_size, cudaMemcpyHostToDevice); diff --git a/Signal/Pulsar/LoadToFold1.C b/Signal/Pulsar/LoadToFold1.C index a3e45bb1..4ac12c51 100644 --- a/Signal/Pulsar/LoadToFold1.C +++ b/Signal/Pulsar/LoadToFold1.C @@ -689,6 +689,30 @@ void dsp::LoadToFold::prepare_fold () fold_prepared = true; } +MJD dsp::LoadToFold::parse_epoch (const std::string& epoch_string) +{ + MJD epoch; + + if (epoch_string == "start") + { + epoch = manager->get_info()->get_start_time(); + epoch += manager->get_input()->tell_seconds(); + + if (Operation::verbose) + cerr << "dsp::LoadToFold::parse reference epoch=start_time=" + << epoch.printdays(13) << endl; + } + else if (!epoch_string.empty()) + { + epoch = MJD( epoch_string ); + if (Operation::verbose) + cerr << "dsp::LoadToFold::parse reference epoch=" + << epoch.printdays(13) << endl; + } + + return epoch; +} + void dsp::LoadToFold::prepare () { assert (fold.size() > 0); @@ -759,24 +783,7 @@ void dsp::LoadToFold::prepare () excision -> set_cutoff_sigma ( config->excision_cutoff ); } - MJD reference_epoch; - - if (config->reference_epoch == "start") - { - reference_epoch = manager->get_info()->get_start_time(); - reference_epoch += manager->get_input()->tell_seconds(); - - if (Operation::verbose) - cerr << "dsp::LoadToFold::prepare reference epoch=start_time=" - << reference_epoch.printdays(13) << endl; - } - else if (!config->reference_epoch.empty()) - { - reference_epoch = MJD( config->reference_epoch ); - if (Operation::verbose) - cerr << "dsp::LoadToFold::prepare reference epoch=" - << reference_epoch.printdays(13) << endl; - } + MJD fold_reference_epoch = parse_epoch (config->reference_epoch); for (unsigned ifold=0; ifold < fold.size(); ifold++) { @@ -791,7 +798,7 @@ void dsp::LoadToFold::prepare () fold[ifold]->get_output()->set_extensions (extensions); } - fold[ifold]->set_reference_epoch (reference_epoch); + fold[ifold]->set_reference_epoch (fold_reference_epoch); } SingleThread::prepare (); @@ -1064,6 +1071,9 @@ void dsp::LoadToFold::build_fold (Reference::To& fold, if (config->minimum_integration_length > 0) unloader->set_minimum_integration_length (config->minimum_integration_length); + + MJD reference_epoch = parse_epoch (config->integration_reference_epoch); + subfold -> set_subint_reference_epoch( reference_epoch ); } else { @@ -1101,7 +1111,8 @@ catch (Error& error) throw error += "dsp::LoadToFold::build_fold"; } -void dsp::LoadToFold::configure_detection (Detection* detect, int noperations) +void dsp::LoadToFold::configure_detection (Detection* detect, + unsigned noperations) { #if HAVE_CUDA bool run_on_gpu = thread_id < config->get_cuda_ndevice() diff --git a/Signal/Pulsar/TimeDivide.C b/Signal/Pulsar/TimeDivide.C index 86285bde..1bc2a363 100644 --- a/Signal/Pulsar/TimeDivide.C +++ b/Signal/Pulsar/TimeDivide.C @@ -58,8 +58,15 @@ void dsp::TimeDivide::set_start_time (MJD _start_time) start_phase = Phase::zero; is_valid = false; - if( division_seconds && division_seconds == unsigned(division_seconds) && - integer_division_seconds_boundaries) + if( reference_epoch != MJD::zero ) + { + if (Operation::verbose) + cerr << "dsp::TimeDivide::set_start_time set to reference_epoch=" + << reference_epoch.printall() << endl; + + start_time = reference_epoch; + } + else if( division_seconds && division_seconds == unsigned(division_seconds) ) { unsigned integer_seconds = unsigned(division_seconds); unsigned seconds = start_time.get_secs(); @@ -98,9 +105,11 @@ void dsp::TimeDivide::set_predictor (const Pulsar::Predictor* _poly) return; poly = _poly; + period = 0.0; division_seconds = 0; } + //! Set the reference phase (phase of bin zero) void dsp::TimeDivide::set_reference_phase (double phase) { @@ -455,10 +464,10 @@ void dsp::TimeDivide::set_boundaries (const MJD& input_start) // division length specified in turns - if (!poly) + if (!poly && period == 0.0) throw Error (InvalidState, "dsp::TimeDivide::set_boundaries", "division length specified in turns " - "but no folding Pulsar::Predictor"); + "but no folding predictor or period"); if (Operation::verbose) cerr << "dsp::TimeDivide::set_boundaries using polynomial:\n" diff --git a/Signal/Pulsar/dsp/LoadToFold1.h b/Signal/Pulsar/dsp/LoadToFold1.h index aaf33688..03d6b37c 100644 --- a/Signal/Pulsar/dsp/LoadToFold1.h +++ b/Signal/Pulsar/dsp/LoadToFold1.h @@ -150,7 +150,7 @@ namespace dsp { void build_fold (TimeSeries*); void build_fold (Reference::To&, PhaseSeriesUnloader*); void configure_fold (unsigned ifold, TimeSeries* to_fold); - void configure_detection (Detection*, int); + void configure_detection (Detection*, unsigned); PhaseSeriesUnloader* get_unloader (unsigned ifold); size_t get_nfold (); @@ -162,6 +162,8 @@ namespace dsp { //! Prepare the given Archiver void prepare_archiver (Archiver*); + //! Parse the epoch string into a reference epoch + MJD parse_epoch (const std::string&); }; } diff --git a/Signal/Pulsar/dsp/LoadToFoldConfig.h b/Signal/Pulsar/dsp/LoadToFoldConfig.h index d105c559..4a81436a 100644 --- a/Signal/Pulsar/dsp/LoadToFoldConfig.h +++ b/Signal/Pulsar/dsp/LoadToFoldConfig.h @@ -151,6 +151,9 @@ namespace dsp { // length of sub-integrations in seconds double integration_length; + // reference epoch = start of first sub-integration + std::string integration_reference_epoch; + // minimum sub-integration length written to disk double minimum_integration_length; diff --git a/Signal/Pulsar/dsp/Subint.h b/Signal/Pulsar/dsp/Subint.h index 1aac4873..4a7acc70 100644 --- a/Signal/Pulsar/dsp/Subint.h +++ b/Signal/Pulsar/dsp/Subint.h @@ -75,6 +75,14 @@ namespace dsp { //! Get the number of seconds to fold into each sub-integration double get_subint_seconds () const { return divider.get_seconds(); } + //! Set the start time of the first sub-integration + void set_subint_reference_epoch (const MJD& epoch) + { divider.set_reference_epoch (epoch); } + + //! Get the start time of the first sub-integration + MJD get_subint_reference_epoch () const + { return divider.get_reference_epoch (); } + //! Set the number of turns to fold into each sub-integration void set_subint_turns (unsigned subint_turns) { divider.set_turns (subint_turns); } diff --git a/Signal/Pulsar/dsp/TimeDivide.h b/Signal/Pulsar/dsp/TimeDivide.h index c42cfa65..03adbd68 100644 --- a/Signal/Pulsar/dsp/TimeDivide.h +++ b/Signal/Pulsar/dsp/TimeDivide.h @@ -49,6 +49,12 @@ namespace dsp { //! Get the number of seconds in each division double get_seconds () const { return division_seconds; } + //! Set the reference epoch (start time of first division) + void set_reference_epoch (const MJD& epoch) { reference_epoch = epoch; } + + //! Set the reference epoch (start time of first division) + MJD get_reference_epoch () const { return reference_epoch; } + //! Set the number of turns in each division void set_turns (double division_turns); @@ -61,6 +67,12 @@ namespace dsp { //! Get the Pulsar::Predictor used to determine pulse phase const Pulsar::Predictor* get_predictor () const { return poly; } + //! Set the folding period used to determine pulse phase + void set_period (double); + + //! Set the folding period used to determine pulse phase + double get_period () const { return period; } + //! Set the reference phase (phase of bin zero) void set_reference_phase (double phase); @@ -126,6 +138,9 @@ namespace dsp { //! Number of seconds in each division double division_seconds; + //! Reference epoch at start of the first division + MJD reference_epoch; + //! Number of turns in each division double division_turns; @@ -138,6 +153,9 @@ namespace dsp { //! Round division boundaries to integer numbers of division_seconds bool integer_division_seconds_boundaries; + //! The period used to determine pulse phase + double period; + //! The Pulsar::Predictor used to determine pulse phase Reference::To poly; diff --git a/Signal/Pulsar/dspsr.C b/Signal/Pulsar/dspsr.C index e0c8ac28..73f2d0b6 100644 --- a/Signal/Pulsar/dspsr.C +++ b/Signal/Pulsar/dspsr.C @@ -436,6 +436,9 @@ void parse_options (int argc, char** argv) try arg = menu.add (config->integration_length, 'L', "seconds"); arg->set_help ("create integrations of specified duration"); + arg = menu.add (config->integration_reference_epoch, "Lepoch", "MJD"); + arg->set_help ("start time of first sub-integration (when -L is used)"); + arg = menu.add (config->minimum_integration_length, "Lmin", "seconds"); arg->set_help ("minimum integration length output"); diff --git a/configure.in b/configure.in index 32df2ca2..33c6ef51 100644 --- a/configure.in +++ b/configure.in @@ -164,6 +164,7 @@ AC_CONFIG_FILES([Makefile Kernel/Formats/fits/Makefile Kernel/Formats/gmrt/Makefile Kernel/Formats/guppi/Makefile + Kernel/Formats/kat/Makefile Kernel/Formats/lbadr/Makefile Kernel/Formats/lbadr64/Makefile Kernel/Formats/lofar_dal/Makefile