casacore
Loading...
Searching...
No Matches
casacore::MultiFile Class Reference

More...

#include <MultiFile.h>

Public Member Functions

 MultiFile (const String &name, ByteIO::OpenOption, Int blockSize=0, Bool useODirect=False, Bool useCRC=False)
 Open or create a MultiFile with the given name.
 
 MultiFile (const String &name, const std::shared_ptr< MultiFileBase > &parent, ByteIO::OpenOption, Int blockSize=0)
 Open or create a MultiFile with the given name which is nested in the given parent.
 
 ~MultiFile () override
 The destructor flushes and closes the file.
 
 MultiFile (const MultiFile &)=delete
 Copy constructor and assignment not possible.
 
MultiFile & operator= (const MultiFile &)=delete
 
std::shared_ptr< MultiFileBase > makeNested (const std::shared_ptr< MultiFileBase > &parent, const String &name, ByteIO::OpenOption, Int blockSize) const override
 Make a nested MultiFile.
 
void reopenRW () override
 Reopen the underlying file for read/write access.
 
void fsync () override
 Fsync the file (i.e., force the data to be physically written).
 
void show (std::ostream &) const
 Show some info.
 
- Public Member Functions inherited from casacore::MultiFileBase
 MultiFileBase (const String &name, Int blockSize, Bool useODirect)
 Create a MultiFileBase object with the given name.
 
virtual ~MultiFileBase ()
 The destructor flushes dirty blocks and closes the container file.
 
 MultiFileBase (const MultiFileBase &)=delete
 Forbid copy constructor.
 
MultiFileBase & operator= (const MultiFileBase &)=delete
 Forbid assignment.
 
String fileName () const
 Get the file name of the MultiFileBase container file.
 
Bool isWritable () const
 Is the container file writable?
 
Int openFile (const String &name)
 Open the given logical file and return its file id.
 
Int createFile (const String &name, ByteIO::OpenOption=ByteIO::New)
 Create a new logical file and return its file id.
 
void flushFile (Int fileId)
 Flush the possible dirty buffer of the given logical file.
 
void closeFile (Int fileId)
 Close a logical file.
 
void deleteFile (Int fileId)
 Delete a logical file.
 
Int64 fileSize (Int fileId) const
 Get the size of a logical file.
 
Int64 read (Int fileId, void *buffer, Int64 size, Int64 offset)
 Read a block at the given offset in the logical file.
 
Int64 write (Int fileId, const void *buffer, Int64 size, Int64 offset)
 Write a block at the given offset in the logical file.
 
void truncate (Int fileId, Int64 size)
 Truncate the logical file to the given size.
 
void flush ()
 Flush the file by writing all dirty data and all header info.
 
Int64 blockSize () const
 Get the block size used.
 
uInt nfile () const
 Get the nr of logical files.
 
Int64 nblock () const
 Get the total nr of data blocks used.
 
const std::vector< MultiFileInfo > & info () const
 Get the info object (for test purposes mainly).
 
const std::vector< Int64 > & freeBlocks () const
 Get the free blocks (for test purposes mainly).
 
Int fileId (const String &name, Bool throwExcp=True) const
 Return the file id of a file in the MultiFileBase object.
 
Bool useODirect () const
 Is O_DIRECT used?
 

Static Public Member Functions

static std::vector< Int64 > packIndex (const std::vector< Int64 > &blockNrs)
 Compress a block index by looking for subsequent block numbers.
 
static std::vector< Int64 > unpackIndex (const std::vector< Int64 > &blockNrs)
 Decompress a block index by inserting subsequent block numbers.
 
- Static Public Member Functions inherited from casacore::MultiFileBase
static std::shared_ptr< MultiFileBase > openMF (const String &fileName)
 Open the correct MultiFileBase (as plain or HDF5).
 

Private Member Functions

void init (ByteIO::OpenOption option)
 Initialize the MultiFile object.
 
void getInfoVersion2 (Int64 contBlockNr, CanonicalIO &aio)
 Read the file info for the new version 2.
 
void writeVector (CanonicalIO &cio, const std::vector< Int64 > &index)
 Write a vector of Int64.
 
void writeVector (CanonicalIO &cio, const std::vector< uInt > &index)
 
void readVector (CanonicalIO &cio, std::vector< Int64 > &index)
 Read a vector of Int64.
 
void readVector (CanonicalIO &cio, std::vector< uInt > &index)
 
void writeRemainder (MemoryIO &mio, CanonicalIO &, MultiFileBuffer &mfbuf)
 Write the remainder of the header (in case exceeding 1 block).
 
void readRemainder (Int64 headerSize, Int64 blockNr, std::vector< char > &buf)
 Read the remainder of the header into the buffer.
 
void truncateIfNeeded ()
 Truncate the file if blocks are freed at the end.
 
virtual void writeHeaderShow (Int64 ncont, Int64 todo) const
 Header writing hooks (meant for derived test classes).
 
virtual void writeHeaderTest ()
 

Additional Inherited Members

- Protected Member Functions inherited from casacore::MultiFileBase
void resync ()
 Resync with another process by clearing the buffers and rereading the header.
 
void setNewFile ()
 Set the flags and blockSize for a new MultiFile/HDF5.
 
- Protected Attributes inherited from casacore::MultiFileBase
String itsName
 
Int64 itsBlockSize
 
Int64 itsNrBlock
 
Int64 itsHdrCounter
 
std::vector< MultiFileInfo > itsInfo
 
std::shared_ptr< MultiFileBuffer > itsBuffer
 
Bool itsUseODirect
 
Bool itsWritable
 
Bool itsChanged
 
std::vector< Int64 > itsFreeBlocks
 

Detailed Description


Class to combine multiple files in a single one.

Intended use:

Public interface

Review Status

Test programs:
tMultiFile

Synopsis

This class (derived from MultiFileBase) is a container file holding multiple virtual files in a regular file. It is primarily meant as a container file for the storage manager files of a table to reduce the number of files used (especially for Lustre) and to reduce the number of open files (especially when concatenating tables).
MultiFile has the following properties:

  • It can choose an IO buffer size that matches the file system well (e.g., to support a large buffer size on ZFS or Lustre).
  • O_DIRECT (if supported by the OS) can be used to tell the OS kernel to bypass its file cache. It does not speed up the I/O, but it makes I/O behaviour more predictable which a real-time system might need.
  • Often the data to be read from MultiFile will not exactly match the block size and offset. MultiFile will buffer the data and copy the part that is needed (similar to stdio). However, when matching block size and offset are used, data will directly be read into the user's buffer to achieve zero-copy behaviour.
  • It is possible to nest MultiFile objects. Thus a MultiFile can be a file in a parent MultiFile. In this way it is easily possible to store a main table and its subtables (such as an MS) in a single file.
  • Optionally each block is stored with a 32-bit CRC to check if the data in a block are correctly read. The CRC values are stored as part of the header, thus not in each individual block. This is done to make the zero-copy behaviour possible (as described above).
  • The header and the index are stored in the first block. If too large, continuation blocks are used. There are two sets of continuation blocks, which are used alternately. This is done for robustness purposes; there is always a valid one in case of a crash in the middle of writing the continuation blocks. Note that the first header block is written after the continuation blocks, so it always points to a valid set of continuation blocks.

The SetupNewTable constructor has a StorageOption argument to define if a MultiFile has to be used and if so, the buffer size to use. It is also possible to specify that through aipsrc variables.

A virtual file is spread over multiple (fixed size) data blocks in the MultiFile. A data block is never shared by multiple files. For each virtual file MultiFile keeps a MultiFileInfo object telling the file size and the block numbers used for the file. When flushing the MultiFile, this meta info is written into the header block. If it does not fit in the header block, the rest is written in continuation blocks. On open and resync, it is read back. There are two sets of continuation blocks which are alternately used when the header is written. This is done to have a valid header in case of a crash in the middle of writing the header.

A virtual file is represented by an MFFileIO object, which is derived from ByteIO and as such part of the casacore IO framework. It makes it possible for applications to access a virtual file in the same way as a regular file.

It is possible to delete a virtual file. Its blocks will be added to the free block list (which is also stored in the meta info). The MultiFile is truncated when blocks are deleted at the end of the file.

Example

In principle it is possible to use the MultiFile functions directly. However, in general it is much easier to use an MFFileIO object per virtual file as shown below.

// Create a new MultiFile using a block size of 1 MB.
std::shared_ptr<MultiFileBase> mfile
(new MultiFile("file.mf", ByteIO::New, 1048576));
// Create a virtual file in it.
MFFileIO mf1(mfile, "mf1", ByteIO::New);
// Use it (for example) as the sink of AipsIO.
AipsIO stream (&mf1);
// Write values.
stream << (Int)10;
stream << True;
// Seek to beginning of file and read data in.
stream.setpos (0);
Int vali;
Bool valb;
stream >> vali >> valb;
@ New
read/write; create file if not exist.
Definition ByteIO.h:70
MultiFile(const String &name, ByteIO::OpenOption, Int blockSize=0, Bool useODirect=False, Bool useCRC=False)
Open or create a MultiFile with the given name.
int Int
Definition aipstype.h:48
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
const Bool True
Definition aipstype.h:41

To Do

  • The way MultiFile uses continuation blocks can be optimized. In case of file truncation, it could check if only continuation blocks are present after the blocks to be removed. In such a case they can be moved backwards. Also, the number of continuation blocks can shrink. In such a case the unused blocks are not added to the free list; only the number of actually used continuation blocks is decremented. They could be added to the free list later. The reason for the above is that the free list is written into the header blocks before the required number of continuation blocks is known.
  • Keep a journal file telling which files are created and which blocks are allocated for a virtual file.

Definition at line 146 of file MultiFile.h.

Constructor & Destructor Documentation

◆ MultiFile() [1/3]

casacore::MultiFile::MultiFile ( const String & name,
ByteIO::OpenOption ,
Int blockSize = 0,
Bool useODirect = False,
Bool useCRC = False )
explicit

Open or create a MultiFile with the given name.

Upon creation the block size can be given. If 0, it uses the block size of the file system the file is on.
If useODirect=True, the O_DIRECT flag is used (if supported). It tells the kernel to bypass its file cache to have more predictable I/O behaviour.
If useCRC=True, 32-bit CRC values are calculated and stored for each data block. Note that useCRC is only used for new files.

◆ MultiFile() [2/3]

casacore::MultiFile::MultiFile ( const String & name,
const std::shared_ptr< MultiFileBase > & parent,
ByteIO::OpenOption ,
Int blockSize = 0 )
explicit

Open or create a MultiFile with the given name which is nested in the given parent.

Thus data are read/written in the parent file. Upon creation the block size can be given. If 0, it uses the block size of the parent.

◆ ~MultiFile()

casacore::MultiFile::~MultiFile ( )
override

The destructor flushes and closes the file.

◆ MultiFile() [3/3]

casacore::MultiFile::MultiFile ( const MultiFile & )
delete

Copy constructor and assignment not possible.

Member Function Documentation

◆ fsync()

void casacore::MultiFile::fsync ( )
override virtual

Fsync the file (i.e., force the data to be physically written).

Implements casacore::MultiFileBase.

◆ getInfoVersion2()

void casacore::MultiFile::getInfoVersion2 ( Int64 contBlockNr,
CanonicalIO & aio )
private

Read the file info for the new version 2.

◆ init()

void casacore::MultiFile::init ( ByteIO::OpenOption option)
private

Initialize the MultiFile object.

◆ makeNested()

std::shared_ptr< MultiFileBase > casacore::MultiFile::makeNested ( const std::shared_ptr< MultiFileBase > & parent,
const String & name,
ByteIO::OpenOption ,
Int blockSize ) const
override virtual

Make a nested MultiFile.

Implements casacore::MultiFileBase.

◆ operator=()

MultiFile & casacore::MultiFile::operator= ( const MultiFile & )
delete

◆ packIndex()

static std::vector< Int64 > casacore::MultiFile::packIndex ( const std::vector< Int64 > & blockNrs)
static

Compress a block index by looking for subsequent block numbers.

◆ readRemainder()

void casacore::MultiFile::readRemainder ( Int64 headerSize,
Int64 blockNr,
std::vector< char > & buf )
private

Read the remainder of the header into the buffer.

◆ readVector() [1/2]

void casacore::MultiFile::readVector ( CanonicalIO & cio,
std::vector< Int64 > & index )
private

Read a vector of Int64.

◆ readVector() [2/2]

void casacore::MultiFile::readVector ( CanonicalIO & cio,
std::vector< uInt > & index )
private

◆ reopenRW()

void casacore::MultiFile::reopenRW ( )
override virtual

Reopen the underlying file for read/write access.

Nothing will be done if the file is writable already. Otherwise it will be reopened and an exception will be thrown if it is not possible to reopen it for read/write access.

Implements casacore::MultiFileBase.

◆ show()

void casacore::MultiFile::show ( std::ostream & ) const

Show some info.

◆ truncateIfNeeded()

void casacore::MultiFile::truncateIfNeeded ( )
private

Truncate the file if blocks are freed at the end.

◆ unpackIndex()

static std::vector< Int64 > casacore::MultiFile::unpackIndex ( const std::vector< Int64 > & blockNrs)
static

Decompress a block index by inserting subsequent block numbers.

◆ writeHeaderShow()

virtual void casacore::MultiFile::writeHeaderShow ( Int64 ncont,
Int64 todo ) const
private virtual

Header writing hooks (meant for derived test classes).

◆ writeHeaderTest()

virtual void casacore::MultiFile::writeHeaderTest ( )
private virtual

◆ writeRemainder()

void casacore::MultiFile::writeRemainder ( MemoryIO & mio,
CanonicalIO & ,
MultiFileBuffer & mfbuf )
private

Write the remainder of the header (in case exceeding 1 block).

iobuf should be large enough

◆ writeVector() [1/2]

void casacore::MultiFile::writeVector ( CanonicalIO & cio,
const std::vector< Int64 > & index )
private

Write a vector of Int64.

◆ writeVector() [2/2]

void casacore::MultiFile::writeVector ( CanonicalIO & cio,
const std::vector< uInt > & index )
private

The documentation for this class was generated from the following file: