[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

multi_array_chunked_hdf5.hxx
1/************************************************************************/
2/* */
3/* Copyright 2012-2014 by Ullrich Koethe and Thorben Kroeger */
4/* */
5/* This file is part of the VIGRA computer vision library. */
6/* The VIGRA Website is */
7/* http://hci.iwr.uni-heidelberg.de/vigra/ */
8/* Please direct questions, bug reports, and contributions to */
9/* ullrich.koethe@iwr.uni-heidelberg.de or */
10/* vigra@informatik.uni-hamburg.de */
11/* */
12/* Permission is hereby granted, free of charge, to any person */
13/* obtaining a copy of this software and associated documentation */
14/* files (the "Software"), to deal in the Software without */
15/* restriction, including without limitation the rights to use, */
16/* copy, modify, merge, publish, distribute, sublicense, and/or */
17/* sell copies of the Software, and to permit persons to whom the */
18/* Software is furnished to do so, subject to the following */
19/* conditions: */
20/* */
21/* The above copyright notice and this permission notice shall be */
22/* included in all copies or substantial portions of the */
23/* Software. */
24/* */
25/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32/* OTHER DEALINGS IN THE SOFTWARE. */
33/* */
34/************************************************************************/
35
36#ifndef VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
37#define VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
38
39#include <queue>
40
41#include "multi_array_chunked.hxx"
42#include "hdf5impex.hxx"
43
44// Bounds checking Macro used if VIGRA_CHECK_BOUNDS is defined.
45#ifdef VIGRA_CHECK_BOUNDS
46#define VIGRA_ASSERT_INSIDE(diff) \
47 vigra_precondition(this->isInside(diff), "Index out of bounds")
48#else
49#define VIGRA_ASSERT_INSIDE(diff)
50#endif
51
52namespace vigra {
53
54/** \addtogroup ChunkedArrayClasses
55*/
56//@{
57
58/** \weakgroup ParallelProcessing
59 \sa ChunkedArrayHDF5
60*/
61
62/** Implement ChunkedArray as a chunked dataset in an HDF5 file.
63
64 <b>\#include</b> <vigra/multi_array_chunked_hdf5.hxx> <br/>
65 Namespace: vigra
66
67 This uses the native chunking and compression functionality provided by the
68 HDF5 library. Note: This file must only be included when the HDF5 headers
69 and libraries are installed on the system.
70*/
71template <unsigned int N, class T, class Alloc = std::allocator<T> >
73: public ChunkedArray<N, T>
74{
75 /* REMARKS
76 Alternatives are:
77 * Back chunks by HDF5 chunks, possibly using on-the-fly compression. This
78 is in particular useful for existing HDF5 files.
79 * Back chunks by HDF5 datasets. This can be combined with compression
80 (both explicit and on-the-fly) or with memory mapping (using the
81 function H5Dget_offset() to get the offset from the beginning of the file).
82 */
83
84 public:
85
86 class Chunk
87 : public ChunkBase<N, T>
88 {
89 public:
90 typedef typename MultiArrayShape<N>::type shape_type;
91 typedef T value_type;
92 typedef value_type * pointer;
93 typedef value_type & reference;
94
95 Chunk(shape_type const & shape, shape_type const & start,
96 ChunkedArrayHDF5 * array, Alloc const & alloc)
97 : ChunkBase<N, T>(detail::defaultStride(shape))
98 , shape_(shape)
99 , start_(start)
100 , array_(array)
101 , alloc_(alloc)
102 {}
103
104 ~Chunk()
105 {
106 write();
107 }
108
109 std::size_t size() const
110 {
111 return prod(shape_);
112 }
113
114 void write(bool deallocate = true)
115 {
116 if(this->pointer_ != 0)
117 {
118 if(!array_->file_.isReadOnly())
119 {
120 herr_t status = array_->file_.writeBlock(array_->dataset_, start_,
121 MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
122 vigra_postcondition(status >= 0,
123 "ChunkedArrayHDF5: write to dataset failed.");
124 }
125 if(deallocate)
126 {
127 alloc_.deallocate(this->pointer_, this->size());
128 this->pointer_ = 0;
129 }
130 }
131 }
132
133 pointer read()
134 {
135 if(this->pointer_ == 0)
136 {
137 this->pointer_ = alloc_.allocate(this->size());
138 herr_t status = array_->file_.readBlock(array_->dataset_, start_, shape_,
139 MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
140 vigra_postcondition(status >= 0,
141 "ChunkedArrayHDF5: read from dataset failed.");
142 }
143 return this->pointer_;
144 }
145
146 shape_type shape_, start_;
147 ChunkedArrayHDF5 * array_;
148 Alloc alloc_;
149
150 private:
151 Chunk & operator=(Chunk const &);
152 };
153
157 typedef T value_type;
158 typedef value_type * pointer;
159 typedef value_type & reference;
160
161 /** \brief Construct with given 'shape', 'chunk_shape' and 'options',
162 using 'alloc' to manage the in-memory version of the data.
163
164 The data are placed in 'file' at the internal path 'dataset'. Argument
165 'mode' must be one of the following:
166 <ul>
167 <li>HDF5File::New: Create new dataset, possibly deleting any existing content.
168 It is an error to request this mode when the entire
169 'file' is read-only.
170 <li>HDF5File::Replace: Same as New.
171 <li>HDF5File::ReadWrite: Open the dataset for reading and writing. Create
172 the dataset if it doesn't exist. It is an error
173 to request this mode when 'file' is read-only.
174 <li>HDF5File::ReadOnly: Open the dataset for reading. It is an error to
175 request this mode when the dataset doesn't exist.
176 <li>HDF5File::Default: Resolves to ReadOnly when the dataset exists, and
177 to New otherwise.
178 </ul>
179 The supported compression algorithms are:
180 <ul>
181 <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
182 <li>ZLIB_BEST: Best compression using 'zlib', slow.
183 <li>ZLIB_NONE: Use 'zlib' format without compression.
184 <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
185 </ul>
186 */
187 ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
189 shape_type const & shape,
190 shape_type const & chunk_shape=shape_type(),
191 ChunkedArrayOptions const & options = ChunkedArrayOptions(),
192 Alloc const & alloc = Alloc())
193 : ChunkedArray<N, T>(shape, chunk_shape, options),
194 file_(file),
195 dataset_name_(dataset),
196 dataset_(),
197 compression_(options.compression_method),
198 alloc_(alloc)
199 {
200 init(mode);
201 }
202
203 /** \brief Construct for an already existing dataset with given 'options',
204 using 'alloc' to manage the in-memory version of the data.
205
206 The data must be located in 'file' at the internal path 'dataset'. The
207 array's shape and chunk_shape are read from the file. It is an error
208 to use this constructor when 'dataset' doesn't exist.
209
210 Argument 'mode' must be one of the following:
211 <ul>
212 <li>HDF5File::ReadWrite: Open the dataset for reading and writing. It is an error
213 to request this mode when 'file' is read-only.
214 <li>HDF5File::ReadOnly: Open the dataset for reading (default).
215 <li>HDF5File::Default: Same as ReadOnly.
216 </ul>
217 The supported compression algorithms are:
218 <ul>
219 <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
220 <li>ZLIB_BEST: Best compression using 'zlib', slow.
221 <li>ZLIB_NONE: Use 'zlib' format without compression.
222 <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
223 </ul>
224 */
225 ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
226 HDF5File::OpenMode mode = HDF5File::ReadOnly,
227 ChunkedArrayOptions const & options = ChunkedArrayOptions(),
228 Alloc const & alloc = Alloc())
229 : ChunkedArray<N, T>(shape_type(),
230 ceilPower2<N>(shape_type(file.getChunkShape(dataset).begin())),
231 options),
232 file_(file),
233 dataset_name_(dataset),
234 dataset_(),
235 compression_(options.compression_method),
236 alloc_(alloc)
237 {
238 init(mode);
239 }
240
241
242 // copy constructor
244 : ChunkedArray<N, T>(src),
245 file_(src.file_),
246 dataset_name_(src.dataset_name_),
247 compression_(src.compression_),
248 alloc_(src.alloc_)
249 {
250 if( file_.isReadOnly() )
251 init(HDF5File::ReadOnly);
252 else
253 init(HDF5File::ReadWrite);
254 }
255
256 void init(HDF5File::OpenMode mode)
257 {
258 bool exists = file_.existsDataset(dataset_name_);
259
260 if(mode == HDF5File::Replace)
261 {
262 mode = HDF5File::New;
263 }
264 else if(mode == HDF5File::Default)
265 {
266 if(exists)
267 mode = HDF5File::ReadOnly;
268 else
269 mode = HDF5File::New;
270 }
271
272 if(mode == HDF5File::ReadOnly)
273 file_.setReadOnly();
274 else
275 vigra_precondition(!file_.isReadOnly(),
276 "ChunkedArrayHDF5(): 'mode' is incompatible with read-only file.");
277
278 vigra_precondition(exists || !file_.isReadOnly(),
279 "ChunkedArrayHDF5(): dataset does not exist, but file is read-only.");
280
281 if(!exists || mode == HDF5File::New)
282 {
283 // FIXME: set rdcc_nbytes to 0 (disable cache, because we don't
284 // need two caches
285 // H5Pset_chunk_cache (dapl, rdcc_nslots, rdcc_nbytes, rdcc_w0);
286 // Chunk cache size (rdcc_nbytes) should be large
287 // enough to hold all the chunks in a selection
288 // * If this is not possible, it may be best to disable chunk
289 // caching altogether (set rdcc_nbytes to 0)
290 // * rdcc_slots should be a prime number that is at
291 // least 10 to 100 times the number of chunks that can fit
292 // into rdcc_nbytes
293 // * rdcc_w0 should be set to 1 if chunks that have been
294 // fully read/written will never be read/written again
295 //
296 // the above may be WRONG in general - it may only apply if the
297 // chunk size in the file matches the chunk size in the CachedArray.
298 // Otherwise, make sure that the file cache can hold at least as many
299 // chunks as are needed for a single array chunk.
300 if(compression_ == DEFAULT_COMPRESSION)
301 compression_ = ZLIB_FAST;
302 vigra_precondition(compression_ != LZ4,
303 "ChunkedArrayHDF5(): HDF5 does not support LZ4 compression.");
304
305 vigra_precondition(this->size() > 0,
306 "ChunkedArrayHDF5(): invalid shape.");
307 typename detail::HDF5TypeTraits<T>::value_type init(this->fill_scalar_);
308 dataset_ = file_.createDataset<N, T>(dataset_name_,
309 this->shape_,
310 init,
311 this->chunk_shape_,
312 compression_);
313 }
314 else
315 {
316 dataset_ = file_.getDatasetHandleShared(dataset_name_);
317
318 // check shape
319 ArrayVector<hsize_t> fileShape(file_.getDatasetShape(dataset_name_));
320 typedef detail::HDF5TypeTraits<T> TypeTraits;
321 if(TypeTraits::numberOfBands() > 1)
322 {
323 vigra_precondition(fileShape.size() == N+1,
324 "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
325 vigra_precondition(fileShape[0] == static_cast<unsigned>(TypeTraits::numberOfBands()),
326 "ChunkedArrayHDF5(file, dataset): dataset has wrong number of bands.");
327 shape_type shape(fileShape.begin()+1);
328 if(this->size() > 0)
329 {
330 vigra_precondition(shape == this->shape_,
331 "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
332 }
333 else
334 {
335 this->shape_ = shape;
336 }
337 }
338 else
339 {
340 vigra_precondition(fileShape.size() == N,
341 "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
342 shape_type shape(fileShape.begin());
343 if(this->size() > 0)
344 {
345 vigra_precondition(shape == this->shape_,
346 "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
347 }
348 else
349 {
350 this->shape_ = shape;
351 ChunkStorage(detail::computeChunkArrayShape(shape, this->bits_, this->mask_)).swap(this->handle_array_);
352 }
353 }
354 typename ChunkStorage::iterator i = this->handle_array_.begin(),
355 end = this->handle_array_.end();
356 for(; i != end; ++i)
357 {
358 i->chunk_state_.store(base_type::chunk_asleep);
359 }
360 }
361 }
362
363 ~ChunkedArrayHDF5()
364 {
365 closeImpl(true);
366 }
367
368 void close()
369 {
370 closeImpl(false);
371 }
372
373 void closeImpl(bool force_destroy)
374 {
375 flushToDiskImpl(true, force_destroy);
376 file_.close();
377 }
378
379 void flushToDisk()
380 {
381 flushToDiskImpl(false, false);
382 }
383
384 void flushToDiskImpl(bool destroy, bool force_destroy)
385 {
386 if(file_.isReadOnly())
387 return;
388
389 threading::lock_guard<threading::mutex> guard(*this->chunk_lock_);
390 typename ChunkStorage::iterator i = this->handle_array_.begin(),
391 end = this->handle_array_.end();
392 if(destroy && !force_destroy)
393 {
394 for(; i != end; ++i)
395 {
396 vigra_precondition(i->chunk_state_.load() <= 0,
397 "ChunkedArrayHDF5::close(): cannot close file because there are active chunks.");
398 }
399 i = this->handle_array_.begin();
400 }
401 for(; i != end; ++i)
402 {
403 Chunk * chunk = static_cast<Chunk*>(i->pointer_);
404 if(!chunk)
405 continue;
406 if(destroy)
407 {
408 delete chunk;
409 i->pointer_ = 0;
410 }
411 else
412 {
413 chunk->write(false);
414 }
415 }
416 file_.flushToDisk();
417 }
418
419 virtual bool isReadOnly() const
420 {
421 return file_.isReadOnly();
422 }
423
424 virtual pointer loadChunk(ChunkBase<N, T> ** p, shape_type const & index)
425 {
426 vigra_precondition(file_.isOpen(),
427 "ChunkedArrayHDF5::loadChunk(): file was already closed.");
428 if(*p == 0)
429 {
430 *p = new Chunk(this->chunkShape(index), index*this->chunk_shape_, this, alloc_);
431 this->overhead_bytes_ += sizeof(Chunk);
432 }
433 return static_cast<Chunk *>(*p)->read();
434 }
435
436 virtual bool unloadChunk(ChunkBase<N, T> * chunk, bool /* destroy */)
437 {
438 if(!file_.isOpen())
439 return true;
440 static_cast<Chunk *>(chunk)->write();
441 return false;
442 }
443
444 virtual std::string backend() const
445 {
446 return "ChunkedArrayHDF5<'" + file_.filename() + "/" + dataset_name_ + "'>";
447 }
448
449 virtual std::size_t dataBytes(ChunkBase<N,T> * c) const
450 {
451 return c->pointer_ == 0
452 ? 0
453 : static_cast<Chunk*>(c)->size()*sizeof(T);
454 }
455
456 virtual std::size_t overheadBytesPerChunk() const
457 {
458 return sizeof(Chunk) + sizeof(SharedChunkHandle<N, T>);
459 }
460
461 std::string fileName() const
462 {
463 return file_.filename();
464 }
465
466 std::string datasetName() const
467 {
468 return dataset_name_;
469 }
470
471 HDF5File file_;
472 std::string dataset_name_;
473 HDF5HandleShared dataset_;
474 CompressionMethod compression_;
475 Alloc alloc_;
476};
477
478//@}
479
480} // namespace vigra
481
482#undef VIGRA_ASSERT_INSIDE
483
484#endif /* VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX */
Definition multi_array_chunked_hdf5.hxx:74
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode, shape_type const &shape, shape_type const &chunk_shape=shape_type(), ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct with given 'shape', 'chunk_shape' and 'options', using 'alloc' to manage the in-memory vers...
Definition multi_array_chunked_hdf5.hxx:187
virtual std::size_t overheadBytesPerChunk() const
Bytes of main memory needed to manage a single chunk.
Definition multi_array_chunked_hdf5.hxx:456
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode=HDF5File::ReadOnly, ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct for an already existing dataset with given 'options', using 'alloc' to manage the in-memory...
Definition multi_array_chunked_hdf5.hxx:225
Option object for ChunkedArray construction.
Definition multi_array_chunked.hxx:1268
Interface and base class for chunked arrays.
Definition multi_fwd.hxx:137
shape_type const & shape() const
Return the shape in this array.
MultiArrayIndex size() const
Return the number of elements in this array.
std::size_t dataBytes() const
Bytes of main memory occupied by the array's data.
Definition multi_array_chunked.hxx:1674
shape_type const & chunkShape() const
Return the global chunk shape.
iterator end()
Create the end iterator for scan-order iteration over the entire chunked array.
Definition multi_array_chunked.hxx:2389
iterator begin()
Create a scan-order iterator for the entire chunked array.
Definition multi_array_chunked.hxx:2381
Access to HDF5 files.
Definition hdf5impex.hxx:975
void writeBlock(std::string datasetName, typename MultiArrayShape< N >::type blockOffset, const MultiArrayView< N, T, Stride > &array)
Write a multi array into a larger volume. blockOffset determines the position, where array is written...
Definition hdf5impex.hxx:1845
void readBlock(std::string datasetName, typename MultiArrayShape< N >::type blockOffset, typename MultiArrayShape< N >::type blockShape, MultiArrayView< N, T, Stride > array)
Read a block of data into a multi array. This function allows to read a small block out of a larger v...
Definition hdf5impex.hxx:2068
OpenMode
Set how a file is opened.
Definition hdf5impex.hxx:1031
bool existsDataset(std::string datasetName) const
Check if given datasetName exists.
Definition hdf5impex.hxx:1354
void close()
Close the current file.
Definition hdf5impex.hxx:1199
void flushToDisk()
Immediately write all data to disk.
Definition hdf5impex.hxx:2234
HDF5HandleShared createDataset(std::string datasetName, TinyVector< MultiArrayIndex, N > const &shape, typename detail::HDF5TypeTraits< T >::value_type init=typename detail::HDF5TypeTraits< T >::value_type(), TinyVector< MultiArrayIndex, N > const &chunkSize=(TinyVector< MultiArrayIndex, N >()), int compressionParameter=0)
Create a new dataset. This function can be used to create a dataset filled with a default value init,...
Definition hdf5impex.hxx:2761
std::string filename() const
Get the name of the associated file.
Definition hdf5impex.hxx:1347
ArrayVector< hsize_t > getDatasetShape(std::string datasetName) const
Get the shape of each dimension of a certain dataset.
Definition hdf5impex.hxx:1394
HDF5HandleShared getDatasetHandleShared(std::string const &datasetName) const
Obtain a shared HDF5 handle of a dataset.
Definition hdf5impex.hxx:1527
Base class for, and view to, vigra::MultiArray.
Definition multi_fwd.hxx:127
Main MultiArray class containing the memory management.
Definition multi_fwd.hxx:131
view_type::iterator iterator
Definition multi_array.hxx:2550
Class for fixed size vectors.
Definition tinyvector.hxx:1008
UInt32 ceilPower2(UInt32 x)
Round up to the nearest power of 2.
Definition mathutil.hxx:294
NumericTraits< V >::Promote prod(TinyVectorBase< V, SIZE, D1, D2 > const &l)
product of the vector's elements
Definition tinyvector.hxx:2097

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.12.2