LLFIO
v2.00
|
A handle to synthesised, non-cryptographic, pseudo-random data. More...
#include "fast_random_file_handle.hpp"
Classes | |
struct | prng |
Public Types | |
using | dev_t = file_handle::dev_t |
using | ino_t = file_handle::ino_t |
using | path_view_type = file_handle::path_view_type |
using | path_type = byte_io_handle::path_type |
using | extent_type = byte_io_handle::extent_type |
using | size_type = byte_io_handle::size_type |
using | mode = byte_io_handle::mode |
using | creation = byte_io_handle::creation |
using | caching = byte_io_handle::caching |
using | flag = byte_io_handle::flag |
using | buffer_type = byte_io_handle::buffer_type |
using | const_buffer_type = byte_io_handle::const_buffer_type |
using | buffers_type = byte_io_handle::buffers_type |
using | const_buffers_type = byte_io_handle::const_buffers_type |
template<class T > | |
using | io_request = byte_io_handle::io_request< T > |
template<class T > | |
using | io_result = byte_io_handle::io_result< T > |
using | barrier_kind = byte_io_multiplexer::barrier_kind |
using | registered_buffer_type = byte_io_multiplexer::registered_buffer_type |
template<class T > | |
using | awaitable = byte_io_multiplexer::awaitable< T > |
using | unique_id_type = QUICKCPPLIB_NAMESPACE::integers128::uint128 |
The unique identifier type used by this handle. | |
using | unique_id_type_hasher = QUICKCPPLIB_NAMESPACE::integers128::uint128_hasher |
A hasher for the unique identifier type used by this handle. | |
Public Member Functions | |
fast_random_file_handle ()=default | |
Default constructor. | |
fast_random_file_handle (extent_type length, span< const byte > seed) | |
Constructor. Seed is not much use past sixteen bytes. | |
fast_random_file_handle (fast_random_file_handle &&o) noexcept | |
Implicit move construction of fast_random_file_handle permitted. | |
fast_random_file_handle (const fast_random_file_handle &)=delete | |
No copy construction (use clone() ) | |
fast_random_file_handle & | operator= (fast_random_file_handle &&o) noexcept |
Move assignment of fast_random_file_handle permitted. | |
fast_random_file_handle & | operator= (const fast_random_file_handle &)=delete |
No copy assignment. | |
void | swap (fast_random_file_handle &o) noexcept |
Swap with another instance. | |
virtual result< void > | close () noexcept override |
Immediately close the native handle type managed by this handle. | |
virtual result< extent_type > | maximum_extent () const noexcept override |
Return the current maximum permitted extent of the file. | |
virtual result< extent_type > | truncate (extent_type newsize) noexcept override |
Resize the current maximum permitted extent of the random file to the given extent. | |
virtual result< extent_type > | zero (file_handle::extent_pair extent, deadline=deadline()) noexcept override |
Zero a portion of the random file (does nothing). | |
virtual result< std::vector< file_handle::extent_pair > > | extents () const noexcept override |
Return a single extent of the maximum extent. | |
virtual result< extent_guard > | lock_file_range (extent_type offset, extent_type bytes, lock_kind kind, deadline=deadline()) noexcept override |
EXTENSION: Tries to lock the range of bytes specified for shared or exclusive access. Note that this may, or MAY NOT, observe whole file locks placed with lock() , lock_shared() etc. More... | |
virtual void | unlock_file_range (extent_type, extent_type) noexcept override |
EXTENSION: Unlocks a byte range previously locked. More... | |
virtual result< void > | _replace_handle (handle &&o_) noexcept override |
void | swap (file_handle &o) noexcept |
Swap with another instance. | |
void | swap (handle &o) noexcept |
Swap with another instance. | |
result< file_handle > | reopen (mode mode_=mode::unchanged, caching caching_=caching::unchanged, deadline d=std::chrono::seconds(30)) const noexcept |
template<class... Args> | |
bool | try_reopen (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_reopen_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_reopen_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< extent_pair > | clone_extents_to (extent_pair extent, byte_io_handle &dest, byte_io_handle::extent_type destoffset, deadline d={}, bool force_copy_now=false, bool emulate_if_unsupported=true) noexcept |
Clones the extents referred to by extent to dest at destoffset . This is how you ought to copy file content, including within the same file. This is fundamentally a racy call with respect to concurrent modification of the files. More... | |
result< extent_pair > | clone_extents_to (byte_io_handle &dest, deadline d={}, bool force_copy_now=false, bool emulate_if_unsupported=true) noexcept |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
result< extent_type > | zero (extent_type offset, extent_type bytes, deadline d=deadline()) noexcept |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
template<class... Args> | |
bool | try_zero (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_zero_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_zero_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< void > | lock_file () noexcept |
Locks the inode referred to by the open handle for exclusive access. More... | |
virtual bool | try_lock_file () noexcept |
Tries to lock the inode referred to by the open handle for exclusive access, returning false if lock is currently unavailable. More... | |
virtual void | unlock_file () noexcept |
Unlocks a previously acquired exclusive lock. | |
virtual result< void > | lock_file_shared () noexcept |
Locks the inode referred to by the open handle for shared access. More... | |
virtual bool | try_lock_file_shared () noexcept |
Tries to lock the inode referred to by the open handle for shared access, returning false if lock is currently unavailable. More... | |
virtual void | unlock_file_shared () noexcept |
Unlocks a previously acquired shared lock. | |
result< extent_guard > | lock_file_range (io_request< buffers_type > reqs, deadline d=deadline()) noexcept |
result< extent_guard > | lock_file_range (io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept |
template<class... Args> | |
bool | try_lock_file_range (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_lock_file_range_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_lock_file_range_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
byte_io_multiplexer * | multiplexer () const noexcept |
The i/o multiplexer this handle will use to multiplex i/o. If this returns null, then this handle has not been registered with an i/o multiplexer yet. | |
virtual result< void > | set_multiplexer (byte_io_multiplexer *c=this_thread::multiplexer()) noexcept |
Sets the i/o multiplexer this handle will use to implement read() , write() and barrier() . More... | |
size_t | max_buffers () const noexcept |
The maximum number of buffers which a single read or write syscall can (atomically) process at a time for this specific open handle. On POSIX, this is known as IOV_MAX . Preferentially uses any i/o multiplexer set over the virtually overridable per-class implementation. More... | |
result< registered_buffer_type > | allocate_registered_buffer (size_t &bytes) noexcept |
Request the allocation of a new registered i/o buffer with the system suitable for maximum performance i/o, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation. More... | |
io_result< buffers_type > | read (io_request< buffers_type > reqs, deadline d=deadline()) noexcept |
Read data from the open handle, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation. More... | |
io_result< buffers_type > | read (registered_buffer_type base, io_request< buffers_type > reqs, deadline d=deadline()) noexcept |
io_result< size_type > | read (extent_type offset, std::initializer_list< buffer_type > lst, deadline d=deadline()) noexcept |
template<class... Args> | |
bool | try_read (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_read_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_read_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
io_result< const_buffers_type > | write (io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept |
Write data to the open handle, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation. More... | |
io_result< const_buffers_type > | write (registered_buffer_type base, io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept |
io_result< size_type > | write (extent_type offset, std::initializer_list< const_buffer_type > lst, deadline d=deadline()) noexcept |
template<class... Args> | |
bool | try_write (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_write_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_write_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual io_result< const_buffers_type > | barrier (io_request< const_buffers_type > reqs=io_request< const_buffers_type >(), barrier_kind kind=barrier_kind::nowait_data_only, deadline d=deadline()) noexcept |
Issue a write reordering barrier such that writes preceding the barrier will reach storage before writes after this barrier, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation. More... | |
io_result< const_buffers_type > | barrier (barrier_kind kind, deadline d=deadline()) noexcept |
template<class... Args> | |
bool | try_barrier (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_barrier_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_barrier_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
awaitable< io_result< buffers_type > > | co_read (io_request< buffers_type > reqs, deadline d=deadline()) noexcept |
A coroutinised equivalent to .read() which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .read() if no i/o multiplexer has been set on this handle! More... | |
awaitable< io_result< buffers_type > > | co_read (registered_buffer_type base, io_request< buffers_type > reqs, deadline d=deadline()) noexcept |
awaitable< io_result< const_buffers_type > > | co_write (io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept |
A coroutinised equivalent to .write() which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .write() if no i/o multiplexer has been set on this handle! More... | |
awaitable< io_result< const_buffers_type > > | co_write (registered_buffer_type base, io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept |
awaitable< io_result< const_buffers_type > > | co_barrier (io_request< const_buffers_type > reqs=io_request< const_buffers_type >(), barrier_kind kind=barrier_kind::nowait_data_only, deadline d=deadline()) noexcept |
A coroutinised equivalent to .barrier() which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .barrier() if no i/o multiplexer has been set on this handle! More... | |
flag | flags () const noexcept |
The flags this handle was opened with. | |
QUICKCPPLIB_BITFIELD_BEGIN_T (flag, uint16_t) | |
Bitwise flags which can be specified. More... | |
virtual result< path_type > | current_path () const noexcept |
result< handle > | clone () const noexcept |
virtual native_handle_type | release () noexcept |
Release the native handle type managed by this handle. | |
bool | is_valid () const noexcept |
True if the handle is valid (and usually open) | |
bool | is_readable () const noexcept |
True if the handle is readable. | |
bool | is_writable () const noexcept |
True if the handle is writable. | |
bool | is_append_only () const noexcept |
True if the handle is append only. | |
virtual result< void > | set_append_only (bool enable) noexcept |
EXTENSION: Changes whether this handle is append only or not. More... | |
bool | is_multiplexable () const noexcept |
True if multiplexable. | |
bool | is_nonblocking () const noexcept |
True if nonblocking. | |
bool | is_seekable () const noexcept |
True if seekable. | |
bool | requires_aligned_io () const noexcept |
True if requires aligned i/o. | |
bool | is_kernel_handle () const noexcept |
True if native_handle() is a valid kernel handle. | |
bool | is_regular () const noexcept |
True if a regular file or device. | |
bool | is_directory () const noexcept |
True if a directory. | |
bool | is_symlink () const noexcept |
True if a symlink. | |
bool | is_pipe () const noexcept |
True if a pipe. | |
bool | is_socket () const noexcept |
True if a socket. | |
bool | is_multiplexer () const noexcept |
True if a multiplexer like BSD kqueues, Linux epoll or Windows IOCP. | |
bool | is_process () const noexcept |
True if a process. | |
bool | is_section () const noexcept |
True if a memory section. | |
bool | is_allocation () const noexcept |
True if a memory allocation. | |
bool | is_path () const noexcept |
True if a path or a directory. | |
bool | is_tls_socket () const noexcept |
True if a TLS socket. | |
bool | is_http_socket () const noexcept |
True if a HTTP socket. | |
caching | kernel_caching () const noexcept |
Kernel cache strategy used by this handle. | |
bool | are_reads_from_cache () const noexcept |
True if the handle uses the kernel page cache for reads. | |
bool | are_writes_durable () const noexcept |
True if writes are safely on storage on completion. | |
bool | are_safety_barriers_issued () const noexcept |
True if issuing safety fsyncs is on. | |
native_handle_type | native_handle () const noexcept |
The native handle used by this handle. | |
dev_t | st_dev () const noexcept |
Unless flag::disable_safety_unlinks is set, the device id of the file when opened. | |
ino_t | st_ino () const noexcept |
Unless flag::disable_safety_unlinks is set, the inode of the file when opened. When combined with st_dev(), forms a unique identifier on this system. | |
unique_id_type | unique_id () const noexcept |
A unique identifier for this handle across the entire system. Can be used in hash tables etc. | |
virtual result< path_handle > | parent_path_handle (deadline d=std::chrono::seconds(30)) const noexcept |
Obtain a handle to the path currently containing this handle's file entry. More... | |
template<class... Args> | |
bool | try_parent_path_handle (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_parent_path_handle_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_parent_path_handle_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< void > | relink (const path_handle &base, path_view_type path, bool atomic_replace=true, deadline d=std::chrono::seconds(30)) noexcept |
Relinks the current path of this open handle to the new path specified. If atomic_replace is true, the relink atomically and silently replaces any item at the new path specified. This operation is both atomic and matching POSIX behaviour even on Microsoft Windows where no Win32 API can match POSIX semantics. More... | |
template<class... Args> | |
bool | try_relink (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_relink_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_relink_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< void > | link (const path_handle &base, path_view_type path, deadline d=std::chrono::seconds(30)) noexcept |
Links the inode referred to by this open handle to the path specified. The current path of this open handle is not changed, unless it has no current path due to being unlinked. More... | |
template<class... Args> | |
bool | try_link (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_link_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_link_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< void > | unlink (deadline d=std::chrono::seconds(30)) noexcept |
Unlinks the current path of this open handle, causing its entry to immediately disappear from the filing system. More... | |
template<class... Args> | |
bool | try_unlink (Args &&... args) noexcept |
template<class... Args, class Rep , class Period > | |
bool | try_unlink_for (Args &&... args, const std::chrono::duration< Rep, Period > &duration) noexcept |
template<class... Args, class Clock , class Duration > | |
bool | try_unlink_until (Args &&... args, const std::chrono::time_point< Clock, Duration > &timeout) noexcept |
virtual result< span< path_view_component > > | list_extended_attributes (span< byte > tofill) const noexcept |
Fill the supplied buffer with the names of all extended attributes set on this file or directory, returning a span of path view components. More... | |
virtual result< span< byte > > | get_extended_attribute (span< byte > tofill, path_view_component name) const noexcept |
Retrieve the value of an extended attribute set on this file or directory. More... | |
virtual result< void > | set_extended_attribute (path_view_component name, span< const byte > value) noexcept |
Sets the value of an extended attribute on this file or directory. More... | |
virtual result< void > | remove_extended_attribute (path_view_component) noexcept |
Removes the extended attribute set on this file or directory. More... | |
result< size_t > | copy_extended_attributes (const fs_handle &src, bool replace_all_local_attributes=false) noexcept |
Copies the extended attributes from one entity to another, optionally replacing all the extended attributes on the destination. More... | |
Static Public Member Functions | |
static result< fast_random_file_handle > | fast_random_file (extent_type bytes=(extent_type) -1, mode _mode=mode::read, span< const byte > seed={}) noexcept |
static result< file_handle > | file (const path_handle &base, path_view_type path, mode _mode=mode::read, creation _creation=creation::open_existing, caching _caching=caching::all, flag flags=flag::none) noexcept |
static result< file_handle > | uniquely_named_file (const path_handle &dirpath, mode _mode=mode::write, caching _caching=caching::temporary, flag flags=flag::none) noexcept |
static result< file_handle > | temp_file (path_view_type name=path_view_type(), mode _mode=mode::write, creation _creation=creation::if_needed, caching _caching=caching::temporary, flag flags=flag::unlink_on_first_close) noexcept |
static result< file_handle > | temp_inode (const path_handle &dirh=path_discovery::storage_backed_temporary_files_directory(), mode _mode=mode::write, caching _caching=caching::temporary, flag flags=flag::none) noexcept |
Protected Member Functions | |
result< void > | _perms_check () const noexcept |
virtual size_t | _do_max_buffers () const noexcept override |
The virtualised implementation of max_buffers() used if no multiplexer has been set. | |
virtual io_result< const_buffers_type > | _do_barrier (io_request< const_buffers_type > reqs=io_request< const_buffers_type >(), barrier_kind=barrier_kind::nowait_data_only, deadline=deadline()) noexcept override |
The virtualised implementation of barrier() used if no multiplexer has been set. | |
virtual io_result< buffers_type > | _do_read (io_request< buffers_type > reqs, deadline d=deadline()) noexcept override |
The virtualised implementation of read() used if no multiplexer has been set. | |
virtual io_result< const_buffers_type > | _do_write (io_request< const_buffers_type > reqs, deadline d=deadline()) noexcept override |
The virtualised implementation of write() used if no multiplexer has been set. | |
virtual result< registered_buffer_type > | _do_allocate_registered_buffer (size_t &bytes) noexcept |
The virtualised implementation of allocate_registered_buffer() used if no multiplexer has been set. | |
virtual io_result< buffers_type > | _do_read (registered_buffer_type base, io_request< buffers_type > reqs, deadline d) noexcept |
The virtualised implementation of read() used if no multiplexer has been set. | |
virtual io_result< const_buffers_type > | _do_write (registered_buffer_type base, io_request< const_buffers_type > reqs, deadline d) noexcept |
The virtualised implementation of write() used if no multiplexer has been set. | |
io_result< buffers_type > | _do_multiplexer_read (registered_buffer_type &&base, io_request< buffers_type > reqs, deadline d) noexcept |
io_result< const_buffers_type > | _do_multiplexer_write (registered_buffer_type &&base, io_request< const_buffers_type > reqs, deadline d) noexcept |
io_result< const_buffers_type > | _do_multiplexer_barrier (registered_buffer_type &&base, io_request< const_buffers_type > reqs, barrier_kind kind, deadline d) noexcept |
result< void > | _fetch_inode () const noexcept |
Fill in _devid and _inode from the handle via fstat() | |
Protected Attributes | |
llfio_v2_xxx::fast_random_file_handle::prng | _prng |
extent_type | _length {0} |
byte_io_multiplexer * | _ctx {nullptr} |
union { | |
native_handle_type _v | |
struct { | |
intptr_t _padding0_ | |
uint32_t _padding1_ | |
flag flags | |
uint16_t _padding2_ | |
} _ | |
}; | |
dev_t | _devid {0} |
ino_t | _inode {0} |
A handle to synthesised, non-cryptographic, pseudo-random data.
This implementation of file handle provides read-only file data of random bits based on Bob Jenkins' 32-bit JSF PRNG (http://burtleburtle.net/bob/rand/smallprng.html). It works by initialising the prng state from the seed you provide at construction, and then for each 4 byte block it copies the 16 bytes of prng state, overwrites the first eight bytes with the offset divided by 4, and performs a PRNG round to generate a fairly unique number. As there are eight bytes of randomness being mixed with eight bytes of counter, this will not be a particularly random stream, but it's probably not awful either.
Note that writes to this handle are permitted if it was opened with write permission, but writes have no effect.
The use for a file handle full of random data may not be obvious. The first is to obfuscate another file's data using algorithm::xor_handle_adapter
. The second is for mock ups in testing, where this file handle stands in for some other (large) file, and you are testing throughput or latency in processing code.
The third is for unit testing randomly corrupted file data. algorithm::mix_handle_adapter
can randomly mix scatter gather buffers from this file handle into another file handle in order to test how well handling code copes with random data corruption.
On a 3.1GHz Intel Skylake CPU where memcpy()
can do ~12Gb/sec:
The current implementation spots when it can do 16x simultaneous PRNG rounds, and thus can fill a cache line at a time. The Skylake CPU used to benchmark the code dispatches around four times the throughput with this, however there is likely still performance left on the table.
If someone were bothered to rewrite the JSF PRNG into SIMD, it is possible one could approach memcpy()
in performance. One would probably need to use AVX-512 however, as the JSF PRNG makes heavy use of bit rotation, which is slow before AVX-512 as it must be emulated with copious bit shifting and masking.
|
inline noexcept inherited |
Request the allocation of a new registered i/o buffer with the system suitable for maximum performance i/o, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation.
bytes | The size of the i/o buffer requested. This may be rounded (considerably) upwards, you should always use the value returned. |
Some i/o multiplexer implementations have the ability to allocate i/o buffers in special memory shared between the i/o hardware and user space processes. Using registered i/o buffers can entirely eliminate all kernel transitions and memory copying during i/o, and can saturate very high end hardware from a single kernel thread.
If no multiplexer is set, the default implementation uses map_handle
to allocate raw memory pages from the OS kernel. If the requested buffer size is a multiple of one of the larger page sizes from utils::page_sizes()
, an attempt to satisfy the request using the larger page size will be attempted first.
|
inline virtual noexcept inherited |
Issue a write reordering barrier such that writes preceding the barrier will reach storage before writes after this barrier, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation.
caching::reads
which means that all writes form a strict sequential order not completing until acknowledged by the storage device. Filing systems can and do use different algorithms to give much better performance with caching::reads
, some (e.g. ZFS) spectacularly better.
reqs | A scatter-gather and offset request for what range to barrier. May be ignored on some platforms which always write barrier the entire file. Supplying a default initialised reqs write barriers the entire file. |
kind | Which kind of write reordering barrier to perform. |
d | An optional deadline by which the i/o must complete, else it is cancelled. Note function may return significantly after this deadline if the i/o takes long to cancel. |
|
inline noexcept inherited |
Clone this handle (copy constructor is disabled to avoid accidental copying)
|
inline virtual noexcept inherited |
Clones the extents referred to by extent
to dest
at destoffset
. This is how you ought to copy file content, including within the same file. This is fundamentally a racy call with respect to concurrent modification of the files.
Some of the filesystems on the major operating systems implement copy-on-write extent reference counting, and thus can very cheaply link a "copy" of extents in one file into another file (at the time of writing - Linux: XFS, btrfs, ocfs2, smbfs; Mac OS: APFS; Windows: ReFS, CIFS). Upon first write into an extent, only then is a new copy formed for the specific extents being modified. Note that extent cloning is usually only possible in cluster sized amounts, so if the portion you clone is not so aligned, new extents will be allocated for the spill into non-aligned portions. Obviously, cloning an entire file in a single shot does not have that problem.
Networked filing systems typically can also implement remote extent copying, such that extents can be copied between files entirely upon the remote server, and avoiding the copy going over the network. This is usually far more efficient.
This implementation first enumerates the valid extents for the region requested, and only clones extents which are reported as valid. It then iterates the platform specific syscall to cause the extents to be cloned in utils::page_allocator<T>
sized chunks (i.e. the next large page greater than or equal to 1Mb). Generally speaking, if the dedicated syscalls fail, the implementation falls back to a user space emulation, unless emulate_if_unsupported
is false.
If the region being cloned does not exist in the source file, the region is truncated to what is available. If the destination file is not big enough to receive the cloned region, it is extended. If the clone is occurring within the same inode, you should ensure that the regions do not overlap, as cloning regions which overlap has platform-specific semantics. If they do overlap, you should always set force_copy_now
for portable code.
utils::page_allocator<T>
sized chunks. It will fail with an error code comparing equal to errc::invalid_parameter
. If you really want the copy to happen now, and not later via copy-on-write, set force_copy_now
. Note that this forces emulate_if_unsupported
to true.
If dest
is not a file_handle
, sendfile()
is used and the destination offset and gaps in the source valid extents are ignored.
|
inline noexcept inherited |
A coroutinised equivalent to .barrier()
which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .barrier()
if no i/o multiplexer has been set on this handle!
The awaitable returned is eager i.e. it immediately begins the i/o. If the i/o completes and finishes immediately, no coroutine suspension occurs.
|
inline noexcept inherited |
A coroutinised equivalent to .read()
which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .read()
if no i/o multiplexer has been set on this handle!
The awaitable returned is eager i.e. it immediately begins the i/o. If the i/o completes and finishes immediately, no coroutine suspension occurs.
|
inline noexcept inherited |
A coroutinised equivalent to .write()
which suspends the coroutine until the i/o finishes. Blocks execution i.e. is equivalent to .write()
if no i/o multiplexer has been set on this handle!
The awaitable returned is eager i.e. it immediately begins the i/o. If the i/o completes and finishes immediately, no coroutine suspension occurs.
|
inline noexcept inherited |
Copies the extended attributes from one entity to another, optionally replacing all the extended attributes on the destination.
This convenience function is implemented using the APIs above, and therefore is racy with respect to concurrent users. If you specify an invalid source with replace_all_local_attributes = true
, then this is a convenient way to remove all extended attributes on the local inode.
|
inline virtual noexcept inherited |
Returns the current path of the open handle as said by the operating system. Note that you are NOT guaranteed that any path refreshed bears any resemblance to the original, some operating systems will return some different path which still reaches the same inode via some other route e.g. hardlinks, dereferenced symbolic links, etc. Windows and Linux correctly track changes to the specific path the handle was opened with, not getting confused by other hard links. MacOS nearly gets it right, but under some circumstances e.g. renaming may switch to a different hard link's path which is almost certainly a bug.
If LLFIO was not able to determine the current path for this open handle e.g. the inode has been unlinked, it returns an empty path. Be aware that FreeBSD can return an empty (deleted) path for file inodes no longer cached by the kernel path cache, LLFIO cannot detect the difference. FreeBSD will also return any path leading to the inode if it is hard linked. FreeBSD does implement path retrieval for directory inodes correctly however, and see algorithm::cached_parent_handle_adapter<T>
for a handle adapter which makes use of that.
On Linux if /proc
is not mounted, this call fails with an error. All APIs in LLFIO which require the use of current_path()
can be told to not use it e.g. flag::disable_safety_unlinks
. It is up to you to detect if current_path()
is not working, and to change how you call LLFIO appropriately.
On Windows, you will almost certainly get back a path of the form \!!\Device\HarddiskVolume10\Users\ned\...
. See path_view
for what all the path prefix sequences mean, but to summarise the \!!\
prefix is LLFIO-only and will not be accepted by other Windows APIs. Pass LLFIO derived paths through the function to_win32_path()
to Win32-ise them. This function is also available on Linux where it does nothing, so you can use it in portable code.
path_handle
to fix a base location on the file system and work from that anchor instead!
algorithm::cached_parent_handle_adapter<T>
which overrides this with an implementation based on retrieving the current path of a cached handle to the parent directory. On platforms with instability or failure to retrieve the correct current path for regular files, the cached parent handle adapter works around the problem by taking advantage of directory inodes not having the same instability problems on any platform. Reimplemented in llfio_v2_xxx::symlink_handle, and llfio_v2_xxx::process_handle.
|
inlinestaticnoexcept |
Create a random file handle.
bytes | How long the random file ought to report itself being. |
_mode | How to open the file. |
seed | Up to 88 bytes with which to seed the randomness. The default means use utils::random_fill() . |
|
inlinestaticnoexceptinherited |
Create a file handle opening access to a file on path
base | Handle to a base location on the filing system. Pass {} to indicate that path will be absolute. |
path | The path relative to base to open. |
_mode | How to open the file. |
_creation | How to create the file. |
_caching | How to ask the kernel to cache the file. |
flags | Any additional custom behaviours. |
|
inlinevirtualnoexceptinherited |
Retrieve the value of an extended attribute set on this file or directory.
|
inlinevirtualnoexceptinherited |
Links the inode referred to by this open handle to the path specified. The current path of this open handle is not changed, unless it has no current path due to being unlinked.
flag::disable_safety_unlinks
is set, this implementation opens a path_handle
to the source containing directory first, then checks before linking that the item about to be hard linked has the same inode as the open file handle. It will retry this matching until it succeeds or the deadline given expires. This should prevent most unmalicious accidental loss of data.
base | Base for any relative path. |
path | The relative or absolute new path to hard link to. |
d | The deadline by which the matching of the containing directory to the open handle's inode must succeed, else errc::timed_out will be returned. |
current_path()
via parent_path_handle()
and thus is both expensive and calls malloc many times.
|
inlinevirtualnoexceptinherited |
Fill the supplied buffer with the names of all extended attributes set on this file or directory, returning a span of path view components.
Note that this routine is a very thin wrap of listxattr()
on POSIX and NtQueryInformationFile()
on Windows. If the supplied buffer is too small, the syscall typically returns failure rather than do a partial fill. Most implementations do not support more than 64Kb of extended attribute information per inode so maybe 70Kb is a safe default (to account for the return value storage), however properly written code will detect the buffer being too small and will auto-expand it until success.
|
inlinevirtualnoexceptinherited |
Locks the inode referred to by the open handle for exclusive access.
Note that this may, or may not, interact with the byte range lock extensions. See unique_file_lock
for a RAII locker.
|
inlineoverridevirtualnoexcept |
EXTENSION: Tries to lock the range of bytes specified for shared or exclusive access. Note that this may, or MAY NOT, observe whole file locks placed with lock()
, lock_shared()
etc.
Be aware this passes through the same semantics as the underlying OS call, including any POSIX insanity present on your platform:
flag::byte_lock_insanity
will be set in flags() after the first call to this function.
You almost certainly should use your choice of an algorithm::shared_fs_mutex::*
instead of this as those are more portable and performant, or use the SharedMutex
modelling member functions which lock the whole inode for exclusive or shared access.
extent_guard
after creating a new one over the same byte range, otherwise the old extent_guard
's destructor will simply unlock the range entirely. On Windows however upgrade/downgrade locks overlay, so on that platform you must not release the old extent_guard
. Look into algorithm::shared_fs_mutex::safe_byte_ranges
for a portable solution.
offset | The offset to lock. Note that on POSIX the top bit is always cleared before use as POSIX uses signed transport for offsets. If you want an advisory rather than mandatory lock on Windows, one technique is to force top bit set so the region you lock is not the one you will i/o - obviously this reduces maximum file size to (2^63)-1. |
bytes | The number of bytes to lock. Setting this and the offset to zero causes the whole file to be locked. |
kind | Whether the lock is to be shared or exclusive. |
d | An optional deadline by which the lock must complete, else it is cancelled. |
Reimplemented from llfio_v2_xxx::lockable_byte_io_handle.
|
inlinevirtualnoexceptinherited |
Locks the inode referred to by the open handle for shared access.
Note that this may, or may not, interact with the byte range lock extensions. See unique_file_lock
for a RAII locker.
|
inlinenoexceptinherited |
The maximum number of buffers which a single read or write syscall can (atomically) process at a time for this specific open handle. On POSIX, this is known as IOV_MAX
. Preferentially uses any i/o multiplexer set over the virtually overridable per-class implementation.
Note that the actual number of buffers accepted for a read or a write may be significantly lower than this system-defined limit, depending on available resources. The read()
or write()
call will return the buffers accepted at the time of invoking the syscall.
Note also that some OSs will error out if you supply more than this limit to read()
or write()
, but other OSs do not. Some OSs guarantee that each i/o syscall has effects atomically visible or not to other i/o, other OSs do not.
OS X does not implement scatter-gather file i/o syscalls. Thus this function will always return 1
in that situation.
Microsoft Windows may implement scatter-gather i/o under certain handle configurations. Most of the time for non-socket handles this function will return 1
.
For handles which implement i/o entirely in user space, and thus syscalls are not involved, this function will return 0
.
|
inlinevirtualnoexceptinherited |
Obtain a handle to the path currently containing this handle's file entry.
flag::disable_safety_unlinks
is set, this implementation opens a path_handle
to the source containing directory, then checks if the file entry within has the same inode as the open file handle. It will retry this matching until it succeeds or the deadline given expires.
algorithm::cached_parent_handle_adapter<T>
which overrides this with a zero cost implementation, thus making unlinking and relinking very considerably quicker.
|
inlineinherited |
Bitwise flags which can be specified.
< No flags
Unlinks the file on handle close. On POSIX, this simply unlinks whatever is pointed to by path()
upon the call of close()
if and only if the inode matches. On Windows, if you are on Windows 10 1709 or later, exactly the same thing occurs. If on previous editions of Windows, the file entry does not disappear but becomes unavailable for anyone else to open with an errc::resource_unavailable_try_again
error return. Because this is confusing, unless the win_disable_unlink_emulation
flag is also specified, this POSIX behaviour is somewhat emulated by LLFIO on older Windows by renaming the file to a random name on close()
causing it to appear to have been unlinked immediately.
Some kernel caching modes have unhelpfully inconsistent behaviours in getting your data onto storage, so by default unless this flag is specified LLFIO adds extra fsyncs to the following operations for the caching modes specified below: truncation of file length either explicitly or during file open. closing of the handle either explicitly or in the destructor.
Additionally on Linux only to prevent loss of file metadata: On the parent directory whenever a file might have been created. On the parent directory on file close.
This only occurs for these kernel caching modes: caching::none caching::reads caching::reads_and_metadata caching::safety_barriers
file_handle::unlink()
could accidentally delete the wrong file if someone has renamed the open file handle since the time it was opened. To prevent this occurring, where the OS doesn't provide race free unlink-by-open-handle we compare the inode of the path we are about to unlink with that of the open handle before unlinking.
Ask the OS to disable prefetching of data. This can improve random i/o performance.
Ask the OS to maximise prefetching of data, possibly prefetching the entire file into kernel cache. This can improve sequential i/o performance.
< See the documentation for unlink_on_first_close
Microsoft Windows NTFS, having been created in the late 1980s, did not originally implement extents-based storage and thus could only represent sparse files via efficient compression of intermediate zeros. With NTFS v3.0 (Microsoft Windows 2000), a proper extents-based on-storage representation was added, thus allowing only 64Kb extent chunks written to be stored irrespective of whatever the maximum file extent was set to.
For various historical reasons, extents-based storage is disabled by default in newly created files on NTFS, unlike in almost every other major filing system. You have to explicitly "opt in" to extents-based storage.
As extents-based storage is nearly cost free on NTFS, LLFIO by default opts in to extents-based storage for any empty file it creates. If you don't want this, you can specify this flag to prevent that happening.
Filesystems tend to be embarrassingly parallel for operations performed to different inodes. Where LLFIO performs i/o to multiple inodes at a time, it will use OpenMP or the Parallelism or Concurrency standard library extensions to usually complete the operation in constant rather than linear time. If you don't want this default, you can disable default using this flag.
Microsoft Windows NTFS has the option, when creating a directory, to set whether leafname lookup will be case sensitive. This is the only way of getting exact POSIX semantics on Windows without resorting to editing the system registry, however it also affects all code doing lookups within that directory, so we must default it to off.
Create the handle in a way where i/o upon it can be multiplexed with other i/o on the same initiating thread of execution i.e. you can perform more than one read concurrently, without using threads. The blocking operations .read()
and .write()
may have to use a less efficient, but cancellable, blocking implementation for handles created in this way. On Microsoft Windows, this creates handles with OVERLAPPED
semantics. On POSIX, this creates handles with nonblocking semantics for non-file handles such as pipes and sockets, however for file, directory and symlink handles it does not set nonblocking, as it is non-portable.
< Using insane POSIX byte range locks
< This is an inode created with no representation on the filing system
|
inlinenoexceptinherited |
Read data from the open handle, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation.
reqs | A scatter-gather and offset request. |
d | An optional deadline by which the i/o must complete, else it is cancelled. Note function may return significantly after this deadline if the i/o takes long to cancel. |
|
inlinevirtualnoexceptinherited |
Relinks the current path of this open handle to the new path specified. If atomic_replace
is true, the relink atomically and silently replaces any item at the new path specified. This operation is both atomic and matching POSIX behaviour even on Microsoft Windows where no Win32 API can match POSIX semantics.
Note that if atomic_replace
is false, the operation may be implemented as creating a hard link to the destination (which fails if the destination exists), opening a new file descriptor to the destination, closing the existing file descriptor, replacing the existing file descriptor with the new one (this is to ensure path tracking continues to work), then unlinking the previous link. Thus native_handle()
's value may change. This is not the case on Microsoft Windows nor Linux, both of which provide syscalls capable of refusing to rename if the destination exists.
If the handle refers to a pipe, on Microsoft Windows the base path handle is ignored as there is a single global named pipe namespace. Unless the path fragment begins with \
, the string \??\
is prefixed to the name before passing it to the NT kernel API which performs the rename. This is because \\.\
in Win32 maps onto \??\
in the NT kernel.
flag::disable_safety_unlinks
is set, this implementation opens a path_handle
to the source containing directory first, then checks before relinking that the item about to be relinked has the same inode as the open file handle. It will retry this matching until it succeeds or the deadline given expires. This should prevent most unmalicious accidental loss of data.
base | Base for any relative path. |
path | The relative or absolute new path to relink to. |
atomic_replace | Atomically replace the destination if a file entry already is present there. Choosing false for this will fail if a file entry is already present at the destination, and may not be an atomic operation on some platforms (i.e. both the old and new names may be linked to the same inode for a very short period of time). Windows and recent Linuxes are always atomic. |
d | The deadline by which the matching of the containing directory to the open handle's inode must succeed, else errc::timed_out will be returned. |
current_path()
via parent_path_handle()
and thus is both expensive and calls malloc many times. Reimplemented in llfio_v2_xxx::symlink_handle, and llfio_v2_xxx::mapped_file_handle.
|
inlinevirtualnoexceptinherited |
Removes the extended attribute set on this file or directory.
|
inlinenoexceptinherited |
Reopen this handle (copy constructor is disabled to avoid accidental copying), optionally race free reopening the handle with different access or caching.
Microsoft Windows provides a syscall for cloning an existing handle but with new access. On POSIX, if not changing the mode, we change caching via fcntl()
, if changing the mode we must loop calling current_path()
, trying to open the path returned and making sure it is the same inode.
|
inlinevirtualnoexceptinherited |
EXTENSION: Changes whether this handle is append only or not.
Reimplemented in llfio_v2_xxx::process_handle.
|
inlinevirtualnoexceptinherited |
Sets the value of an extended attribute on this file or directory.
To prevent collision in a globally visible resource, there is a convention whereby you ought to namespace the names of your values as namespace.attribute
e.g. appname.setting
to prevent unintentional collision with other programs. Obviously, do choose a unique appname
if there is any chance another program might use the same namespace name.
On POSIX, there are additional namespacing requirements: before your value name, you need to prefix one of user
or system
, so the actual name you might set would be user.appname.propname
. Windows does not have the user
/system
prefix requirement, but it does no harm to do the exact same on Windows as on POSIX.
The host OS and target filing system choose the limits on value size, and will fail accordingly. Some impose a maximum of 64Kb for all names and values per inode, others have a 4Kb maximum value size, there are lots of combinations. You are probably safest not setting many names, and keep the values short.
This API is implemented as file alternate data streams, rather than the Extended Attributes API as accessed via NtSetEaFile()
and NtQueryEaFile()
(which actually modify the file alternate data stream $EA
in any case).
The reason why is that NtSetEaFile()
can only append new records to EA storage. It cannot deallocate any existing EA records, if you try to do so you will get STATUS_EA_CORRUPT_ERROR
. You can append setting the same name to a different value, which can include a null value which then appears as if the name is no longer there. But there is a cap of 64kB for the EA record, and once it is consumed, it is gone forever for that inode.
Obviously that doesn't map at all well onto POSIX extended attributes, where you can set the value of an attribute as frequently as you like. The closest equivalent on Windows is therefore file alternate data streams, even though the attribute's value is then worked with as a whole proper file with all the attendant performance consequences.
As a result, name
must be a valid filename and not contain any characters not permitted in a filename. We use the NT API here, so the character restrictions are far fewer than for the Win32 API e.g. single character names do NOT cause misoperation like on Win32.
|
inlinevirtualnoexceptinherited |
Sets the i/o multiplexer this handle will use to implement read()
, write()
and barrier()
.
Note that this call deregisters this handle from any existing i/o multiplexer, and registers it with the new i/o multiplexer. You must therefore not call it if any i/o is currently outstanding on this handle. You should also be aware that multiple dynamic memory allocations and deallocations may occur, as well as multiple syscalls (i.e. this is an expensive call, try to do it from cold code).
If the handle was not created as multiplexable, this call always fails.
Reimplemented in llfio_v2_xxx::mapped_file_handle.
|
inlinestaticnoexceptinherited |
Create a file handle creating the named file on some path which the OS declares to be suitable for temporary files. Most OSs are very lazy about flushing changes made to these temporary files. Note the default flags are to have the newly created file deleted on first handle close. Note also that an empty name is equivalent to calling uniquely_named_file(path_discovery::storage_backed_temporary_files_directory())
and the creation parameter is ignored.
temp_inode()
instead, it is far more secure.
|
inlinestaticnoexceptinherited |
Securely create a file handle creating a temporary anonymous inode in the filesystem referred to by dirpath. The inode created has no name nor accessible path on the filing system and ceases to exist as soon as the last handle is closed, making it ideal for use as a temporary file where other processes do not need to have access to its contents via some path on the filing system (a classic use case is for backing shared memory maps).
|
inlinevirtualnoexceptinherited |
Tries to lock the inode referred to by the open handle for exclusive access, returning false
if lock is currently unavailable.
Note that this may, or may not, interact with the byte range lock extensions. See unique_file_lock
for a RAII locker.
|
inlinevirtualnoexceptinherited |
Tries to lock the inode referred to by the open handle for shared access, returning false
if lock is currently unavailable.
Note that this may, or may not, interact with the byte range lock extensions. See unique_file_lock
for a RAII locker.
|
inlinestaticnoexceptinherited |
Create a file handle creating a uniquely named file on a path. The file is opened exclusively with creation::only_if_not_exist
so it will never collide with nor overwrite any existing file. Note also that caching defaults to temporary which hints to the OS to only flush changes to physical storage as lately as possible.
|
inlinevirtualnoexceptinherited |
Unlinks the current path of this open handle, causing its entry to immediately disappear from the filing system.
On Windows before Windows 10 1709 unless flag::win_disable_unlink_emulation
is set, this behaviour is simulated by renaming the file to something random and setting its delete-on-last-close flag. Note that Windows may prevent the renaming of a file in use by another process, if so it will NOT be renamed. After the next handle to that file closes, it will become permanently unopenable by anyone else until the last handle is closed, whereupon the entry will be eventually removed by the operating system.
flag::disable_safety_unlinks
is set, this implementation opens a path_handle
to the containing directory first, then checks that the item about to be unlinked has the same inode as the open file handle. It will retry this matching until it succeeds or the deadline given expires. This should prevent most unmalicious accidental loss of data.
d | The deadline by which the matching of the containing directory to the open handle's inode must succeed, else errc::timed_out will be returned. |
current_path()
and thus is both expensive and calls malloc many times. On Windows, also calls current_path()
if flag::disable_safety_unlinks
is not set. Reimplemented in llfio_v2_xxx::symlink_handle.
|
inlineoverridevirtualnoexcept |
EXTENSION: Unlocks a byte range previously locked.
offset | The offset to unlock. This should be an offset previously locked. |
bytes | The number of bytes to unlock. This should be a byte extent previously locked. |
Reimplemented from llfio_v2_xxx::lockable_byte_io_handle.
|
inlinenoexceptinherited |
Write data to the open handle, preferentially using any i/o multiplexer set over the virtually overridable per-class implementation.
truncate(newsize)
first.
reqs | A scatter-gather and offset request. |
d | An optional deadline by which the i/o must complete, else it is cancelled. Note function may return significantly after this deadline if the i/o takes long to cancel. |