tokio-uring-0.5.0/.cargo/config.toml000064400000000000000000000000521046102023000154050ustar 00000000000000[env] RUST_TEST_THREADS = { value = "1" } tokio-uring-0.5.0/.cargo_vcs_info.json0000644000000001360000000000100133050ustar { "git": { "sha1": "bf9906d052299dd05c3d13671800cd1632b1ee67" }, "path_in_vcs": "" }tokio-uring-0.5.0/.github/workflows/ci.yml000064400000000000000000000034001046102023000166050ustar 00000000000000name: CI on: pull_request: branches: - master push: branches: - master env: RUSTFLAGS: -Dwarnings RUST_BACKTRACE: 1 jobs: # Depends on all actions that are required for a "successful" CI run. # Based on the ci here: https://github.com/tokio-rs/tokio/blob/master/.github/workflows/ci.yml all-systems-go: runs-on: ubuntu-latest needs: - check - clippy - fmt - test - test-docs - docs steps: - run: exit 0 bench: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo bench --no-run check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo check clippy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo clippy test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo test test-docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo test --doc fmt: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update stable - run: cargo fmt -- --check docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Rust run: rustup update nightly && rustup default nightly - run: cargo doc --no-deps --all-features env: RUSTDOCFLAGS: -Dwarnings tokio-uring-0.5.0/.gitignore000064400000000000000000000000231046102023000140600ustar 00000000000000/target Cargo.lock 
tokio-uring-0.5.0/CHANGELOG.md000064400000000000000000000114211046102023000137050ustar 00000000000000# 0.4.0 (November 5th, 2022) ### Fixed - Fix panic in Deref/DerefMut for Slice extending into uninitialized part of the buffer ([#52]) - docs: all-features = true ([#84]) - fix fs unit tests to avoid parallelism ([#121]) - Box the socket address to allow moving the Connect future ([#126]) - rt: Fix data race ([#146]) ### Added - Implement fs::File::readv_at()/writev_at() ([#87]) - fs: implement FromRawFd for File ([#89]) - Implement `AsRawFd` for `TcpStream` ([#94]) - net: add TcpListener.local_addr method ([#107]) - net: add TcpStream.write_all ([#111]) - driver: add Builder API as an option to start ([#113]) - Socket and TcpStream shutdown ([#124]) - fs: implement fs::File::from_std ([#131]) - net: implement FromRawFd for TcpStream ([#132]) - fs: implement OpenOptionsExt for OpenOptions ([#133]) - Add NoOp support ([#134]) - Add writev to TcpStream ([#136]) - sync TcpStream, UnixStream and UdpSocket functionality ([#141]) - Add benchmarks for no-op submission ([#144]) - Expose runtime structure ([#148]) ### Changed - driver: batch submit requests and add benchmark ([#78]) - Depend on io-uring version ^0.5.8 ([#153]) ### Internal Improvements - chore: fix clippy lints ([#99]) - io: refactor post-op logic in ops into Completable ([#116]) - Support multi completion events: v2 ([#130]) - simplify driver operation futures ([#139]) - rt: refactor runtime to avoid Rc\> ([#142]) - Remove unused dev-dependencies ([#143]) - chore: types and fields explicitly named ([#149]) - Ignore errors from uring while cleaning up ([#154]) - rt: drop runtime before driver during shutdown ([#155]) - rt: refactor drop logic ([#157]) - rt: fix error when calling block_on twice ([#162]) ### CI changes - chore: update actions/checkout action to v3 ([#90]) - chore: add all-systems-go ci check ([#98]) - chore: add clippy to ci ([#100]) - ci: run cargo test --doc ([#135]) [#52]: 
https://github.com/tokio-rs/tokio-uring/pull/52 [#78]: https://github.com/tokio-rs/tokio-uring/pull/78 [#84]: https://github.com/tokio-rs/tokio-uring/pull/84 [#87]: https://github.com/tokio-rs/tokio-uring/pull/87 [#89]: https://github.com/tokio-rs/tokio-uring/pull/89 [#90]: https://github.com/tokio-rs/tokio-uring/pull/90 [#94]: https://github.com/tokio-rs/tokio-uring/pull/94 [#98]: https://github.com/tokio-rs/tokio-uring/pull/98 [#99]: https://github.com/tokio-rs/tokio-uring/pull/99 [#100]: https://github.com/tokio-rs/tokio-uring/pull/100 [#107]: https://github.com/tokio-rs/tokio-uring/pull/107 [#111]: https://github.com/tokio-rs/tokio-uring/pull/111 [#113]: https://github.com/tokio-rs/tokio-uring/pull/113 [#116]: https://github.com/tokio-rs/tokio-uring/pull/116 [#121]: https://github.com/tokio-rs/tokio-uring/pull/121 [#124]: https://github.com/tokio-rs/tokio-uring/pull/124 [#126]: https://github.com/tokio-rs/tokio-uring/pull/126 [#130]: https://github.com/tokio-rs/tokio-uring/pull/130 [#131]: https://github.com/tokio-rs/tokio-uring/pull/131 [#132]: https://github.com/tokio-rs/tokio-uring/pull/132 [#133]: https://github.com/tokio-rs/tokio-uring/pull/133 [#134]: https://github.com/tokio-rs/tokio-uring/pull/134 [#135]: https://github.com/tokio-rs/tokio-uring/pull/135 [#136]: https://github.com/tokio-rs/tokio-uring/pull/136 [#139]: https://github.com/tokio-rs/tokio-uring/pull/139 [#141]: https://github.com/tokio-rs/tokio-uring/pull/141 [#142]: https://github.com/tokio-rs/tokio-uring/pull/142 [#143]: https://github.com/tokio-rs/tokio-uring/pull/143 [#144]: https://github.com/tokio-rs/tokio-uring/pull/144 [#146]: https://github.com/tokio-rs/tokio-uring/pull/146 [#148]: https://github.com/tokio-rs/tokio-uring/pull/148 [#149]: https://github.com/tokio-rs/tokio-uring/pull/149 [#153]: https://github.com/tokio-rs/tokio-uring/pull/153 [#154]: https://github.com/tokio-rs/tokio-uring/pull/154 [#155]: https://github.com/tokio-rs/tokio-uring/pull/155 [#157]: 
https://github.com/tokio-rs/tokio-uring/pull/157 [#162]: https://github.com/tokio-rs/tokio-uring/pull/162 # 0.3.0 (March 2nd, 2022) ### Added - net: add unix stream & listener ([#74]) - net: add tcp and udp support ([#40]) [#74]: https://github.com/tokio-rs/tokio-uring/pull/74 [#40]: https://github.com/tokio-rs/tokio-uring/pull/40 # 0.2.0 (January 9th, 2022) ### Fixed - fs: fix error handling related to changes in rustc ([#69]) - op: fix 'already borrowed' panic ([#39]) ### Added - fs: add fs::remove_file ([#66]) - fs: implement Debug for File ([#65]) - fs: add remove_dir and unlink ([#63]) - buf: impl IoBuf/IoBufMut for bytes::Bytes/BytesMut ([#43]) [#69]: https://github.com/tokio-rs/tokio-uring/pull/69 [#66]: https://github.com/tokio-rs/tokio-uring/pull/66 [#65]: https://github.com/tokio-rs/tokio-uring/pull/65 [#63]: https://github.com/tokio-rs/tokio-uring/pull/63 [#39]: https://github.com/tokio-rs/tokio-uring/pull/39 [#43]: https://github.com/tokio-rs/tokio-uring/pull/43 tokio-uring-0.5.0/Cargo.lock0000644000000654600000000000100112730ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "addr2line" version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ "gimli", ] [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "async-stream" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ "async-stream-impl", "futures-core", "pin-project-lite", ] [[package]] name = "async-stream-impl" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ 
"addr2line", "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", ] [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "bumpalo" version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags 1.3.2", "clap_lex", "indexmap", "textwrap", ] [[package]] name = "clap_lex" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" dependencies = [ "os_str_bytes", ] [[package]] name = "criterion" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" dependencies = [ "anes", "atty", "cast", "ciborium", "clap", "criterion-plot", "itertools", "lazy_static", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" version = "0.2.2" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "either" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "errno" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", ] [[package]] name = "fastrand" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "futures-core" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-task" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", "futures-task", "pin-project-lite", "pin-utils", "slab", ] [[package]] name = "gimli" version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "half" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "iai" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" [[package]] name = "indexmap" version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", ] [[package]] name = "io-uring" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "595a0399f411a508feb2ec1e970a4a30c249351e30208960d58298de8660b0e5" dependencies = [ "bitflags 1.3.2", "libc", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "linux-raw-sys" 
version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "log" version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memoffset" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" dependencies = [ "autocfg", ] [[package]] name = "miniz_oxide" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ "adler", ] [[package]] name = "mio" version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "wasi", "windows-sys 0.48.0", ] [[package]] name = "nix" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", "memoffset", "pin-utils", ] [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "object" version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] [[package]] name = "once_cell" version = "1.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "os_str_bytes" version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "pin-project-lite" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "plotters" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" [[package]] name = "plotters-svg" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" dependencies = [ "plotters-backend", ] [[package]] name = "proc-macro2" version = "1.0.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rustc-demangle" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustix" version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", "windows-sys 0.52.0", ] [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = 
"same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "slab" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg", ] [[package]] name = "socket2" version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", ] [[package]] name = "socket2" version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", "windows-sys 0.52.0", ] [[package]] name = "syn" version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tempfile" version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", "rustix", "windows-sys 0.52.0", ] [[package]] name = "textwrap" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "tokio" version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "libc", "mio", "pin-project-lite", "socket2 0.5.7", "windows-sys 0.48.0", ] [[package]] name = "tokio-stream" version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" dependencies = [ "futures-core", "pin-project-lite", "tokio", ] [[package]] name = "tokio-test" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" dependencies = [ "async-stream", "bytes", "futures-core", "tokio", "tokio-stream", ] [[package]] name = "tokio-uring" version = "0.5.0" dependencies = [ "bytes", "criterion", "futures-util", "iai", "io-uring", "libc", "nix", "slab", "socket2 0.4.10", "tempfile", "tokio", "tokio-test", ] [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 
dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets 0.48.5", ] [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets 0.52.5", ] [[package]] name = "windows-targets" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ "windows_aarch64_gnullvm 0.48.5", "windows_aarch64_msvc 0.48.5", "windows_i686_gnu 0.48.5", "windows_i686_msvc 0.48.5", "windows_x86_64_gnu 0.48.5", "windows_x86_64_gnullvm 0.48.5", "windows_x86_64_msvc 0.48.5", ] [[package]] name = "windows-targets" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ "windows_aarch64_gnullvm 0.52.5", "windows_aarch64_msvc 0.52.5", 
"windows_i686_gnu 0.52.5", "windows_i686_gnullvm", "windows_i686_msvc 0.52.5", "windows_x86_64_gnu 0.52.5", "windows_x86_64_gnullvm 0.52.5", "windows_x86_64_msvc 0.52.5", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" [[package]] name = "windows_i686_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" tokio-uring-0.5.0/Cargo.toml0000644000000035410000000000100113060ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "tokio-uring" version = "0.5.0" authors = ["Tokio Contributors "] description = """ io-uring support for the Tokio asynchronous runtime. 
""" homepage = "https://tokio.rs" documentation = "https://docs.rs/tokio-uring/0.5.0/tokio-uring" readme = "README.md" keywords = [ "async", "fs", "io-uring", ] categories = [ "asynchronous", "network-programming", ] license = "MIT" repository = "https://github.com/tokio-rs/tokio-uring" [package.metadata.docs.rs] all-features = true [profile.bench] debug = 2 [[bench]] name = "lai_no_op" path = "benches/lai/no_op.rs" harness = false [[bench]] name = "criterion_no_op" path = "benches/criterion/no_op.rs" harness = false [dependencies.bytes] version = "1.0" optional = true [dependencies.futures-util] version = "0.3.26" features = ["std"] default-features = false [dependencies.io-uring] version = "0.6.0" [dependencies.libc] version = "0.2.80" [dependencies.slab] version = "0.4.2" [dependencies.socket2] version = "0.4.4" features = ["all"] [dependencies.tokio] version = "1.2" features = [ "net", "rt", "sync", ] [dev-dependencies.criterion] version = "0.4.0" [dev-dependencies.iai] version = "0.1.1" [dev-dependencies.nix] version = "0.26.1" [dev-dependencies.tempfile] version = "3.2.0" [dev-dependencies.tokio] version = "1.21.2" [dev-dependencies.tokio-test] version = "0.4.2" tokio-uring-0.5.0/Cargo.toml.orig000064400000000000000000000023711046102023000147670ustar 00000000000000[package] name = "tokio-uring" version = "0.5.0" authors = ["Tokio Contributors "] edition = "2018" readme = "README.md" license = "MIT" documentation = "https://docs.rs/tokio-uring/0.5.0/tokio-uring" repository = "https://github.com/tokio-rs/tokio-uring" homepage = "https://tokio.rs" description = """ io-uring support for the Tokio asynchronous runtime. 
""" categories = ["asynchronous", "network-programming"] keywords = ["async", "fs", "io-uring"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] tokio = { version = "1.2", features = ["net", "rt", "sync"] } slab = "0.4.2" libc = "0.2.80" io-uring = "0.6.0" socket2 = { version = "0.4.4", features = ["all"] } bytes = { version = "1.0", optional = true } futures-util = { version = "0.3.26", default-features = false, features = ["std"] } [dev-dependencies] tempfile = "3.2.0" tokio-test = "0.4.2" iai = "0.1.1" criterion = "0.4.0" # we use joinset in our tests tokio = "1.21.2" nix = "0.26.1" [package.metadata.docs.rs] all-features = true [profile.bench] debug = true [[bench]] name = "lai_no_op" path = "benches/lai/no_op.rs" harness = false [[bench]] name = "criterion_no_op" path = "benches/criterion/no_op.rs" harness = false tokio-uring-0.5.0/DESIGN.md000064400000000000000000000763231046102023000134030ustar 00000000000000# Summary The RFC proposes a new asynchronous Rust runtime backed by [io-uring] as a new crate: tokio-uring. The API aims to be as close to idiomatic Tokio, but will deviate when necessary to provide full access to io-uring's capabilities. It also will be compatible with existing Tokio libraries. The runtime will use an isolated thread-per-core model, and many types will be `!Send`. [io-uring]: https://kernel.dk/io_uring.pdf?source=techstories.org # Motivation Tokio's current Linux implementation uses non-blocking system calls and epoll for event notification. With epoll, a tuned TCP proxy will spend [70% to 80%][overhead] of CPU cycles outside of userspace, including cycles spent performing syscalls and copying data between the kernel and userspace. In 2019, Linux added a new API, io-uring, which reduces overhead by eliminating most syscalls and mapping memory regions used for byte buffers ahead of time. 
Early benchmarks comparing io-uring against epoll are promising; a TCP echo client and server implemented in C show up to [60% improvement][bench]. Though not yet measured, using io-uring instead of Tokio's thread-pool strategy will likely provide significant gains for file system operations. Because io-uring differs significantly from epoll, Tokio must provide a new set of APIs to take full advantage of the reduced overhead. However, Tokio's [stability guarantee][stability] means Tokio APIs cannot change until 2024 at the earliest. Additionally, the io-uring API is still evolving with [new functionality][tweet] planned for the near future. Instead of waiting for io-uring to mature and a Tokio 2.0 release, we will release a standalone crate dedicated to exposing an io-uring optimal API. This new crate will be able to iterate rapidly with breaking changes without violating Tokio's stability guarantee. Applications deployed exclusively on Linux kernels 5.10 or later may choose to use this crate when taking full advantage of io-uring provides measurable benefits. Examples of intended use-cases include TCP proxies, HTTP file servers, and databases. [overhead]: https://www.usenix.org/system/files/conference/nsdi14/nsdi14-paper-jeong.pdf [bench]: https://github.com/frevib/io_uring-echo-server/blob/master/benchmarks/benchmarks.md [stability]: https://tokio.rs/blog/2020-12-tokio-1-0#a-stability-guarantee [tweet]: https://twitter.com/axboe/status/1371978266806919168 # Guide-level explanation The tokio-uring crate provides a module layout similar to the Tokio crate, including modules such as net (TCP, UDP, and Unix domain sockets), fs (file system access), io (standard in, out, and error streams). It also provides a runtime module containing an io-uring specific runtime. Modules such as sync and any other containing async Rust utilities are not included by tokio-uring, in favor of the ones provided by the main Tokio crate. 
```toml [dependencies] tokio-uring = "0.1" ``` ```rust fn main() { let rt = tokio_uring::runtime::Runtime::new().unwrap(); rt.block_on(async { // The rest of the application comes here. }); } ``` The application's `main` function starts a tokio-uring runtime and launches its asynchronous logic within that. The tokio-uring runtime can drive both io-uring specific resources (e.g., `TcpStream` and `TcpListener`) and Tokio resources, enabling any library written for Tokio to run within the io-uring specific runtime. ## Submit-based operations Operations on io-uring backed resources return futures, representing the operation completion. The caller awaits the future to get the operation result. ```rust let socket = my_listener.accept().await?; ``` The runtime communicates with the kernel using two single-producer, single-consumer queues. It submits operation requests, such as accepting a TCP socket, to the kernel using the submission queue. The kernel then performs the operation. On completion, the kernel returns the operation results via the completion queue and notifies the process. The `io_uring_enter` syscall flushes the submission queue and acquires any pending completion events. Upon request, this syscall may block the thread waiting for a minimum number of completion events. Both queues use memory shared between the process and the kernel and synchronize with atomic operations. Operation futures provide an asynchronous cancel function, enabling the caller to await on a clean cancellation. ```rust let accept = tcp_listener.accept(); tokio::select! { (tcp_stream, addr) = &mut accept => { ... } _ = tokio::time::sleep(Duration::from_millis(100)) => { // Accept timed out, cancel the in-flight accept. match accept.cancel().await { Ok(_) => { ... } // operation canceled gracefully Err(Completed(tcp_stream, addr)) => { // The operation completed between the timeout // and cancellation request. } Err(_) => { ... 
} } } } ``` In practice, operation timeouts will deserve a dedicated API to handle the boilerplate. ```rust let (tcp_stream, addr) = tcp_listener .accept() .timeout(Duration::from_millis(100)) .await? ``` The cancel and timeout functions are inherent methods on operation future types. If the operation's future drops before the operation completes, the runtime will submit a cancellation request for the operation. However, cancellation is asynchronous and best-effort; the operation may still complete. In that case, the runtime discards the operation result. The queue's single-producer characteristic optimizes for a single thread to own a given submission queue. Supporting a multi-threaded runtime requires either synchronizing pushing onto the submission queue or creating a queue pair per thread. The tokio-uring runtime uses an isolated thread-per-core model, differing from Tokio's other runtimes. Unlike Tokio's primary multi-threaded runtime, there is no work-stealing. Tasks remain on the thread that spawned them for the duration of their lifecycle. Each runtime thread will own a dedicated submission and completion queue pair, and operations are submitted using the submission queue associated with the current thread. Operation completion futures will not implement `Send`, guaranteeing that they remain on the thread to receive the operation result. Interestingly, the resources, e.g., `TcpListener`, can be `Send` as long as they do not hold operation futures internally. It is possible to have two operations in-flight from a single resource associated with different queues and threads. Because operations are not `Send`, tasks awaiting these operations are also not `Send`, making them unable to be spawned using the spawn function from the Tokio crate. The tokio-uring crate will provide a spawn function that accepts non-`Send` tasks. When using multiple isolated runtime threads, balancing load between them becomes challenging. 
Applications must take care to ensure load remains balanced across threads, and strategies tend to vary. For example, a TCP server can distribute accepted connections across multiple threads, ideally while maintaining equal load across threads. One approach is to submit "accept" operations for the same listener concurrently across all the runtime threads while ensuring overloaded workers back-off. Defining runtime load is out of this crate's scope and left to the application, though pseudocode follows. ```rust let listener = Arc::new(TcpListener::bind(...)); spawn_on_each_thread(async { loop { // Wait for this worker to have capacity. This // ensures there are a minimum number of workers // in the runtime that are flagged as with capacity // to avoid total starvation. current_worker::wait_for_capacity().await; let socket = listener.accept().await; spawn(async move { ... }); } }) ``` ## Reading and writing Read and write operations require passing ownership of buffers to the kernel. When the operation completes, the kernel returns ownership of the buffer to the caller. The caller is responsible for allocating the buffer's memory and ensuring it remains alive until the operation completes. Additionally, while the kernel owns the memory, the process may not read from or write to the buffer. By designing the Rust APIs using ownership passing, Rust enforces the requirements at compile time. The following example demonstrates reading and writing with a file resource. ```rust use tokio_uring::buf; /// The result of an operation that includes a buffer. The buffer must /// be returned to the caller when the operation completes successfully /// or fails. /// /// This is implemented as a new type to implement std::ops::Try once /// the trait is stabilized. 
type BufResult = (std::io::Result, buf::Slice); // Positional read and write function definitions impl File { async fn read_at(&self, buf: buf::Slice, pos: u64) -> BufResult; async fn write_at(&self, buf: buf::Slice, pos: u64) -> BufResult; } /// The caller allocates a buffer let buf = buf::IoBuf::with_capacity(4096); // Read the first 1024 bytes of the file, when `std::ops::Try` is // stable, the `?` can be applied directly on the `BufResult` type. let BufResult(res, buf) = file.read_at(0, buf.slice(0..1024)).await; // Check the result. res?; // Write some data back to the file. let BufResult(res, _) = file.write_at(1024, buf.slice(0..512)).await; res?; ``` When reading from a TCP stream, read operations remain in flight until the socket receives data, an exchange that can take an arbitrary amount of time. Suppose each read operation requires pre-allocating memory for the in-flight operation. In that case, the amount of memory consumed by the process grows linearly with the number of in-flight read operations. For applications with a large number of open connections, this can be problematic. The io-uring API supports registering buffer pools with the ring and configuring read operations to use the buffer pool instead of a dedicated per-operation buffer. When the socket receives data, the kernel checks out a buffer from the pool and returns it to the caller. After reading the data, the caller returns the buffer to the kernel. If no buffers are available, the operation pauses until the process returns a buffer to the kernel, at which time the operation completes. ```rust // ... let buf = file.read_at_prepared(0, 1024, DefaultPool).await?; // Write some data back to the file. let BufResult(res, buf) = file.write_at(1024, buf.slice(0..512)).await; res?; // Dropping the buffer lets the kernel know the buffer may // be reused. drop(buf); ``` The runtime supports initializing multiple pools containing buffers of different sizes. 
When submitting a read operation, the caller specifies the pool from which to draw a buffer. ```rust // Allocate a buffer pool let my_pool = BufferPool::builder() .buffer_size(16_384) .num_buffers(256) .build(); // Create the runtime let mut rt = tokio_uring::runtime::Runtime::new()?; // Provide the buffer pool to the kernel. This passes // ownership of the pool to the kernel. let pool_token = rt.provide_buffers(my_pool)?; rt.block_on(async { // ... let buf = file.read_at_prepared(0, 1024, pool_token).await?; }); ``` ## Buffer management Buffers passed to read and write operations must remain alive and pinned to a memory location while operations are in-flight, ruling out `&mut [u8]` as an option. Additionally, read and write operations may reference buffers using a pointer or, when the buffer is part of a pre-registered buffer pool, using a numeric buffer identifier. The tokio-uring crate will provide its buffer type, `IoBuf`, to use when reading and writing. ```rust pub struct IoBuf { kind: Kind, } enum Kind { /// A vector-backed buffer Vec(Vec), /// Buffer pool backed buffer. The pool is managed by io-uring. Provided(ProvidedBuf), } ``` Internally, the IoBuf type is either backed by individually heap-allocated memory or a buffer pool entry. Acquiring an `IoBuf` is done via `IoBuf::with_capacity` or checking-out an entry from a pool. ```rust // Individually heap-allocated let my_buf = IoBuf::with_capacity(4096); // Checked-out from a pool match my_buffer_pool.checkout() { Ok(io_buf) => ..., Err(e) => panic!("buffer pool empty"), } ``` On drop, if the `IoBuf` is a buffer pool member, it is checked back in. If the kernel initially checked out the buffer as part of a read operation, an io-uring operation is issued to return it. Submitting the io-uring operation requires the buffer to remain on the same thread that checked it out and is enforced by making the `IoBuf` type `!Send`. Buffer pools will also be `!Send` as they contain `IoBuf` values. 
`IoBuf` provides an owned slice API allowing the caller to read to and write from a buffer's sub-ranges. ```rust pub struct Slice { buf: IoBuf, begin: usize, end: usize, } impl IoBuf { fn slice(self, range: impl ops::RangeBounds<usize>) -> Slice { .. } } // Write a sub-slice of a buffer my_file.write(my_io_buf.slice(10..20)).await ``` A slice end may go past the buffer's length but not past the capacity, enabling reads to uninitialized memory. ```rust // The buffer's memory is uninitialized let mut buf = IoBuf::with_capacity(4096); let slice = buf.slice(0..100); assert_eq!(slice.len(), 0); assert_eq!(slice.capacity(), 100); // Read data from a file into the buffer let BufResult(res, slice) = my_file.read_at(slice, 0); assert_eq!(slice.len(), 100); assert_eq!(slice.capacity(), 100); ``` A trait argument for reading and writing may be possible as a future improvement. Consider a read API that takes `T: AsMut<[u8]>` for the buffer argument. ```rust async fn read<T: AsMut<[u8]>>(&self, dst: T) { ... } struct MyUnstableBuf { mem: [u8; 10], } impl AsMut<[u8]> for MyUnstableBuf { fn as_mut(&mut self) -> &mut [u8] { &mut self.mem[..] } } ``` This read function takes ownership of the buffer; however, any pointer to the buffer obtained from a value becomes invalid when the value moves. Storing the buffer value at a stable location while the operation is in-flight should be sufficient to satisfy safety. ## Closing resources Idiomatically with Rust, closing a resource is performed in the drop handler, and within an asynchronous context, the drop handler should be non-blocking. Closing an io-uring resource requires canceling any in-flight operations, which is an asynchronous process. Consider an open TcpStream associated with file-descriptor (FD) 10. A task submits a read operation to the kernel, but the `TcpStream` is dropped and closed before the kernel sees it. A new `TcpStream` is accepted, and the kernel reuses FD 10. 
At this point, the kernel sees the original read operation request with FD 10 and completes it on the new `TcpStream`, not the intended `TcpStream`. The runtime receives the completion of the read operation. It discards the result because the associated operation future is gone, resulting in the caller losing data from the second TcpStream. This problem occurs even when issuing a cancellation request for the read operation. There is no guarantee the kernel will see the cancellation request before completing the operation. There are two options for respecting both requirements, neither ideal: closing the resource in the background or blocking the thread in the drop handler. If the resource is closed in the background, the process may encounter unexpected errors, such as "too many open files." Blocking the thread to cancel in-flight operations and close the resource prevents the runtime from processing other tasks and adds latency across the system. Instead, tokio-uring will provide an explicit asynchronous close function on resource types. ```rust impl TcpStream { async fn close(self) { ... } } my_tcp_stream.close().await; ``` The resource must still tolerate the caller dropping it without being explicitly closed. In this case, tokio-uring will close the resource in the background, avoiding blocking the runtime. The drop handler will move ownership of the resource handle to the runtime and submit cancellation requests for any in-flight operation. Once all existing in-flight operations complete, the runtime will submit a close operation. If the drop handler must process closing a resource in the background, it will notify the developer by emitting a warning message using [`tracing`]. In the future, it may be possible for Rust to provide a `#[must_not_drop]` attribute. This attribute will result in compilation warnings if the developer drops a resource without using the explicit close method. 
[`tracing`]: https://github.com/tokio-rs/tracing ## Byte streams Byte stream types, such as `TcpStream`, will not provide read and write methods (see "Alternatives" below for reasoning). Instead, byte streams will manage their buffers internally, as described in ["Notes on io-uring"][notes], and implement buffered I/O traits, such as AsyncBufRead. The caller starts by waiting for the byte stream to fill its internal buffer, reads the data, and marks the data as consumed. [notes]: https://without.boats/blog/io-uring/ ```rust // `fill_buf()` is provided by `AsyncBufRead` let data: &[u8] = my_stream.fill_buf().await?; println!("Got {} bytes", data.len()); // Consume the data my_stream.consume(data.len()); ``` Internally, byte streams submit read operations using the default buffer pool. Additional methods exist to take and place buffers, supporting zero-copy piping between two byte streams. ```rust my_tcp_stream.fill_buf().await?; let buf: IoBuf = my_tcp_stream.take_read_buf(); // Mutate `buf` if needed here. my_other_stream.place_write_buf(buf); my_other_stream.flush().await?; ``` Implementing buffer management on the `TcpStream` type requires tracking the in-flight read and write operations, making the `TcpStream` type `!Send`. Sending a `TcpStream` across threads is doable by first converting the io-uring `TcpStream` to a standard library `TcpStream`, sending that value to a new thread, and converting it back to an io-uring `TcpStream`. Unlike the standard library, the `File` type does not expose a byte stream directly. Instead, the caller requests a read or write stream, making it possible to support multiple concurrent streams. Each file stream maintains its cursor and issues positional read and write operations based on the cursor. ```rust let read_stream = my_file.read_stream(); let write_stream = my_file.write_stream(); read_stream.fill_buf().await? 
let buf: IoBuf = read_stream.take_read_buf(); write_stream.place_write_buf(buf); write_stream.flush().await?; // Because `read_stream` and `write_stream` maintain separate // cursors, `my_file` is unchanged at the end of this example. ``` Byte streams may have a configurable number of concurrent in-flight operations. Achieving maximum throughput [requires configuring][modern-storage] this value to take advantage of the underlying hardware's characteristics. [modern-storage]: https://itnext.io/modern-storage-is-plenty-fast-it-is-the-apis-that-are-bad-6a68319fbc1a ## Traits The tokio-uring crate will not expose any traits. The crate does not aim to be a point of abstraction for submission-based I/O models. Instead, to provide compatibility with the existing Rust asynchronous I/O ecosystem, byte stream types, such as `TcpStream`, will implement Tokio's [AsyncRead], [AsyncWrite], and [AsyncBufRead] traits. Using these traits requires an additional copy between the caller's buffer and the byte stream's internal buffer compared to taking and placing buffers. [AsyncRead]: https://docs.rs/tokio/1/tokio/io/trait.AsyncRead.html [AsyncWrite]: https://docs.rs/tokio/1/tokio/io/trait.AsyncWrite.html [AsyncBufRead]: https://docs.rs/tokio/1/tokio/io/trait.AsyncBufRead.html # Implementation details Creating the tokio-uring runtime initializes the io-uring submission and completion queues and a Tokio current-thread epoll-based runtime. Instead of waiting on completion events by blocking the thread on the io-uring completion queue, the tokio-uring runtime registers the completion queue with the epoll handle. By building tokio-uring on top of Tokio's runtime, existing Tokio ecosystem crates can work with the tokio-uring runtime. When the kernel pushes a completion event onto the completion queue, "epoll_wait" unblocks and returns a readiness event. The Tokio current-thread runtime then polls the io-uring driver task, draining the completion queue and notifying completion futures. 
Like Tokio, using an I/O type does not require explicitly referencing the runtime. Operations access the current runtime via a thread-local variable. Requiring a handle would be intrusive as the application must pass the handle throughout the code. Additionally, an explicit handle would require a pointer-sized struct, causing binary bloat. The disadvantage with such an approach is that there is no way to guarantee operation submission happens within the runtime context at compile time. Attempting to use a tokio-uring resource from outside of the runtime will result in a panic. ```rust use tokio_uring::runtime::Runtime; use tokio_uring::net::TcpListener; fn main() { // Binding a TcpListener does not require access to the runtime. let listener = TcpListener::bind("0.0.0.0:1234".parse().unwrap()); let rt = Runtime::new().unwrap(); rt.block_on(async { // This works, as `block_on` sets the thread-local variable. let _ = listener.accept().await; }); // BOOM: panics because called outside of the runtime futures::future::block_on(async { let _ = listener.accept().await; }); } ``` ## Operation state Most io-uring operations reference resources, such as buffers and file descriptors, for the kernel to use. These resources must remain available while the operation is in-flight. Any memory referenced by pointers must remain allocated, and the process must not access the memory. Because asynchronous Rust allows dropping futures at any time, the operation futures may not own data referenced by the in-flight operation. The tokio-uring runtime will take ownership and store resources referenced by operations while they are in-flight. ```rust struct IoUringDriver { // Storage for state referenced by in-flight operations in_flight_operations: Slab, // The io-uring submission and completion queues. queues: IoUringQueues, } struct Operation { // Resources referenced by the kernel, this must stay // available until the operation completes. 
state: State, lifecycle: Lifecycle, } enum State { // An in-flight read operation, `None` when reading into // a kernel-owned buffer pool Read { buffer: Option }, Write { buffer: buf::Slice }, // Accept a TCP socket Accept { ... } Close { ... } // ... other operations } enum Lifecycle { /// The operation has been submitted to uring and is currently in-flight Submitted, /// The submitter is waiting for the completion of the operation Waiting(Waker), /// The submitter no longer has interest in the operation result. Ignored, /// The operation has completed. The completion result is stored. Completed(Completion), } // Completion result returned from the kernel via the completion queue struct Completion { result: io::Result, flags: u32, } ``` The `Operation` struct holds any data referenced by the operation submitted to the kernel, preventing the data from being dropped early. The lifecycle field tracks if the operation is in-flight, has completed, or if the associated future has dropped. The lifecycle field also passes operation results from the driver to the future via the `Completed` variant. When a task starts a read operation, the runtime allocates an entry in the in-flight operation store, storing the buffer and initializing the lifecycle to `Submitted`. The runtime then pushes the operation to the submission queue but does not synchronize it with the kernel. Synchronization happens once the task yields to the runtime, enabling the task to submit multiple operations without synchronizing each one. Delaying synchronization can add a small amount of latency. While not enforced, tasks should execute for no more than 500 microseconds before yielding. When the runtime receives completion results, it must complete the associated operation future. The runtime loads the in-flight operation state, stores the result, transitions the lifecycle to `Completed`, and notifies the waker. 
The next time the caller's task executes, it polls the operation's future which completes and returns the stored result. If the operation's future drops before the operation completes and the operation request is still in the submission queue, the drop function removes the request. Otherwise, it sets the lifecycle to `Ignored` and submits a cancellation request to the kernel. The cancellation request will attempt to terminate the operation, causing it to complete immediately with an error. Cancellation is best-effort; the operation may or may not terminate early. If the operation does complete, the runtime discards the result. The runtime maintains the internal operation state until the completion as this state owns data the kernel may be referencing. ## Read operations Depending on the flavor, read operation can have multiple io-uring operation representations. The io-uring API provides two different read operation codes: `IORING_OP_READ` and `IORING_OP_READ_FIXED`. The first accepts a buffer as a pointer, and the second takes a buffer as an identifier referencing a buffer pool and entry. Additionally, the `IORING_OP_READ` operation can accept a null buffer pointer, indicating that io-uring should pick a buffer from a provided buffer pool. The tokio-uring runtime will determine which opcode to use based on the kind of `IoBuf` kind it receives. The `Vec` kind maps to the `IORING_OP_READ` opcode, and the `Provided` maps to `IORING_OP_READ_FIXED`. ## Prior art [Glommio] is an existing asynchronous Rust runtime built on top of io-uring. This proposal draws heavily from ideas presented there, tweaking concepts to line up with Tokio's idioms. [`@withoutboats`][boats] has explored the space with [ringbahn]. The tokio-uring crate is built on the pure-rust [io-uring] crate, authored by [@quininer]. This crate provides a low-level interface to the io-uring syscalls. 
[Glommio]: https://github.com/DataDog/glommio [boats]: https://github.com/withoutboats [ringbahn]: https://github.com/ringbahn/ringbahn [@quininer]: https://github.com/quininer/ [io-uring]: https://github.com/tokio-rs/io-uring/ ## Future work The main Tokio crate will likely adopt concepts from tokio-uring in the future. The most obvious area is Tokio's file system API, currently implemented using a thread-pool. The current API would remain, and the implementation would use io-uring when supported by the operating system. The tokio-uring APIs may form the basis for a Tokio 2.0 release, though this cannot happen until 2024 at the earliest. As an intermediate step, the tokio-uring crate could explore supporting alternate backends, such as epoll, kqueue, and iocp. The focus will always remain on io-uring. The current design does not cover registering file descriptors with the kernel, which improves file system access performance. After registering a file descriptor with io-uring, it must not move to a different thread, implying `!Send`. Because many resource types are Send, a likely path for supporting the feature is adding new types to represent the registered state. For example, `File` could have a `RegisteredFile` analog. Making this change would be forwards compatible and not impact the current design. ## Alternatives ### Use a work-stealing scheduler An alternative approach could use a work-stealing scheduler, allowing underutilized worker threads to steal work from overloaded workers. While work-stealing is an efficient strategy for balancing load across worker threads, required synchronization adds overhead. [An earlier article] on the Tokio blog includes an overview of various scheduling strategies. The tokio-uring crate targets use-cases that can benefit from taking advantage of io-uring at the expense of discarding Tokio's portable API. 
These use cases will also benefit from reduced synchronization overhead and fine-grained control over thread load balancing strategies. [An earlier article]: https://tokio.rs/blog/2019-10-scheduler ### Read and write methods on TcpStream Read and write operations on byte stream types, such as `TcpStream`, are stateful. Each operation operates on the next chunk of the stream, advancing a logical cursor. The combination of submit-based operations with drop-to-cancel semantics presents a challenge. Consider the following. ```rust let buf = IoBuf::with_capacity(4096); // This read will be canceled. select! { _ = my_tcp_stream.read(buf.slice(..)) => unreachable!(), _ = future::ready(()) => {} } let buf = IoBuf::with_capacity(4096); let (res, buf) = my_tcp_stream.read(buf.slice(..)).await; ``` Dropping the first read operation cancels it, and because it never completes, the second read operation should read the first packet of data from the TCP stream. However, the kernel may have already completed the first read operation before seeing the cancellation request. A naive runtime implementation would drop the completion result of any canceled operation, which would result in losing data. Instead, the runtime could preserve the data from the first read operation and return it as part of the second read. The process would not lose data, but the runtime would need to perform an extra copy or return the caller a different buffer than the one it submitted. The proposed API is not vulnerable to this issue as resources track their operations, preventing them from being dropped as long as the resource is open. ### Expose a raw io-uring operation submission API The proposed tokio-uring API does not include a strategy for the user to submit custom io-uring operations. Any raw API would be unsafe or would require the runtime to support taking opaque data as a trait object. 
Given that io-uring has a well-defined set of supported options, tokio-uring opts to support each operation explicitly. The application also can create its own set of io-uring queues using the io-uring crate directly. ### Load-balancing spawn function The tokio-uring crate omits a spawn function that balances tasks across threads, leaving this to future work. Instead, the application should manage its load balancing. While there are a few common balancing strategies, such as round-robin, randomized, and power of two choices, there is no one-size-fits-all strategy. Additionally, some load-balancing methods require application-specific metrics such as worker load. Additionally, consider pseudocode representing a typical accept loop pattern. ```rust loop { let (socket, _) = listener.accept().await?; spawn(async move { let request = read_request(&socket).await; let response = process(request).await; write_response(response, &socket).await; }); } ``` Often, the request data is already buffered when the socket is accepted. If the spawn call results in the task moving to a different worker thread, it will delay reading the request due to cross-thread synchronization. Additionally, it may be possible to batch an accept operation with an operation that reads from the accepted socket in the future, complicating moving the accepted socket to a new thread. The details of such a strategy are still not finalized but may impact a load-balancing spawn function. 
tokio-uring-0.5.0/LICENSE000064400000000000000000000020371046102023000131040ustar 00000000000000Copyright (c) 2021 Carl Lerche Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. tokio-uring-0.5.0/README.md000064400000000000000000000041461046102023000133610ustar 00000000000000# tokio-uring This crate provides [`io-uring`] for [Tokio] by exposing a new Runtime that is compatible with Tokio but also can drive [`io-uring`]-backed resources. Any library that works with [Tokio] also works with `tokio-uring`. The crate provides new resource types that work with [`io-uring`]. [`io-uring`]: https://unixism.net/loti/ [Tokio]: https://github.com/tokio-rs/tokio [`fs::File`]: https://docs.rs/tokio-uring/latest/tokio_uring/fs/struct.File.html [API Docs](https://docs.rs/tokio-uring/latest/tokio_uring) | [Chat](https://discord.gg/tokio) # Getting started Using `tokio-uring` requires starting a [`tokio-uring`] runtime. This runtime internally manages the main Tokio runtime and a `io-uring` driver. 
In your Cargo.toml: ```toml [dependencies] tokio-uring = { version = "0.5.0" } ``` In your main.rs: ```rust use tokio_uring::fs::File; fn main() -> Result<(), Box> { tokio_uring::start(async { // Open a file let file = File::open("hello.txt").await?; let buf = vec![0; 4096]; // Read some data, the buffer is passed by ownership and // submitted to the kernel. When the operation completes, // we get the buffer back. let (res, buf) = file.read_at(buf, 0).await; let n = res?; // Display the contents println!("{:?}", &buf[..n]); Ok(()) }) } ``` ## Requirements `tokio-uring` requires a very recent linux kernel. (Not even all kernels with io_uring support will work) In particular `5.4.0` does not work (This is standard on Ubuntu 20.4). However `5.11.0` (the ubuntu hwe image) does work. ## Project status The `tokio-uring` project is still very young. Currently, we are focusing on supporting filesystem and network operations. Eventually, we will add safe APIs for all io-uring compatible operations. ## License This project is licensed under the [MIT license]. [MIT license]: LICENSE ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in tokio-uring by you, shall be licensed as MIT, without any additional terms or conditions. 
tokio-uring-0.5.0/benches/criterion/no_op.rs000064400000000000000000000041731046102023000171670ustar 00000000000000use criterion::{ criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput, }; use std::time::{Duration, Instant}; use tokio::task::JoinSet; #[derive(Clone)] struct Options { iterations: usize, concurrency: usize, sq_size: usize, cq_size: usize, } impl Default for Options { fn default() -> Self { Self { iterations: 100000, concurrency: 1, sq_size: 128, cq_size: 256, } } } fn run_no_ops(opts: &Options, count: u64) -> Duration { let mut ring_opts = tokio_uring::uring_builder(); ring_opts.setup_cqsize(opts.cq_size as _); let mut m = Duration::ZERO; // Run the required number of iterations for _ in 0..count { m += tokio_uring::builder() .entries(opts.sq_size as _) .uring_builder(&ring_opts) .start(async move { let mut js = JoinSet::new(); for _ in 0..opts.iterations { js.spawn_local(tokio_uring::no_op()); } let start = Instant::now(); while let Some(res) = js.join_next().await { res.unwrap().unwrap(); } start.elapsed() }) } m } fn bench(c: &mut Criterion) { let mut group = c.benchmark_group("no_op"); let mut opts = Options::default(); for concurrency in [1, 32, 64, 256].iter() { opts.concurrency = *concurrency; // We perform long running benchmarks: this is the best mode group.sampling_mode(SamplingMode::Flat); group.throughput(Throughput::Elements(opts.iterations as u64)); group.bench_with_input( BenchmarkId::from_parameter(concurrency), &opts, |b, opts| { // Custom iterator used because we don't expose access to runtime, // which is required to do async benchmarking with criterion b.iter_custom(move |iter| run_no_ops(opts, iter)); }, ); } group.finish(); } criterion_group!(benches, bench); criterion_main!(benches); tokio-uring-0.5.0/benches/lai/no_op.rs000064400000000000000000000040231046102023000157300ustar 00000000000000use iai::black_box; use tokio::task::JoinSet; #[derive(Clone)] struct Options { iterations: usize, 
concurrency: usize, sq_size: usize, cq_size: usize, } impl Default for Options { fn default() -> Self { Self { iterations: 100000, concurrency: 1, sq_size: 64, cq_size: 256, } } } fn runtime_only() -> Result<(), Box> { let opts = Options::default(); let mut ring_opts = tokio_uring::uring_builder(); ring_opts.setup_cqsize(opts.cq_size as _); tokio_uring::builder() .entries(opts.sq_size as _) .uring_builder(&ring_opts) .start(async move { black_box(Ok(())) }) } fn run_no_ops(opts: Options) -> Result<(), Box> { let mut ring_opts = tokio_uring::uring_builder(); ring_opts.setup_cqsize(opts.cq_size as _); tokio_uring::builder() .entries(opts.sq_size as _) .uring_builder(&ring_opts) .start(async move { let mut js = JoinSet::new(); for _ in 0..opts.iterations { js.spawn_local(tokio_uring::no_op()); } while let Some(res) = js.join_next().await { res.unwrap().unwrap(); } Ok(()) }) } // This provides a baseline for estimating op overhead on top of this fn no_op_x1() -> Result<(), Box> { let opts = Options::default(); run_no_ops(black_box(opts)) } fn no_op_x32() -> Result<(), Box> { let mut opts = Options::default(); opts.concurrency = 32; run_no_ops(black_box(opts)) } fn no_op_x64() -> Result<(), Box> { let mut opts = Options::default(); opts.concurrency = 64; run_no_ops(black_box(opts)) } fn no_op_x256() -> Result<(), Box> { let mut opts = Options::default(); opts.concurrency = 256; run_no_ops(black_box(opts)) } iai::main!(runtime_only, no_op_x1, no_op_x32, no_op_x64, no_op_x256); tokio-uring-0.5.0/examples/cat.rs000064400000000000000000000017101046102023000150270ustar 00000000000000use std::{ io::Write, {env, io}, }; use tokio_uring::fs::File; fn main() { // The file to `cat` is passed as a CLI argument let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no path specified"); } let path = &args[1]; // Lock stdout let out = io::stdout(); let mut out = out.lock(); tokio_uring::start(async { // Open the file without blocking let file = 
File::open(path).await.unwrap(); let mut buf = vec![0; 16 * 1_024]; // Track the current position in the file; let mut pos = 0; loop { // Read a chunk let (res, b) = file.read_at(buf, pos).await; let n = res.unwrap(); if n == 0 { break; } out.write_all(&b[..n]).unwrap(); pos += n as u64; buf = b; } // Include a new line println!(); }); } tokio-uring-0.5.0/examples/mix.rs000064400000000000000000000026721046102023000150650ustar 00000000000000//! Shows how use Tokio types from the `tokio-uring` runtime. //! //! Serve a single file over TCP use std::env; use tokio_uring::{fs::File, net::TcpListener}; fn main() { // The file to serve over TCP is passed as a CLI argument let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no path specified"); } tokio_uring::start(async { // Start a TCP listener let listener = TcpListener::bind("0.0.0.0:8080".parse().unwrap()).unwrap(); // Accept new sockets loop { let (socket, _) = listener.accept().await.unwrap(); let path = args[1].clone(); // Spawn a task to send the file back to the socket tokio_uring::spawn(async move { // Open the file without blocking let file = File::open(path).await.unwrap(); let mut buf = vec![0; 16 * 1_024]; // Track the current position in the file; let mut pos = 0; loop { // Read a chunk let (res, b) = file.read_at(buf, pos).await; let n = res.unwrap(); if n == 0 { break; } let (res, b) = socket.write(b).submit().await; pos += res.unwrap() as u64; buf = b; } }); } }); } tokio-uring-0.5.0/examples/tcp_listener.rs000064400000000000000000000027551046102023000167650ustar 00000000000000use std::{env, net::SocketAddr}; use tokio_uring::net::TcpListener; fn main() { let args: Vec<_> = env::args().collect(); let socket_addr = if args.len() <= 1 { "127.0.0.1:0" } else { args[1].as_ref() }; let socket_addr: SocketAddr = socket_addr.parse().unwrap(); tokio_uring::start(async { let listener = TcpListener::bind(socket_addr).unwrap(); println!("Listening on {}", listener.local_addr().unwrap()); loop { 
let (stream, socket_addr) = listener.accept().await.unwrap(); tokio_uring::spawn(async move { // implement ping-pong loop use tokio_uring::buf::BoundedBuf; // for slice() println!("{} connected", socket_addr); let mut n = 0; let mut buf = vec![0u8; 4096]; loop { let (result, nbuf) = stream.read(buf).await; buf = nbuf; let read = result.unwrap(); if read == 0 { println!("{} closed, {} total ping-ponged", socket_addr, n); break; } let (res, slice) = stream.write_all(buf.slice(..read)).await; res.unwrap(); buf = slice.into_inner(); println!("{} all {} bytes ping-ponged", socket_addr, read); n += read; } }); } }); } tokio-uring-0.5.0/examples/tcp_listener_fixed_buffers.rs000064400000000000000000000065021046102023000216520ustar 00000000000000// An example of an echo server using fixed buffers for reading and writing TCP streams. // A buffer registry size of two is created, to allow a maximum of two simultaneous connections. use std::{env, iter, net::SocketAddr}; use tokio_uring::{ buf::{fixed::FixedBufRegistry, BoundedBuf, IoBufMut}, net::{TcpListener, TcpStream}, }; // BoundedBuf for slice method // A contrived example, where just two fixed buffers are created. const POOL_SIZE: usize = 2; fn main() { let args: Vec<_> = env::args().collect(); let socket_addr = if args.len() <= 1 { "127.0.0.1:0" } else { args[1].as_ref() }; let socket_addr: SocketAddr = socket_addr.parse().unwrap(); tokio_uring::start(accept_loop(socket_addr)); } // Bind to address and accept connections, spawning an echo handler for each connection. async fn accept_loop(listen_addr: SocketAddr) { let listener = TcpListener::bind(listen_addr).unwrap(); println!( "Listening on {}, fixed buffer pool size only {POOL_SIZE}", listener.local_addr().unwrap() ); // Other iterators may be passed to FixedBufRegistry::new also. let registry = FixedBufRegistry::new(iter::repeat(vec![0; 4096]).take(POOL_SIZE)); // Register the buffers with the kernel, asserting the syscall passed. 
registry.register().unwrap(); loop { let (stream, peer) = listener.accept().await.unwrap(); tokio_uring::spawn(echo_handler(stream, peer, registry.clone())); } } // A loop that echoes input to output. Use one fixed buffer for receiving and sending the response // back. Once the connection is closed, the function returns and the fixed buffer is dropped, // getting the fixed buffer index returned to the available pool kept by the registry. async fn echo_handler( stream: TcpStream, peer: SocketAddr, registry: FixedBufRegistry, ) { println!("peer {} connected", peer); // Get one of the two fixed buffers. // If neither is unavailable, print reason and return immediately, dropping this connection; // be nice and shutdown the connection before dropping it so the client sees the connection is // closed immediately. let mut fbuf = registry.check_out(0); if fbuf.is_none() { fbuf = registry.check_out(1); }; if fbuf.is_none() { let _ = stream.shutdown(std::net::Shutdown::Write); println!("peer {} closed, no fixed buffers available", peer); return; }; let mut fbuf = fbuf.unwrap(); let mut n = 0; loop { // Each time through the loop, use fbuf and then get it back for the next // iteration. let (result, fbuf1) = stream.read_fixed(fbuf).await; fbuf = { let read = result.unwrap(); if read == 0 { break; } assert_eq!(4096, fbuf1.len()); // To prove a point. let (res, nslice) = stream.write_fixed_all(fbuf1.slice(..read)).await; res.unwrap(); println!("peer {} all {} bytes ping-ponged", peer, read); n += read; // Important. One of the points of this example. nslice.into_inner() // Return the buffer we started with. 
}; } let _ = stream.shutdown(std::net::Shutdown::Write); println!("peer {} closed, {} total ping-ponged", peer, n); } tokio-uring-0.5.0/examples/tcp_stream.rs000064400000000000000000000012111046102023000164150ustar 00000000000000use std::{env, net::SocketAddr}; use tokio_uring::net::TcpStream; fn main() { let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no addr specified"); } let socket_addr: SocketAddr = args[1].parse().unwrap(); tokio_uring::start(async { let stream = TcpStream::connect(socket_addr).await.unwrap(); let buf = vec![1u8; 128]; let (result, buf) = stream.write(buf).submit().await; println!("written: {}", result.unwrap()); let (result, buf) = stream.read(buf).await; let read = result.unwrap(); println!("read: {:?}", &buf[..read]); }); } tokio-uring-0.5.0/examples/test_create_dir_all.rs000064400000000000000000000215671046102023000202640ustar 00000000000000use std::io; use std::path::Path; use tokio_uring::fs; fn tests() -> std::slice::Iter<'static, Expected<'static>> { [ // // A number of Fail cases because of permissions (assuming not running as root). // Expected::Fail(Op::create_dir("/no-good")), Expected::Fail(Op::create_dir("/no-good/lots/more")), Expected::Fail(Op::create_dir_all("/no-good")), Expected::Fail(Op::create_dir_all("/no-good/lots/more")), Expected::Fail(Op::DirBuilder("/no-good")), Expected::Fail(Op::DirBuilder2("/no-good/lots/more", false, 0o777)), Expected::Fail(Op::DirBuilder2("/no-good/lots/more", true, 0o777)), // // A sequence of steps where assumption is /tmp exists and /tmp/test-good does not. 
// Expected::Pass(Op::create_dir("/tmp/test-good")), Expected::Pass(Op::statx("/tmp/test-good")), Expected::Pass(Op::StatxBuilder("/tmp/test-good")), Expected::Pass(Op::StatxBuilder2("/tmp", "test-good")), Expected::Pass(Op::StatxBuilder2("/tmp", "./test-good")), Expected::Pass(Op::StatxBuilder2("/tmp/", "./test-good")), Expected::Pass(Op::StatxBuilder2("/etc/", "/tmp/test-good")), Expected::Pass(Op::is_dir("/tmp/test-good")), Expected::Fail(Op::is_regfile("/tmp/test-good")), Expected::Pass(Op::create_dir("/tmp/test-good/x1")), Expected::Fail(Op::create_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good/x1")), Expected::Fail(Op::remove_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good")), Expected::Pass(Op::create_dir_all("/tmp/test-good/lots/lots/more")), Expected::Pass(Op::create_dir_all("/tmp/test-good/lots/lots/more")), Expected::Pass(Op::remove_dir("/tmp/test-good/lots/lots/more")), Expected::Pass(Op::remove_dir("/tmp/test-good/lots/lots")), Expected::Pass(Op::remove_dir("/tmp/test-good/lots")), Expected::Pass(Op::remove_dir("/tmp/test-good")), Expected::Fail(Op::statx("/tmp/test-good")), Expected::Fail(Op::StatxBuilder("/tmp/test-good")), // // A sequence that tests when mode is passed as 0, the directory can't be written to. 
// Expected::Pass(Op::DirBuilder2("/tmp/test-good", true, 0)), Expected::Pass(Op::matches_mode("/tmp/test-good", 0)), Expected::Fail(Op::create_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good")), // // A sequence that tests creation of a user rwx only directory // Expected::Pass(Op::DirBuilder2("/tmp/test-good", true, 0o700)), Expected::Pass(Op::matches_mode("/tmp/test-good", 0o700)), Expected::Pass(Op::create_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good")), // // Same sequence but with recursive = false // Expected::Pass(Op::DirBuilder2("/tmp/test-good", false, 0)), Expected::Fail(Op::create_dir("/tmp/test-good/x1")), Expected::Pass(Op::remove_dir("/tmp/test-good")), // // Some file operations // Expected::Pass(Op::touch_file("/tmp/test-good-file")), Expected::Pass(Op::is_regfile("/tmp/test-good-file")), Expected::Fail(Op::is_dir("/tmp/test-good-file")), Expected::Pass(Op::remove_file("/tmp/test-good-file")), Expected::Fail(Op::is_regfile("/tmp/test-good-file")), Expected::Fail(Op::is_dir("/tmp/test-good-file")), ] .iter() } type OpPath<'a> = &'a str; #[allow(non_camel_case_types)] #[allow(dead_code)] #[derive(Debug)] enum Op<'a> { statx(OpPath<'a>), StatxBuilder(OpPath<'a>), StatxBuilder2(OpPath<'a>, OpPath<'a>), matches_mode(OpPath<'a>, u16), is_regfile(OpPath<'a>), is_dir(OpPath<'a>), touch_file(OpPath<'a>), create_dir(OpPath<'a>), create_dir_all(OpPath<'a>), DirBuilder(OpPath<'a>), DirBuilder2(OpPath<'a>, bool, u32), remove_file(OpPath<'a>), remove_dir(OpPath<'a>), } #[derive(Debug)] enum Expected<'a> { Pass(Op<'a>), Fail(Op<'a>), } async fn main1() -> io::Result<()> { let (mut as_expected, mut unexpected) = (0, 0); for test in tests() { let (expect_to_pass, op) = match test { Expected::Pass(op) => (true, op), Expected::Fail(op) => (false, op), }; let res = match op { Op::statx(path) => statx(path).await, Op::StatxBuilder(path) => 
statx_builder(path).await, Op::StatxBuilder2(path, rel_path) => statx_builder2(path, rel_path).await, Op::matches_mode(path, mode) => matches_mode(path, *mode).await, Op::is_regfile(path) => is_regfile(path).await, Op::is_dir(path) => is_dir(path).await, Op::touch_file(path) => touch_file(path).await, Op::create_dir(path) => fs::create_dir(path).await, Op::create_dir_all(path) => fs::create_dir_all(path).await, Op::DirBuilder(path) => fs::DirBuilder::new().create(path).await, Op::DirBuilder2(path, recursive, mode) => { fs::DirBuilder::new() .recursive(*recursive) .mode(*mode) .create(path) .await } Op::remove_file(path) => fs::remove_file(path).await, Op::remove_dir(path) => fs::remove_dir(path).await, }; let verbose = true; match res { Ok(_) => { if expect_to_pass { as_expected += 1; if verbose { println!("Success: {op:?} passed."); } } else { unexpected += 1; println!("Failure: {op:?} expected to fail but passed."); } } Err(e) => { if expect_to_pass { unexpected += 1; println!("Failure: {op:?} expected to pass but failed with error \"{e}\"."); } else { as_expected += 1; if verbose { println!("Success: {op:?} expected to fail and did with error \"{e}\"."); } } } } } println!("{as_expected} as_expected, {unexpected} unexpected"); if unexpected == 0 { Ok(()) } else { Err(std::io::Error::new( std::io::ErrorKind::Other, format!("{unexpected} unexpected result(s)"), )) } } async fn statx>(path: P) -> io::Result<()> { let _statx = tokio_uring::fs::statx(path).await?; Ok(()) } async fn statx_builder>(path: P) -> io::Result<()> { let _statx = tokio_uring::fs::StatxBuilder::new() .pathname(path)? .statx() .await?; Ok(()) } async fn statx_builder2>(dir_path: P, rel_path: P) -> io::Result<()> { // This shows the power of combining an open file, presumably a directory, and the relative // path to have the statx operation return the meta data for the child of the opened directory // descriptor. 
let f = tokio_uring::fs::File::open(dir_path).await?; // Fetch file metadata let res = f.statx_builder().pathname(rel_path)?.statx().await; // Close the file f.close().await?; res.map(|_| ()) } async fn matches_mode>(path: P, want_mode: u16) -> io::Result<()> { let statx = tokio_uring::fs::StatxBuilder::new() .mask(libc::STATX_MODE) .pathname(path)? .statx() .await?; let got_mode = statx.stx_mode & 0o7777; if want_mode == got_mode { Ok(()) } else { Err(std::io::Error::new( std::io::ErrorKind::Other, format!("want mode {want_mode:#o}, got mode {got_mode:#o}"), )) } } async fn touch_file>(path: P) -> io::Result<()> { let file = tokio_uring::fs::OpenOptions::new() .append(true) .create(true) .open(path) .await?; file.close().await } async fn is_regfile>(path: P) -> io::Result<()> { let (_is_dir, is_regfile) = tokio_uring::fs::is_dir_regfile(path).await; if is_regfile { Ok(()) } else { Err(std::io::Error::new( std::io::ErrorKind::Other, "not regular file", )) } } async fn is_dir>(path: P) -> io::Result<()> { let (is_dir, _is_regfile) = tokio_uring::fs::is_dir_regfile(path).await; if is_dir { Ok(()) } else { Err(std::io::Error::new( std::io::ErrorKind::Other, "not directory", )) } } fn main() { tokio_uring::start(async { if let Err(e) = main1().await { println!("error: {}", e); } }); } tokio-uring-0.5.0/examples/udp_socket.rs000064400000000000000000000013471046102023000164260ustar 00000000000000use std::{env, net::SocketAddr}; use tokio_uring::net::UdpSocket; fn main() { let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no addr specified"); } let socket_addr: SocketAddr = args[1].parse().unwrap(); tokio_uring::start(async { let socket = UdpSocket::bind(socket_addr).await.unwrap(); let buf = vec![0u8; 128]; let (result, mut buf) = socket.recv_from(buf).await; let (read, socket_addr) = result.unwrap(); buf.resize(read, 0); println!("received from {}: {:?}", socket_addr, &buf[..]); let (result, _buf) = socket.send_to(buf, socket_addr).await; 
println!("sent to {}: {}", socket_addr, result.unwrap()); }); } tokio-uring-0.5.0/examples/unix_listener.rs000064400000000000000000000016071046102023000171550ustar 00000000000000use std::env; use tokio_uring::net::UnixListener; fn main() { let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no addr specified"); } let socket_addr: String = args[1].clone(); tokio_uring::start(async { let listener = UnixListener::bind(&socket_addr).unwrap(); loop { let stream = listener.accept().await.unwrap(); let socket_addr = socket_addr.clone(); tokio_uring::spawn(async move { let buf = vec![1u8; 128]; let (result, buf) = stream.write(buf).submit().await; println!("written to {}: {}", &socket_addr, result.unwrap()); let (result, buf) = stream.read(buf).await; let read = result.unwrap(); println!("read from {}: {:?}", &socket_addr, &buf[..read]); }); } }); } tokio-uring-0.5.0/examples/unix_stream.rs000064400000000000000000000011451046102023000166200ustar 00000000000000use std::env; use tokio_uring::net::UnixStream; fn main() { let args: Vec<_> = env::args().collect(); if args.len() <= 1 { panic!("no addr specified"); } let socket_addr: &String = &args[1]; tokio_uring::start(async { let stream = UnixStream::connect(socket_addr).await.unwrap(); let buf = vec![1u8; 128]; let (result, buf) = stream.write(buf).submit().await; println!("written: {}", result.unwrap()); let (result, buf) = stream.read(buf).await; let read = result.unwrap(); println!("read: {:?}", &buf[..read]); }); } tokio-uring-0.5.0/examples/wrk-bench.rs000064400000000000000000000022251046102023000161420ustar 00000000000000use std::io; use std::rc::Rc; use tokio::task::JoinHandle; pub const RESPONSE: &[u8] = b"HTTP/1.1 200 OK\nContent-Type: text/plain\nContent-Length: 12\n\nHello world!"; pub const ADDRESS: &str = "127.0.0.1:8080"; fn main() -> io::Result<()> { tokio_uring::start(async { let mut tasks = Vec::with_capacity(16); let listener = Rc::new(tokio_uring::net::TcpListener::bind( 
ADDRESS.parse().unwrap(), )?); for _ in 0..16 { let listener = listener.clone(); let task: JoinHandle> = tokio::task::spawn_local(async move { loop { let (stream, _) = listener.accept().await?; tokio_uring::spawn(async move { let (result, _) = stream.write(RESPONSE).submit().await; if let Err(err) = result { eprintln!("Client connection failed: {}", err); } }); } }); tasks.push(task); } for t in tasks { t.await.unwrap()?; } Ok(()) }) } tokio-uring-0.5.0/src/buf/bounded.rs000064400000000000000000000137331046102023000154350ustar 00000000000000use super::{IoBuf, IoBufMut, Slice}; use std::ops; use std::ptr; /// A possibly bounded view into an owned [`IoBuf`] buffer. /// /// Because buffers are passed by ownership to the runtime, Rust's slice API /// (`&buf[..]`) cannot be used. Instead, `tokio-uring` provides an owned slice /// API: [`.slice()`]. The method takes ownership of the buffer and returns a /// [`Slice`] value that tracks the requested range. /// /// This trait provides a generic way to use buffers and `Slice` views /// into such buffers with `io-uring` operations. /// /// [`.slice()`]: BoundedBuf::slice pub trait BoundedBuf: Unpin + 'static { /// The type of the underlying buffer. type Buf: IoBuf; /// The type representing the range bounds of the view. type Bounds: ops::RangeBounds; /// Returns a view of the buffer with the specified range. /// /// This method is similar to Rust's slicing (`&buf[..]`), but takes /// ownership of the buffer. The range bounds are specified against /// the possibly offset beginning of the `self` view into the buffer /// and the end bound, if specified, must not exceed the view's total size. /// Note that the range may extend into the uninitialized part of the /// buffer, but it must start (if so bounded) in the initialized part /// or immediately adjacent to it. 
/// /// # Panics /// /// If the range is invalid with regard to the recipient's total size or /// the length of its initialized part, the implementation of this method /// should panic. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(5..10); /// assert_eq!(&slice[..], b" worl"); /// let slice = slice.slice(1..3); /// assert_eq!(&slice[..], b"wo"); /// ``` fn slice(self, range: impl ops::RangeBounds) -> Slice; /// Returns a `Slice` with the view's full range. /// /// This method is to be used by the `tokio-uring` runtime and it is not /// expected for users to call it directly. fn slice_full(self) -> Slice; /// Gets a reference to the underlying buffer. fn get_buf(&self) -> &Self::Buf; /// Returns the range bounds for this view. fn bounds(&self) -> Self::Bounds; /// Constructs a view from an underlying buffer and range bounds. fn from_buf_bounds(buf: Self::Buf, bounds: Self::Bounds) -> Self; /// Like [`IoBuf::stable_ptr`], /// but possibly offset to the view's starting position. fn stable_ptr(&self) -> *const u8; /// Number of initialized bytes available via this view. fn bytes_init(&self) -> usize; /// Total size of the view, including uninitialized memory, if any. 
fn bytes_total(&self) -> usize; } impl BoundedBuf for T { type Buf = Self; type Bounds = ops::RangeFull; fn slice(self, range: impl ops::RangeBounds) -> Slice { use ops::Bound; let begin = match range.start_bound() { Bound::Included(&n) => n, Bound::Excluded(&n) => n.checked_add(1).expect("out of range"), Bound::Unbounded => 0, }; assert!(begin < self.bytes_total()); let end = match range.end_bound() { Bound::Included(&n) => n.checked_add(1).expect("out of range"), Bound::Excluded(&n) => n, Bound::Unbounded => self.bytes_total(), }; assert!(end <= self.bytes_total()); assert!(begin <= self.bytes_init()); Slice::new(self, begin, end) } fn slice_full(self) -> Slice { let end = self.bytes_total(); Slice::new(self, 0, end) } fn get_buf(&self) -> &Self { self } fn bounds(&self) -> Self::Bounds { .. } fn from_buf_bounds(buf: Self, _: ops::RangeFull) -> Self { buf } fn stable_ptr(&self) -> *const u8 { IoBuf::stable_ptr(self) } fn bytes_init(&self) -> usize { IoBuf::bytes_init(self) } fn bytes_total(&self) -> usize { IoBuf::bytes_total(self) } } /// A possibly bounded view into an owned [`IoBufMut`] buffer. /// /// This trait provides a generic way to use mutable buffers and `Slice` views /// into such buffers with `io-uring` operations. pub trait BoundedBufMut: BoundedBuf { /// The type of the underlying buffer. type BufMut: IoBufMut; /// Like [`IoBufMut::stable_mut_ptr`], /// but possibly offset to the view's starting position. fn stable_mut_ptr(&mut self) -> *mut u8; /// Like [`IoBufMut::set_init`], /// but the position is possibly offset to the view's starting position. /// /// # Safety /// /// The caller must ensure that all bytes starting at `stable_mut_ptr()` up /// to `pos` are initialized and owned by the buffer. unsafe fn set_init(&mut self, pos: usize); /// Copies the given byte slice into the buffer, starting at /// this view's offset. /// /// # Panics /// /// If the slice's length exceeds the destination's total capacity, /// this method panics. 
fn put_slice(&mut self, src: &[u8]) { assert!(self.bytes_total() >= src.len()); let dst = self.stable_mut_ptr(); // Safety: // dst pointer validity is ensured by stable_mut_ptr; // the length is checked to not exceed the view's total capacity; // src (immutable) and dst (mutable) cannot point to overlapping memory; // after copying the amount of bytes given by the slice, it's safe // to mark them as initialized in the buffer. unsafe { ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len()); self.set_init(src.len()); } } } impl BoundedBufMut for T { type BufMut = T; fn stable_mut_ptr(&mut self) -> *mut u8 { IoBufMut::stable_mut_ptr(self) } unsafe fn set_init(&mut self, pos: usize) { IoBufMut::set_init(self, pos) } } tokio-uring-0.5.0/src/buf/fixed/buffers.rs000064400000000000000000000014041046102023000165400ustar 00000000000000use libc::iovec; // Abstracts management of fixed buffers in a buffer registry. pub(crate) trait FixedBuffers { // Provides access to the raw buffers as a slice of iovec. fn iovecs(&self) -> &[iovec]; /// Sets the indexed buffer's state to free and records the updated length /// of its initialized part. /// /// # Panics /// /// The buffer addressed must be in the checked out state, /// otherwise this function may panic. /// /// # Safety /// /// While the implementation of this method typically does not need to /// do anything unsafe, the caller must ensure that the bytes in the buffer /// are initialized up to the specified length. unsafe fn check_in(&mut self, buf_index: u16, init_len: usize); } tokio-uring-0.5.0/src/buf/fixed/handle.rs000064400000000000000000000065641046102023000163530ustar 00000000000000use super::FixedBuffers; use crate::buf::{IoBuf, IoBufMut}; use libc::iovec; use std::cell::RefCell; use std::fmt::{self, Debug}; use std::ops::{Deref, DerefMut}; use std::rc::Rc; // Data to construct a `FixedBuf` handle from. pub(crate) struct CheckedOutBuf { // Pointer and size of the buffer. 
pub iovec: iovec, // Length of the initialized part. pub init_len: usize, // Buffer index. pub index: u16, } /// A unique handle to a memory buffer that can be pre-registered with /// the kernel for `io-uring` operations. /// /// `FixedBuf` handles can be obtained from a collection of fixed buffers, /// either [`FixedBufRegistry`] or [`FixedBufPool`]. /// For each buffer, only a single `FixedBuf` handle can be either used by the /// application code or owned by an I/O operation at any given time, /// thus avoiding data races between `io-uring` operations in flight and /// the application accessing buffer data. /// /// [`FixedBufRegistry`]: super::FixedBufRegistry /// [`FixedBufPool`]: super::FixedBufPool /// pub struct FixedBuf { registry: Rc>, buf: CheckedOutBuf, } impl Drop for FixedBuf { fn drop(&mut self) { let mut registry = self.registry.borrow_mut(); // Safety: the length of the initialized data in the buffer has been // maintained accordingly to the safety contracts on // Self::new and IoBufMut. unsafe { registry.check_in(self.buf.index, self.buf.init_len); } } } impl FixedBuf { // Safety: Validity constraints must apply to CheckedOutBuf members: // - the array will not be deallocated until the buffer is checked in; // - the data in the array must be initialized up to the number of bytes // given in init_len. 
pub(super) unsafe fn new(registry: Rc>, buf: CheckedOutBuf) -> Self { FixedBuf { registry, buf } } /// Index of the underlying registry buffer pub fn buf_index(&self) -> u16 { self.buf.index } } unsafe impl IoBuf for FixedBuf { fn stable_ptr(&self) -> *const u8 { self.buf.iovec.iov_base as _ } fn bytes_init(&self) -> usize { self.buf.init_len } fn bytes_total(&self) -> usize { self.buf.iovec.iov_len } } unsafe impl IoBufMut for FixedBuf { fn stable_mut_ptr(&mut self) -> *mut u8 { self.buf.iovec.iov_base as _ } unsafe fn set_init(&mut self, pos: usize) { if self.buf.init_len < pos { self.buf.init_len = pos } } } impl Deref for FixedBuf { type Target = [u8]; fn deref(&self) -> &[u8] { // Safety: The iovec points to a slice held in self.buffers, to which no mutable reference exists. unsafe { std::slice::from_raw_parts(self.buf.iovec.iov_base as _, self.buf.init_len) } } } impl DerefMut for FixedBuf { fn deref_mut(&mut self) -> &mut [u8] { // Safety: The iovec points to a slice held in self.buffers, to which no other reference exists. unsafe { std::slice::from_raw_parts_mut(self.buf.iovec.iov_base as _, self.buf.init_len) } } } impl Debug for FixedBuf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let buf: &[u8] = self; f.debug_struct("FixedBuf") .field("buf", &buf) // as slice .field("index", &self.buf.index) .finish_non_exhaustive() } } tokio-uring-0.5.0/src/buf/fixed/mod.rs000064400000000000000000000017511046102023000156700ustar 00000000000000//! Buffers pre-registered with the kernel. //! //! This module provides facilities for registering in-memory buffers with //! the `tokio-uring` runtime. Operations like [`File::read_fixed_at`][rfa] and //! [`File::write_fixed_at`][wfa] make use of buffers pre-mapped by //! the kernel to reduce per-I/O overhead. //! //! Two kinds of buffer collections are provided: [`FixedBufRegistry`] and //! [`FixedBufPool`], realizing two different patterns of buffer management. //! 
The `register` method on either of these types is used to register a //! collection of buffers with the kernel. It must be called before any of //! the [`FixedBuf`] handles to the collection's buffers can be used with //! I/O operations. //! //! [rfa]: crate::fs::File::read_fixed_at //! [wfa]: crate::fs::File::write_fixed_at mod handle; pub use handle::FixedBuf; mod buffers; pub(crate) use buffers::FixedBuffers; mod plumbing; pub mod pool; pub use pool::FixedBufPool; mod registry; pub use registry::FixedBufRegistry; tokio-uring-0.5.0/src/buf/fixed/plumbing/mod.rs000064400000000000000000000002751046102023000175050ustar 00000000000000// Internal data structures shared between thread-local and thread-safe // fixed buffer collections. mod pool; pub(super) use pool::Pool; mod registry; pub(super) use registry::Registry; tokio-uring-0.5.0/src/buf/fixed/plumbing/pool.rs000064400000000000000000000160621046102023000177000ustar 00000000000000use crate::buf::fixed::{handle::CheckedOutBuf, FixedBuffers}; use crate::buf::IoBufMut; use libc::{iovec, UIO_MAXIOV}; use tokio::sync::Notify; use std::cmp; use std::collections::HashMap; use std::mem; use std::ptr; use std::slice; use std::sync::Arc; // Internal state shared by FixedBufPool and FixedBuf handles. pub(crate) struct Pool { // Pointer to an allocated array of iovec records referencing // the allocated buffers. The number of initialized records is the // same as the length of the states array. raw_bufs: ptr::NonNull, // Original capacity of raw_bufs as a Vec. orig_cap: usize, // State information on the buffers. Indices in this array correspond to // the indices in the array at raw_bufs. states: Vec, // Table of head indices of the free buffer lists in each size bucket. free_buf_head_by_cap: HashMap, // Original buffers, kept until drop buffers: Vec, // Used to notify tasks pending on `next` notify_next_by_cap: HashMap>, } // State information of a buffer in the registry, enum BufState { // The buffer is not in use. 
Free { // This field records the length of the initialized part. init_len: usize, // Index of the next buffer of the same capacity in a free buffer list, if any. next: Option, }, // The buffer is checked out. // Its data are logically owned by the FixedBuf handle, // which also keeps track of the length of the initialized part. CheckedOut, } impl Pool { pub(crate) fn new(bufs: impl Iterator) -> Self { // Limit the number of buffers to the maximum allowable number. let bufs = bufs.take(cmp::min(UIO_MAXIOV as usize, u16::MAX as usize)); // Collect into `buffers`, which holds the backing buffers for // the lifetime of the pool. Using collect may allow // the compiler to apply collect in place specialization, // to avoid an allocation. let mut buffers = bufs.collect::>(); let mut iovecs = Vec::with_capacity(buffers.len()); let mut states = Vec::with_capacity(buffers.len()); let mut free_buf_head_by_cap = HashMap::new(); for (index, buf) in buffers.iter_mut().enumerate() { let cap = buf.bytes_total(); // Link the buffer as the head of the free list for its capacity. // This constructs the free buffer list to be initially retrieved // back to front, which should be of no difference to the user. let next = free_buf_head_by_cap.insert(cap, index as u16); iovecs.push(iovec { iov_base: buf.stable_mut_ptr() as *mut _, iov_len: cap, }); states.push(BufState::Free { init_len: buf.bytes_init(), next, }); } debug_assert_eq!(iovecs.len(), states.len()); debug_assert_eq!(iovecs.len(), buffers.len()); // Safety: Vec::as_mut_ptr never returns null let raw_bufs = unsafe { ptr::NonNull::new_unchecked(iovecs.as_mut_ptr()) }; let orig_cap = iovecs.capacity(); mem::forget(iovecs); Pool { raw_bufs, orig_cap, states, free_buf_head_by_cap, buffers, notify_next_by_cap: HashMap::new(), } } // If the free buffer list for this capacity is not empty, checks out the first buffer // from the list and returns its data. Otherwise, returns None. 
pub(crate) fn try_next(&mut self, cap: usize) -> Option { let free_head = self.free_buf_head_by_cap.get_mut(&cap)?; let index = *free_head as usize; let state = &mut self.states[index]; let (init_len, next) = match *state { BufState::Free { init_len, next } => { *state = BufState::CheckedOut; (init_len, next) } BufState::CheckedOut => panic!("buffer is checked out"), }; // Update the head of the free list for this capacity. match next { Some(i) => { *free_head = i; } None => { self.free_buf_head_by_cap.remove(&cap); } } // Safety: the allocated array under the pointer is valid // for the lifetime of self, a free buffer index is inside the array, // as also asserted by the indexing operation on the states array // that has the same length. let iovec = unsafe { self.raw_bufs.as_ptr().add(index).read() }; debug_assert_eq!(iovec.iov_len, cap); Some(CheckedOutBuf { iovec, init_len, index: index as u16, }) } // Returns a `Notify` to use for waking up tasks awaiting a buffer of // the specified capacity. pub(crate) fn notify_on_next(&mut self, cap: usize) -> Arc { let notify = self.notify_next_by_cap.entry(cap).or_default(); Arc::clone(notify) } fn check_in_internal(&mut self, index: u16, init_len: usize) { let cap = self.iovecs()[index as usize].iov_len; let state = &mut self.states[index as usize]; debug_assert!( matches!(state, BufState::CheckedOut), "the buffer must be checked out" ); // Link the buffer as the new head of the free list for its capacity. // Recently checked in buffers will be first to be reused, // improving cache locality. 
let next = self.free_buf_head_by_cap.insert(cap, index); *state = BufState::Free { init_len, next }; if let Some(notify) = self.notify_next_by_cap.get(&cap) { // Wake up a single task pending on `next` notify.notify_one(); } } } impl FixedBuffers for Pool { fn iovecs(&self) -> &[iovec] { // Safety: the raw_bufs pointer is valid for the lifetime of self, // the length of the states array is also the length of buffers array // by construction. unsafe { slice::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len()) } } unsafe fn check_in(&mut self, index: u16, init_len: usize) { self.check_in_internal(index, init_len) } } impl Drop for Pool { fn drop(&mut self) { for (i, state) in self.states.iter().enumerate() { match state { BufState::Free { init_len, .. } => { // Update buffer initialization. // The buffer is about to dropped, but this may release it // from Registry ownership, rather than deallocate. unsafe { self.buffers[i].set_init(*init_len) }; } BufState::CheckedOut => unreachable!("all buffers must be checked in"), } } // Rebuild Vec, so it's dropped let _ = unsafe { Vec::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len(), self.orig_cap) }; } } tokio-uring-0.5.0/src/buf/fixed/plumbing/registry.rs000064400000000000000000000117751046102023000206050ustar 00000000000000use crate::buf::fixed::{handle::CheckedOutBuf, FixedBuffers}; use crate::buf::IoBufMut; use libc::{iovec, UIO_MAXIOV}; use std::cmp; use std::mem; use std::ptr; use std::slice; // Internal state shared by FixedBufRegistry and FixedBuf handles. pub(crate) struct Registry { // Pointer to an allocated array of iovec records referencing // the allocated buffers. The number of initialized records is the // same as the length of the states array. raw_bufs: ptr::NonNull, // Original capacity of raw_bufs as a Vec. orig_cap: usize, // State information on the buffers. Indices in this array correspond to // the indices in the array at raw_bufs. 
states: Vec, // The owned buffers are kept until Drop buffers: Vec, } // State information of a buffer in the registry, enum BufState { // The buffer is not in use. // The field records the length of the initialized part. Free { init_len: usize }, // The buffer is checked out. // Its data are logically owned by the FixedBuf handle, // which also keeps track of the length of the initialized part. CheckedOut, } impl Registry { pub(crate) fn new(bufs: impl Iterator) -> Self { // Limit the number of buffers to the maximum allowable number. let bufs = bufs.take(cmp::min(UIO_MAXIOV as usize, u16::MAX as usize)); // Collect into `buffers`, which holds the backing buffers for // the lifetime of the pool. Using collect may allow // the compiler to apply collect in place specialization, // to avoid an allocation. let mut buffers = bufs.collect::>(); let mut iovecs = Vec::with_capacity(buffers.len()); let mut states = Vec::with_capacity(buffers.len()); for buf in buffers.iter_mut() { iovecs.push(iovec { iov_base: buf.stable_mut_ptr() as *mut _, iov_len: buf.bytes_total(), }); states.push(BufState::Free { init_len: buf.bytes_init(), }); } debug_assert_eq!(iovecs.len(), states.len()); debug_assert_eq!(iovecs.len(), buffers.len()); // Safety: Vec::as_mut_ptr never returns null let raw_bufs = unsafe { ptr::NonNull::new_unchecked(iovecs.as_mut_ptr()) }; let orig_cap = iovecs.capacity(); mem::forget(iovecs); Registry { raw_bufs, orig_cap, states, buffers, } } // If the indexed buffer is free, changes its state to checked out // and returns its data. // If the buffer is already checked out, returns None. 
pub(crate) fn check_out(&mut self, index: usize) -> Option { let state = self.states.get_mut(index)?; let BufState::Free { init_len } = *state else { return None; }; *state = BufState::CheckedOut; // Safety: the allocated array under the pointer is valid // for the lifetime of self, the index is inside the array // as checked by Vec::get_mut above, called on the array of // states that has the same length. let iovec = unsafe { self.raw_bufs.as_ptr().add(index).read() }; debug_assert!(index <= u16::MAX as usize); Some(CheckedOutBuf { iovec, init_len, index: index as u16, }) } fn check_in_internal(&mut self, index: u16, init_len: usize) { let state = self .states .get_mut(index as usize) .expect("invalid buffer index"); debug_assert!( matches!(state, BufState::CheckedOut), "the buffer must be checked out" ); *state = BufState::Free { init_len }; } } impl FixedBuffers for Registry { fn iovecs(&self) -> &[iovec] { // Safety: the raw_bufs pointer is valid for the lifetime of self, // the length of the states array is also the length of buffers array // by construction. unsafe { slice::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len()) } } unsafe fn check_in(&mut self, index: u16, init_len: usize) { self.check_in_internal(index, init_len) } } impl Drop for Registry { fn drop(&mut self) { for (i, state) in self.states.iter().enumerate() { match state { BufState::Free { init_len, .. } => { // Update buffer initialization. // The buffer is about to be dropped, but this may release it // from Registry ownership, rather than deallocate. unsafe { self.buffers[i].set_init(*init_len) }; } BufState::CheckedOut => unreachable!("all buffers must be checked in"), } } // Rebuild Vec, so it's dropped let _ = unsafe { Vec::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len(), self.orig_cap) }; } } tokio-uring-0.5.0/src/buf/fixed/pool.rs000064400000000000000000000273141046102023000160650ustar 00000000000000//! 
A dynamic collection of I/O buffers pre-registered with the kernel. //! //! This module provides [`FixedBufPool`], a collection that implements //! dynamic management of sets of interchangeable memory buffers //! registered with the kernel for `io-uring` operations. Asynchronous //! rotation of the buffers shared by multiple tasks is also supported //! by [`FixedBufPool`]. //! //! [`FixedBufPool`]: self::FixedBufPool use super::plumbing; use super::FixedBuf; use crate::buf::IoBufMut; use crate::runtime::CONTEXT; use tokio::pin; use tokio::sync::Notify; use std::cell::RefCell; use std::io; use std::rc::Rc; use std::sync::Arc; /// A dynamic collection of I/O buffers pre-registered with the kernel. /// /// `FixedBufPool` allows the application to manage a collection of buffers /// allocated in memory, that can be registered in the current `tokio-uring` /// context using the [`register`] method. Unlike [`FixedBufRegistry`], /// individual buffers are not retrieved by index; instead, an available /// buffer matching a specified capacity can be retrieved with the [`try_next`] /// method. In asynchronous contexts, the [`next`] method can be used to wait /// until such a buffer becomes available. /// This allows some flexibility in managing sets of buffers with /// different capacity tiers. The need to maintain lists of free buffers, /// however, imposes additional runtime overhead. /// /// A `FixedBufPool` value is a lightweight handle for a collection of /// allocated buffers. Cloning of a `FixedBufPool` creates a new reference to /// the same collection of buffers. /// /// The buffers of the collection are not deallocated until: /// - all `FixedBufPool` references to the collection have been dropped; /// - all [`FixedBuf`] handles to individual buffers in the collection have /// been dropped, including the buffer handles owned by any I/O operations /// in flight; /// - The `tokio-uring` [`Runtime`] the buffers are registered with /// has been dropped. 
/// /// [`register`]: Self::register /// [`try_next`]: Self::try_next /// [`next`]: Self::next /// [`FixedBufRegistry`]: super::FixedBufRegistry /// [`Runtime`]: crate::Runtime /// [`FixedBuf`]: super::FixedBuf /// /// # Examples /// /// ``` /// use tokio_uring::buf::fixed::FixedBufPool; /// use tokio_uring::buf::IoBuf; /// use std::iter; /// use std::mem; /// /// # #[allow(non_snake_case)] /// # fn main() -> Result<(), std::io::Error> { /// # use nix::sys::resource::{getrlimit, Resource}; /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; /// # let BUF_SIZE_LARGE = memlock_limit as usize / 8; /// # let BUF_SIZE_SMALL = memlock_limit as usize / 16; /// tokio_uring::start(async { /// let pool = FixedBufPool::new( /// iter::once(Vec::with_capacity(BUF_SIZE_LARGE)) /// .chain(iter::repeat_with(|| Vec::with_capacity(BUF_SIZE_SMALL)).take(2)) /// ); /// /// pool.register()?; /// /// let buf = pool.try_next(BUF_SIZE_LARGE).unwrap(); /// assert_eq!(buf.bytes_total(), BUF_SIZE_LARGE); /// let next = pool.try_next(BUF_SIZE_LARGE); /// assert!(next.is_none()); /// let buf1 = pool.try_next(BUF_SIZE_SMALL).unwrap(); /// assert_eq!(buf1.bytes_total(), BUF_SIZE_SMALL); /// let buf2 = pool.try_next(BUF_SIZE_SMALL).unwrap(); /// assert_eq!(buf2.bytes_total(), BUF_SIZE_SMALL); /// let next = pool.try_next(BUF_SIZE_SMALL); /// assert!(next.is_none()); /// mem::drop(buf); /// let buf = pool.try_next(BUF_SIZE_LARGE).unwrap(); /// assert_eq!(buf.bytes_total(), BUF_SIZE_LARGE); /// /// Ok(()) /// }) /// # } /// ``` #[derive(Clone)] pub struct FixedBufPool { inner: Rc>>, } impl FixedBufPool { /// Creates a new collection of buffers from the provided allocated vectors. /// /// The buffers are assigned 0-based indices in the order of the iterable /// input parameter. The returned collection takes up to [`UIO_MAXIOV`] /// buffers from the input. Any items in excess of that amount are silently /// dropped, unless the input iterator produces the vectors lazily. 
/// /// [`UIO_MAXIOV`]: libc::UIO_MAXIOV /// /// # Examples /// /// When providing uninitialized vectors for the collection, take care to /// not replicate a vector with `.clone()` as that does not preserve the /// capacity and the resulting buffer pointer will be rejected by the kernel. /// This means that the following use of [`iter::repeat`] would not work: /// /// [`iter::repeat`]: std::iter::repeat /// /// ```should_panic /// use tokio_uring::buf::fixed::FixedBufPool; /// use std::iter; /// /// # #[allow(non_snake_case)] /// # fn main() -> Result<(), std::io::Error> { /// # use nix::sys::resource::{getrlimit, Resource}; /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); /// # let BUF_SIZE = 4096; /// let pool = FixedBufPool::new( /// iter::repeat(Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) /// ); /// /// tokio_uring::start(async { /// pool.register()?; /// // ... /// Ok(()) /// }) /// # } /// ``` /// /// Instead, create the vectors with requested capacity directly: /// /// ``` /// use tokio_uring::buf::fixed::FixedBufPool; /// use std::iter; /// /// # #[allow(non_snake_case)] /// # fn main() -> Result<(), std::io::Error> { /// # use nix::sys::resource::{getrlimit, Resource}; /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); /// # let BUF_SIZE = 4096; /// let pool = FixedBufPool::new( /// iter::repeat_with(|| Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) /// ); /// /// tokio_uring::start(async { /// pool.register()?; /// // ... /// Ok(()) /// }) /// # } /// ``` pub fn new(bufs: impl IntoIterator) -> Self { FixedBufPool { inner: Rc::new(RefCell::new(plumbing::Pool::new(bufs.into_iter()))), } } /// Registers the buffers with the kernel. /// /// This method must be called in the context of a `tokio-uring` runtime. 
/// The registration persists for the lifetime of the runtime, unless /// revoked by the [`unregister`] method. Dropping the /// `FixedBufPool` instance this method has been called on does not revoke /// the registration or deallocate the buffers. /// /// [`unregister`]: Self::unregister /// /// This call can be blocked in the kernel to complete any operations /// in-flight on the same `io-uring` instance. The application is /// recommended to register buffers before starting any I/O operations. /// /// # Errors /// /// If a collection of buffers is currently registered in the context /// of the `tokio-uring` runtime this call is made in, the function returns /// an error. pub fn register(&self) -> io::Result<()> { CONTEXT.with(|x| { x.handle() .as_ref() .expect("Not in a runtime context") .register_buffers(Rc::clone(&self.inner) as _) }) } /// Unregisters this collection of buffers. /// /// This method must be called in the context of a `tokio-uring` runtime, /// where the buffers should have been previously registered. /// /// This operation invalidates any `FixedBuf` handles checked out from /// this registry instance. Continued use of such handles in I/O /// operations may result in an error. /// /// # Errors /// /// If another collection of buffers is currently registered in the context /// of the `tokio-uring` runtime this call is made in, the function returns /// an error. Calling `unregister` when no `FixedBufPool` is currently /// registered on this runtime also returns an error. pub fn unregister(&self) -> io::Result<()> { CONTEXT.with(|x| { x.handle() .as_ref() .expect("Not in a runtime context") .unregister_buffers(Rc::clone(&self.inner) as _) }) } /// Returns a buffer of requested capacity from this pool /// that is not currently owned by any other [`FixedBuf`] handle. /// If no such free buffer is available, returns `None`. /// /// The buffer is released to be available again once the /// returned `FixedBuf` handle has been dropped. 
An I/O operation /// using the buffer takes ownership of it and returns it once completed, /// preventing shared use of the buffer while the operation is in flight. /// /// An application should not rely on any particular order /// in which available buffers are retrieved. pub fn try_next(&self, cap: usize) -> Option { let mut inner = self.inner.borrow_mut(); inner.try_next(cap).map(|data| { let pool = Rc::clone(&self.inner); // Safety: the validity of buffer data is ensured by // plumbing::Pool::try_next unsafe { FixedBuf::new(pool, data) } }) } /// Resolves to a buffer of requested capacity /// when it is or becomes available in this pool. /// This may happen when a [`FixedBuf`] handle owning a buffer /// of the same capacity is dropped. /// /// If no matching buffers are available and none are being released, /// this asynchronous function will never resolve. Applications should take /// care to wait on the returned future concurrently with some tasks that /// will complete I/O operations owning the buffers, or back it up with a /// timeout using, for example, `tokio::util::timeout`. pub async fn next(&self, cap: usize) -> FixedBuf { // Fast path: get the buffer if it's already available let notify = { let mut inner = self.inner.borrow_mut(); if let Some(data) = inner.try_next(cap) { // Safety: the validity of buffer data is ensured by // plumbing::Pool::try_next let buf = unsafe { FixedBuf::new(Rc::clone(&self.inner) as _, data) }; return buf; } inner.notify_on_next(cap) }; // Poll for a buffer, engaging the `Notify` machinery. self.next_when_notified(cap, notify).await } #[cold] async fn next_when_notified(&self, cap: usize, notify: Arc) -> FixedBuf { let notified = notify.notified(); pin!(notified); loop { // In the single-threaded case, no buffers could get checked in // between us calling `try_next` and here, so we can't miss a wakeup. 
notified.as_mut().await; if let Some(data) = self.inner.borrow_mut().try_next(cap) { // Safety: the validity of buffer data is ensured by // plumbing::Pool::try_next let buf = unsafe { FixedBuf::new(Rc::clone(&self.inner) as _, data) }; return buf; } // It's possible that the task did not get a buffer from `try_next`. // The `Notify` entries are created once for each requested capacity // and never removed, so this `Notify` could have been holding // a permit from a buffer checked in previously when no tasks were // waiting. Then a task would call `next` on this pool and receive // the buffer without consuming the permit. It's also possible that // a task calls `try_next` directly. // Reset the `Notified` future to wait for another wakeup. notified.set(notify.notified()); } } } tokio-uring-0.5.0/src/buf/fixed/registry.rs000064400000000000000000000153241046102023000167620ustar 00000000000000use super::plumbing; use super::FixedBuf; use crate::buf::IoBufMut; use crate::runtime::CONTEXT; use std::cell::RefCell; use std::io; use std::rc::Rc; /// An indexed collection of I/O buffers pre-registered with the kernel. /// /// `FixedBufRegistry` allows the application to manage a collection of buffers /// allocated in memory, that can be registered in the current `tokio-uring` /// context using the [`register`] method. The buffers are accessed by their /// indices using the [`check_out`] method. /// /// A `FixedBufRegistry` value is a lightweight handle for a collection of /// allocated buffers. Cloning of a `FixedBufRegistry` creates a new reference to /// the same collection of buffers. 
/// /// The buffers of the collection are not deallocated until: /// - all `FixedBufRegistry` references to the collection have been dropped; /// - all [`FixedBuf`] handles to individual buffers in the collection have /// been dropped, including the buffer handles owned by any I/O operations /// in flight; /// - The `tokio-uring` [`Runtime`] the buffers are registered with /// has been dropped. /// /// [`register`]: Self::register /// [`check_out`]: Self::check_out /// [`Runtime`]: crate::Runtime #[derive(Clone)] pub struct FixedBufRegistry { inner: Rc>>, } impl FixedBufRegistry { /// Creates a new collection of buffers from the provided allocated vectors. /// /// The buffers are assigned 0-based indices in the order of the iterable /// input parameter. The returned collection takes up to [`UIO_MAXIOV`] /// buffers from the input. Any items in excess of that amount are silently /// dropped, unless the input iterator produces the vectors lazily. /// /// [`UIO_MAXIOV`]: libc::UIO_MAXIOV /// /// # Examples /// /// When providing uninitialized vectors for the collection, take care to /// not replicate a vector with `.clone()` as that does not preserve the /// capacity and the resulting buffer pointer will be rejected by the kernel. /// This means that the following use of [`iter::repeat`] would not work: /// /// [`iter::repeat`]: std::iter::repeat /// /// ```should_panic /// use tokio_uring::buf::fixed::FixedBufRegistry; /// use std::iter; /// /// # #[allow(non_snake_case)] /// # fn main() -> Result<(), std::io::Error> { /// # use nix::sys::resource::{getrlimit, Resource}; /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); /// # let BUF_SIZE = 4096; /// let registry = FixedBufRegistry::new( /// iter::repeat(Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) /// ); /// /// tokio_uring::start(async { /// registry.register()?; /// // ... 
/// Ok(()) /// }) /// # } /// ``` /// /// Instead, create the vectors with requested capacity directly: /// /// ``` /// use tokio_uring::buf::fixed::FixedBufRegistry; /// use std::iter; /// /// # #[allow(non_snake_case)] /// # fn main() -> Result<(), std::io::Error> { /// # use nix::sys::resource::{getrlimit, Resource}; /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); /// # let BUF_SIZE = 4096; /// let registry = FixedBufRegistry::new( /// iter::repeat_with(|| Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) /// ); /// /// tokio_uring::start(async { /// registry.register()?; /// // ... /// Ok(()) /// }) /// # } /// ``` pub fn new(bufs: impl IntoIterator) -> Self { FixedBufRegistry { inner: Rc::new(RefCell::new(plumbing::Registry::new(bufs.into_iter()))), } } /// Registers the buffers with the kernel. /// /// This method must be called in the context of a `tokio-uring` runtime. /// The registration persists for the lifetime of the runtime, unless /// revoked by the [`unregister`] method. Dropping the /// `FixedBufRegistry` instance this method has been called on does not revoke /// the registration or deallocate the buffers. /// /// [`unregister`]: Self::unregister /// /// This call can be blocked in the kernel to complete any operations /// in-flight on the same `io-uring` instance. The application is /// recommended to register buffers before starting any I/O operations. /// /// # Errors /// /// If a collection of buffers is currently registered in the context /// of the `tokio-uring` runtime this call is made in, the function returns /// an error. pub fn register(&self) -> io::Result<()> { CONTEXT.with(|x| { x.handle() .as_ref() .expect("Not in a runtime context") .register_buffers(Rc::clone(&self.inner) as _) }) } /// Unregisters this collection of buffers. 
/// /// This method must be called in the context of a `tokio-uring` runtime, /// where the buffers should have been previously registered. /// /// This operation invalidates any `FixedBuf` handles checked out from /// this registry instance. Continued use of such handles in I/O /// operations may result in an error. /// /// # Errors /// /// If another collection of buffers is currently registered in the context /// of the `tokio-uring` runtime this call is made in, the function returns /// an error. Calling `unregister` when no `FixedBufRegistry` is currently /// registered on this runtime also returns an error. pub fn unregister(&self) -> io::Result<()> { CONTEXT.with(|x| { x.handle() .as_ref() .expect("Not in a runtime context") .unregister_buffers(Rc::clone(&self.inner) as _) }) } /// Returns a buffer identified by the specified index for use by the /// application, unless the buffer is already in use. /// /// The buffer is released to be available again once the /// returned `FixedBuf` handle has been dropped. An I/O operation /// using the buffer takes ownership of it and returns it once completed, /// preventing shared use of the buffer while the operation is in flight. pub fn check_out(&self, index: usize) -> Option { let mut inner = self.inner.borrow_mut(); inner.check_out(index).map(|data| { let registry = Rc::clone(&self.inner); // Safety: the validity of buffer data is ensured by // plumbing::Registry::check_out unsafe { FixedBuf::new(registry, data) } }) } } tokio-uring-0.5.0/src/buf/io_buf.rs000064400000000000000000000054201046102023000152520ustar 00000000000000/// An `io-uring` compatible buffer. /// /// The `IoBuf` trait is implemented by buffer types that can be used with /// io-uring operations. Users will not need to use this trait directly. /// The [`BoundedBuf`] trait provides some useful methods including `slice`. /// /// # Safety /// /// Buffers passed to `io-uring` operations must reference a stable memory /// region. 
While the runtime holds ownership to a buffer, the pointer returned /// by `stable_ptr` must remain valid even if the `IoBuf` value is moved. /// /// [`BoundedBuf`]: crate::buf::BoundedBuf pub unsafe trait IoBuf: Unpin + 'static { /// Returns a raw pointer to the vector’s buffer. /// /// This method is to be used by the `tokio-uring` runtime and it is not /// expected for users to call it directly. /// /// The implementation must ensure that, while the `tokio-uring` runtime /// owns the value, the pointer returned by `stable_ptr` **does not** /// change. fn stable_ptr(&self) -> *const u8; /// Number of initialized bytes. /// /// This method is to be used by the `tokio-uring` runtime and it is not /// expected for users to call it directly. /// /// For `Vec`, this is identical to `len()`. fn bytes_init(&self) -> usize; /// Total size of the buffer, including uninitialized memory, if any. /// /// This method is to be used by the `tokio-uring` runtime and it is not /// expected for users to call it directly. /// /// For `Vec`, this is identical to `capacity()`. 
fn bytes_total(&self) -> usize; } unsafe impl IoBuf for Vec { fn stable_ptr(&self) -> *const u8 { self.as_ptr() } fn bytes_init(&self) -> usize { self.len() } fn bytes_total(&self) -> usize { self.capacity() } } unsafe impl IoBuf for &'static [u8] { fn stable_ptr(&self) -> *const u8 { self.as_ptr() } fn bytes_init(&self) -> usize { <[u8]>::len(self) } fn bytes_total(&self) -> usize { self.bytes_init() } } unsafe impl IoBuf for &'static str { fn stable_ptr(&self) -> *const u8 { self.as_ptr() } fn bytes_init(&self) -> usize { ::len(self) } fn bytes_total(&self) -> usize { self.bytes_init() } } #[cfg(feature = "bytes")] unsafe impl IoBuf for bytes::Bytes { fn stable_ptr(&self) -> *const u8 { self.as_ptr() } fn bytes_init(&self) -> usize { self.len() } fn bytes_total(&self) -> usize { self.len() } } #[cfg(feature = "bytes")] unsafe impl IoBuf for bytes::BytesMut { fn stable_ptr(&self) -> *const u8 { self.as_ptr() } fn bytes_init(&self) -> usize { self.len() } fn bytes_total(&self) -> usize { self.capacity() } } tokio-uring-0.5.0/src/buf/io_buf_mut.rs000064400000000000000000000036171046102023000161450ustar 00000000000000use crate::buf::IoBuf; /// A mutable`io-uring` compatible buffer. /// /// The `IoBufMut` trait is implemented by buffer types that can be used with /// io-uring operations. Users will not need to use this trait directly. /// /// # Safety /// /// Buffers passed to `io-uring` operations must reference a stable memory /// region. While the runtime holds ownership to a buffer, the pointer returned /// by `stable_mut_ptr` must remain valid even if the `IoBufMut` value is moved. pub unsafe trait IoBufMut: IoBuf { /// Returns a raw mutable pointer to the vector’s buffer. /// /// This method is to be used by the `tokio-uring` runtime and it is not /// expected for users to call it directly. /// /// The implementation must ensure that, while the `tokio-uring` runtime /// owns the value, the pointer returned by `stable_mut_ptr` **does not** /// change. 
fn stable_mut_ptr(&mut self) -> *mut u8; /// Updates the number of initialized bytes. /// /// If the specified `pos` is greater than the value returned by /// [`IoBuf::bytes_init`], it becomes the new water mark as returned by /// `IoBuf::bytes_init`. /// /// # Safety /// /// The caller must ensure that all bytes starting at `stable_mut_ptr()` up /// to `pos` are initialized and owned by the buffer. unsafe fn set_init(&mut self, pos: usize); } unsafe impl IoBufMut for Vec { fn stable_mut_ptr(&mut self) -> *mut u8 { self.as_mut_ptr() } unsafe fn set_init(&mut self, init_len: usize) { if self.len() < init_len { self.set_len(init_len); } } } #[cfg(feature = "bytes")] unsafe impl IoBufMut for bytes::BytesMut { fn stable_mut_ptr(&mut self) -> *mut u8 { self.as_mut_ptr() } unsafe fn set_init(&mut self, init_len: usize) { if self.len() < init_len { self.set_len(init_len); } } } tokio-uring-0.5.0/src/buf/mod.rs000064400000000000000000000016541046102023000145730ustar 00000000000000//! Utilities for working with buffers. //! //! `io-uring` APIs require passing ownership of buffers to the runtime. The //! crate defines [`IoBuf`] and [`IoBufMut`] traits which are implemented by buffer //! types that respect the `io-uring` contract. pub mod fixed; mod io_buf; pub use io_buf::IoBuf; mod io_buf_mut; pub use io_buf_mut::IoBufMut; mod slice; pub use slice::Slice; mod bounded; pub use bounded::{BoundedBuf, BoundedBufMut}; pub(crate) fn deref(buf: &impl IoBuf) -> &[u8] { // Safety: the `IoBuf` trait is marked as unsafe and is expected to be // implemented correctly. unsafe { std::slice::from_raw_parts(buf.stable_ptr(), buf.bytes_init()) } } pub(crate) fn deref_mut(buf: &mut impl IoBufMut) -> &mut [u8] { // Safety: the `IoBufMut` trait is marked as unsafe and is expected to be // implemented correct. 
unsafe { std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_init()) } } tokio-uring-0.5.0/src/buf/slice.rs000064400000000000000000000125231046102023000151100ustar 00000000000000use super::{BoundedBuf, BoundedBufMut, IoBuf, IoBufMut}; use std::cmp; use std::ops; /// An owned view into a contiguous sequence of bytes. /// /// This is similar to Rust slices (`&buf[..]`) but owns the underlying buffer. /// This type is useful for performing io-uring read and write operations using /// a subset of a buffer. /// /// Slices are created using [`BoundedBuf::slice`]. /// /// # Examples /// /// Creating a slice /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(..5); /// /// assert_eq!(&slice[..], b"hello"); /// ``` pub struct Slice { buf: T, begin: usize, end: usize, } impl Slice { pub(crate) fn new(buf: T, begin: usize, end: usize) -> Slice { Slice { buf, begin, end } } /// Offset in the underlying buffer at which this slice starts. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(1..5); /// /// assert_eq!(1, slice.begin()); /// ``` pub fn begin(&self) -> usize { self.begin } /// Ofset in the underlying buffer at which this slice ends. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(1..5); /// /// assert_eq!(5, slice.end()); /// ``` pub fn end(&self) -> usize { self.end } /// Gets a reference to the underlying buffer. /// /// This method escapes the slice's view. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(..5); /// /// assert_eq!(slice.get_ref(), b"hello world"); /// assert_eq!(&slice[..], b"hello"); /// ``` pub fn get_ref(&self) -> &T { &self.buf } /// Gets a mutable reference to the underlying buffer. 
/// /// This method escapes the slice's view. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let mut slice = buf.slice(..5); /// /// slice.get_mut()[0] = b'b'; /// /// assert_eq!(slice.get_mut(), b"bello world"); /// assert_eq!(&slice[..], b"bello"); /// ``` pub fn get_mut(&mut self) -> &mut T { &mut self.buf } /// Unwraps this `Slice`, returning the underlying buffer. /// /// # Examples /// /// ``` /// use tokio_uring::buf::BoundedBuf; /// /// let buf = b"hello world".to_vec(); /// let slice = buf.slice(..5); /// /// let buf = slice.into_inner(); /// assert_eq!(buf, b"hello world"); /// ``` pub fn into_inner(self) -> T { self.buf } } impl ops::Deref for Slice { type Target = [u8]; fn deref(&self) -> &[u8] { let buf_bytes = super::deref(&self.buf); let end = cmp::min(self.end, buf_bytes.len()); &buf_bytes[self.begin..end] } } impl ops::DerefMut for Slice { fn deref_mut(&mut self) -> &mut [u8] { let buf_bytes = super::deref_mut(&mut self.buf); let end = cmp::min(self.end, buf_bytes.len()); &mut buf_bytes[self.begin..end] } } impl BoundedBuf for Slice { type Buf = T; type Bounds = ops::Range; fn slice(self, range: impl ops::RangeBounds) -> Slice { use ops::Bound; let begin = match range.start_bound() { Bound::Included(&n) => self.begin.checked_add(n).expect("out of range"), Bound::Excluded(&n) => self .begin .checked_add(n) .and_then(|x| x.checked_add(1)) .expect("out of range"), Bound::Unbounded => self.begin, }; assert!(begin <= self.end); let end = match range.end_bound() { Bound::Included(&n) => self .begin .checked_add(n) .and_then(|x| x.checked_add(1)) .expect("out of range"), Bound::Excluded(&n) => self.begin.checked_add(n).expect("out of range"), Bound::Unbounded => self.end, }; assert!(end <= self.end); assert!(begin <= self.buf.bytes_init()); Slice::new(self.buf, begin, end) } fn slice_full(self) -> Slice { self } fn get_buf(&self) -> &T { &self.buf } fn bounds(&self) -> Self::Bounds { 
self.begin..self.end } fn from_buf_bounds(buf: T, bounds: Self::Bounds) -> Self { assert!(bounds.start <= buf.bytes_init()); assert!(bounds.end <= buf.bytes_total()); Slice::new(buf, bounds.start, bounds.end) } fn stable_ptr(&self) -> *const u8 { super::deref(&self.buf)[self.begin..].as_ptr() } fn bytes_init(&self) -> usize { ops::Deref::deref(self).len() } fn bytes_total(&self) -> usize { self.end - self.begin } } impl BoundedBufMut for Slice { type BufMut = T; fn stable_mut_ptr(&mut self) -> *mut u8 { super::deref_mut(&mut self.buf)[self.begin..].as_mut_ptr() } unsafe fn set_init(&mut self, pos: usize) { self.buf.set_init(self.begin + pos); } } tokio-uring-0.5.0/src/fs/create_dir_all.rs000064400000000000000000000141561046102023000166020ustar 00000000000000use futures_util::future::LocalBoxFuture; use std::io; use std::path::Path; /// Recursively create a directory and all of its parent components if they are missing. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// tokio_uring::fs::create_dir_all("/some/dir").await.unwrap(); /// }); /// ``` pub async fn create_dir_all>(path: P) -> io::Result<()> { DirBuilder::new() .recursive(true) .create(path.as_ref()) .await } /// A builder used to create directories in various manners, based on uring async operations. /// /// This builder supports the Linux specific option `mode` and may support `at` in the future. #[derive(Debug)] pub struct DirBuilder { inner: fs_imp::DirBuilder, recursive: bool, } impl Default for DirBuilder { fn default() -> Self { Self::new() } } impl DirBuilder { /// Creates a new set of options with default mode/security settings for all /// platforms and also non-recursive. /// /// # Examples /// /// ``` /// let builder = tokio_uring::fs::DirBuilder::new(); /// ``` #[must_use] pub fn new() -> DirBuilder { DirBuilder { inner: fs_imp::DirBuilder::new(), recursive: false, } } /// Indicates that directories should be created recursively, creating all /// parent directories. 
Parents that do not exist are created with the same /// security and permissions settings. /// /// This option defaults to `false`. /// /// # Examples /// /// ``` /// let mut builder = tokio_uring::fs::DirBuilder::new(); /// builder.recursive(true); /// ``` #[must_use] pub fn recursive(&mut self, recursive: bool) -> &mut Self { self.recursive = recursive; self } /// Sets the mode to create new directories with. This option defaults to 0o777. /// /// This option defaults to 0o777. /// /// # Examples /// /// ``` /// let mut builder = tokio_uring::fs::DirBuilder::new(); /// builder.mode(0o700); /// ``` #[must_use] pub fn mode(&mut self, mode: u32) -> &mut Self { self.inner.set_mode(mode); self } /// Creates the specified directory with the options configured in this /// builder. /// /// It is considered an error if the directory already exists unless /// recursive mode is enabled. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// let path = "/tmp/foo/bar/baz"; /// tokio_uring::fs::DirBuilder::new() /// .recursive(true) /// .mode(0o700) // user-only mode: drwx------ /// .create(path).await.unwrap(); /// /// // TODO change with tokio_uring version /// assert!(std::fs::metadata(path).unwrap().is_dir()); /// }) /// ``` pub async fn create>(&self, path: P) -> io::Result<()> { self._create(path.as_ref()).await } async fn _create(&self, path: &Path) -> io::Result<()> { if self.recursive { self.recurse_create_dir_all(path).await } else { self.inner.mkdir(path).await } } // This recursive function is very closely modeled after the std library version. // // A recursive async function requires a Boxed Future. TODO There may be an implementation that // is less costly in terms of heap allocations. Maybe a non-recursive version is possible given // we even know the path separator for Linux. Or maybe expand the first level to avoid // recursion when only the first level of the directory needs to be built. For now, this serves // its purpose. 
fn recurse_create_dir_all<'a>(&'a self, path: &'a Path) -> LocalBoxFuture> { Box::pin(async move { if path == Path::new("") { return Ok(()); } match self.inner.mkdir(path).await { Ok(()) => return Ok(()), Err(ref e) if e.kind() == io::ErrorKind::NotFound => {} Err(_) if is_dir(path).await => return Ok(()), Err(e) => return Err(e), } match path.parent() { Some(p) => self.recurse_create_dir_all(p).await?, None => { return Err(std::io::Error::new( std::io::ErrorKind::Other, "failed to create whole tree", )); /* TODO build own allocation free error some day like the std library does. return Err(io::const_io_error!( io::ErrorKind::Uncategorized, "failed to create whole tree", )); */ } } match self.inner.mkdir(path).await { Ok(()) => Ok(()), Err(_) if is_dir(path).await => Ok(()), Err(e) => Err(e), } }) } } // TODO this DirBuilder and this fs_imp module is modeled after the std library's. Here there is // only Linux supported so is it worth to continue this separation? mod fs_imp { use crate::runtime::driver::op::Op; use libc::mode_t; use std::path::Path; #[derive(Debug)] pub struct DirBuilder { mode: mode_t, } impl DirBuilder { pub fn new() -> DirBuilder { DirBuilder { mode: 0o777 } } pub async fn mkdir(&self, p: &Path) -> std::io::Result<()> { Op::make_dir(p, self.mode)?.await } pub fn set_mode(&mut self, mode: u32) { self.mode = mode as mode_t; } } } // Returns true if the path represents a directory. // // Uses one asynchronous uring call to determine this. 
async fn is_dir>(path: P) -> bool { let mut builder = crate::fs::StatxBuilder::new(); if builder.mask(libc::STATX_TYPE).pathname(path).is_err() { return false; } let res = builder.statx().await; match res { Ok(statx) => (u32::from(statx.stx_mode) & libc::S_IFMT) == libc::S_IFDIR, Err(_) => false, } } tokio-uring-0.5.0/src/fs/directory.rs000064400000000000000000000046061046102023000156540ustar 00000000000000use crate::runtime::driver::op::Op; use std::io; use std::path::Path; /// Creates a directory on the local filesystem. /// /// # Errors /// /// This function will return an error in the following situations, but is not /// limited to just these cases: /// /// * User lacks permissions to create a directory at `path` /// * [`io::ErrorKind`] would be set to `PermissionDenied` /// * A parent of the given path doesn't exist. /// * [`io::ErrorKind`] would be set to `NotFound` or `NotADirectory` /// * `path` already exists. /// * [`io::ErrorKind`] would be set to `AlreadyExists` /// /// [`ErrorKind`]: std::io::ErrorKind /// # Examples /// /// ```no_run /// use tokio_uring::fs::create_dir; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// create_dir("/some/dir").await?; /// Ok::<(), std::io::Error>(()) /// })?; /// Ok(()) /// } /// ``` pub async fn create_dir>(path: P) -> io::Result<()> { Op::make_dir(path.as_ref(), 0o777)?.await } /// Removes a directory on the local filesystem. /// /// This will only remove empty directories with no children. If you want to destroy the entire /// contents of a directory, you may try [`remove_dir_all`] which uses the standard Tokio executor. /// There currently is no implementation of `remove_dir_all` in tokio-uring. /// /// [`remove_dir_all`]: https://docs.rs/tokio/latest/tokio/fs/fn.remove_dir_all.html /// /// # Errors /// /// This function will return an error in the following situations, but is not /// limited to just these cases: /// /// * `path` doesn't exist. 
/// * [`io::ErrorKind`] would be set to `NotFound` /// * `path` isn't a directory. /// * [`io::ErrorKind`] would be set to `NotADirectory` /// * The user lacks permissions to modify/remove the directory at the provided `path`. /// * [`io::ErrorKind`] would be set to `PermissionDenied` /// * The directory isn't empty. /// * [`io::ErrorKind`] would be set to `DirectoryNotEmpty` /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::remove_dir; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// remove_dir("/some/dir").await?; /// Ok::<(), std::io::Error>(()) /// })?; /// Ok(()) /// } /// ``` pub async fn remove_dir>(path: P) -> io::Result<()> { Op::unlink_dir(path.as_ref())?.await } tokio-uring-0.5.0/src/fs/file.rs000064400000000000000000001014061046102023000145630ustar 00000000000000use crate::buf::fixed::FixedBuf; use crate::buf::{BoundedBuf, BoundedBufMut, IoBuf, IoBufMut, Slice}; use crate::fs::OpenOptions; use crate::io::SharedFd; use crate::runtime::driver::op::Op; use crate::{UnsubmittedOneshot, UnsubmittedWrite}; use std::fmt; use std::io; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; use std::path::Path; /// A reference to an open file on the filesystem. /// /// An instance of a `File` can be read and/or written depending on what options /// it was opened with. The `File` type provides **positional** read and write /// operations. The file does not maintain an internal cursor. The caller is /// required to specify an offset when issuing an operation. /// /// While files are automatically closed when they go out of scope, the /// operation happens asynchronously in the background. It is recommended to /// call the `close()` function in order to guarantee that the file successfully /// closed before exiting the scope. Closing a file does not guarantee writes /// have persisted to disk. Use [`sync_all`] to ensure all writes have reached /// the filesystem. 
/// /// [`sync_all`]: File::sync_all /// /// # Examples /// /// Creates a new file and write data to it: /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// // Open a file /// let file = File::create("hello.txt").await?; /// /// // Write some data /// let (res, buf) = file.write_at(&b"hello world"[..], 0).submit().await; /// let n = res?; /// /// println!("wrote {} bytes", n); /// /// // Sync data to the file system. /// file.sync_all().await?; /// /// // Close the file /// file.close().await?; /// /// Ok(()) /// }) /// } /// ``` pub struct File { /// Open file descriptor pub(crate) fd: SharedFd, } impl File { /// Attempts to open a file in read-only mode. /// /// See the [`OpenOptions::open`] method for more details. /// /// # Errors /// /// This function will return an error if `path` does not already exist. /// Other errors may also be returned according to [`OpenOptions::open`]. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::open("foo.txt").await?; /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn open(path: impl AsRef) -> io::Result { OpenOptions::new().read(true).open(path).await } /// Opens a file in write-only mode. /// /// This function will create a file if it does not exist, /// and will truncate it if it does. /// /// See the [`OpenOptions::open`] function for more details. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::create("foo.txt").await?; /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn create(path: impl AsRef) -> io::Result { OpenOptions::new() .write(true) .create(true) .truncate(true) .open(path) .await } pub(crate) fn from_shared_fd(fd: SharedFd) -> File { File { fd } } /// Converts a [`std::fs::File`][std] to a [`tokio_uring::fs::File`][file]. /// /// [std]: std::fs::File /// [file]: File pub fn from_std(file: std::fs::File) -> File { File::from_shared_fd(SharedFd::new(file.into_raw_fd())) } /// Read some bytes at the specified offset from the file into the specified /// buffer, returning how many bytes were read. /// /// # Return /// /// The method returns the operation result and the same buffer value passed /// as an argument. /// /// If the method returns [`Ok(n)`], then the read was successful. A nonzero /// `n` value indicates that the buffer has been filled with `n` bytes of /// data from the file. If `n` is `0`, then one of the following happened: /// /// 1. The specified offset is the end of the file. /// 2. The buffer specified was 0 bytes in length. /// /// It is not an error if the returned value `n` is smaller than the buffer /// size, even when the file contains enough data to fill the buffer. /// /// # Errors /// /// If this function encounters any form of I/O or other error, an error /// variant will be returned. The buffer is returned on error. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::open("foo.txt").await?; /// let buffer = vec![0; 10]; /// /// // Read up to 10 bytes /// let (res, buffer) = f.read_at(buffer, 0).await; /// let n = res?; /// /// println!("The bytes: {:?}", &buffer[..n]); /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn read_at(&self, buf: T, pos: u64) -> crate::BufResult { // Submit the read operation let op = Op::read_at(&self.fd, buf, pos).unwrap(); op.await } /// Read some bytes at the specified offset from the file into the specified /// array of buffers, returning how many bytes were read. /// /// # Return /// /// The method returns the operation result and the same array of buffers /// passed as an argument. /// /// If the method returns [`Ok(n)`], then the read was successful. A nonzero /// `n` value indicates that the buffers have been filled with `n` bytes of /// data from the file. If `n` is `0`, then one of the following happened: /// /// 1. The specified offset is the end of the file. /// 2. The buffers specified were 0 bytes in length. /// /// It is not an error if the returned value `n` is smaller than the buffer /// size, even when the file contains enough data to fill the buffer. /// /// # Errors /// /// If this function encounters any form of I/O or other error, an error /// variant will be returned. The buffer is returned on error. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::open("foo.txt").await?; /// let buffers = vec![Vec::::with_capacity(10), Vec::::with_capacity(10)]; /// /// // Read up to 20 bytes /// let (res, buffer) = f.readv_at(buffers, 0).await; /// let n = res?; /// /// println!("Read {} bytes", n); /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn readv_at( &self, bufs: Vec, pos: u64, ) -> crate::BufResult> { // Submit the read operation let op = Op::readv_at(&self.fd, bufs, pos).unwrap(); op.await } /// Write data from buffers into this file at the specified offset, /// returning how many bytes were written. /// /// This function will attempt to write the entire contents of `bufs`, but /// the entire write may not succeed, or the write may also generate an /// error. The bytes will be written starting at the specified offset. /// /// # Return /// /// The method returns the operation result and the same array of buffers passed /// in as an argument. A return value of `0` typically means that the /// underlying file is no longer able to accept bytes and will likely not be /// able to in the future as well, or that the buffer provided is empty. /// /// # Errors /// /// Each call to `write` may generate an I/O error indicating that the /// operation could not be completed. If an error is returned then no bytes /// in the buffer were written to this writer. /// /// It is **not** considered an error if the entire buffer could not be /// written to this writer. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = File::create("foo.txt").await?; /// /// // Writes some prefix of the byte string, not necessarily all of it. 
/// let bufs = vec!["some".to_owned().into_bytes(), " bytes".to_owned().into_bytes()]; /// let (res, _) = file.writev_at(bufs, 0).await; /// let n = res?; /// /// println!("wrote {} bytes", n); /// /// // Close the file /// file.close().await?; /// Ok(()) /// }) /// } /// ``` /// /// [`Ok(n)`]: Ok pub async fn writev_at( &self, buf: Vec, pos: u64, ) -> crate::BufResult> { let op = Op::writev_at(&self.fd, buf, pos).unwrap(); op.await } /// Like `writev_at` but will call the `io_uring` `writev` operation multiple times if /// necessary. /// /// Parameter `pos` is an `Option` to allow this function to be used for both files that /// are seekable and those that are not. The caller is responsible for knowing this. /// /// When `None` is supplied, the offset passed to the `io_uring` call will always be zero, even /// if multiple writev calls are necessary; only the iovec information would be adjusted /// between calls. A Unix pipe would fall into this category. /// /// When `Some(n)` is suppied, the offset passed to the writev call will be incremented by the /// progress of prior writev calls. A file system's regular file would fall into this category. /// /// If the caller passes `Some(n)` for a file that is not seekable, the `io_uring` `writev` /// operation will return an error once n is not zero. /// /// If the caller passes `None`, when the file *is* seekable, when multiple `writev` calls are /// required to complete the writing of all the bytes, the bytes at position 0 of the file will /// have been overwritten one or more times with incorrect data. This is true just as if the /// caller had invoked seperate write calls to a file, all with position 0, when in fact the /// file was seekable. /// /// Performance considerations: /// /// The user may want to check that this function is necessary in their use case or performs /// better than a series of write_all operations would. 
There is overhead either way and it is /// not clear which should be faster or give better throughput. /// /// This function causes the temporary allocation of a Vec one time to hold the array of iovec /// that is passed to the kernel. The same array is used for any subsequent calls to get all /// the bytes written. Whereas individual calls to write_all do not require the Vec to be /// allocated, they do each incur the normal overhead of setting up the submission and /// completion structures and going through the future poll mechanism. /// /// TODO decide, would a separate `writev_all` function for `file` that did not take a `pos` /// make things less ambiguous? /// /// TODO more complete documentation here. /// TODO define writev_all functions for net/unix/stream, net/tcp/stream, io/socket. /// TODO remove usize from result, to be consistent with other write_all_vectored functions. /// TODO find a way to test this with some stress to the file so the writev calls don't all /// succeed on their first try. /// TODO consider replacing the current `write_all` and `write_all_at` functions with a similar /// mechanism so all the write-all logic is in one place, in the io/write_all.rs file. pub async fn writev_at_all( &self, buf: Vec, pos: Option, // Use None for files that can't seek ) -> crate::BufResult> { let op = crate::io::writev_at_all(&self.fd, buf, pos); op.await } /// Read the exact number of bytes required to fill `buf` at the specified /// offset from the file. /// /// This function reads as many as bytes as necessary to completely fill the /// specified buffer `buf`. /// /// # Return /// /// The method returns the operation result and the same buffer value passed /// as an argument. /// /// If the method returns [`Ok(())`], then the read was successful. /// /// # Errors /// /// If this function encounters an "end of file" before completely filling /// the buffer, it returns an error of the kind [`ErrorKind::UnexpectedEof`]. 
/// The buffer is returned on error. /// /// If this function encounters any form of I/O or other error, an error /// variant will be returned. The buffer is returned on error. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::open("foo.txt").await?; /// let buffer = Vec::with_capacity(10); /// /// // Read up to 10 bytes /// let (res, buffer) = f.read_exact_at(buffer, 0).await; /// res?; /// /// println!("The bytes: {:?}", buffer); /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` /// /// [`ErrorKind::UnexpectedEof`]: std::io::ErrorKind::UnexpectedEof pub async fn read_exact_at(&self, buf: T, pos: u64) -> crate::BufResult<(), T> where T: BoundedBufMut, { let orig_bounds = buf.bounds(); let (res, buf) = self.read_exact_slice_at(buf.slice_full(), pos).await; (res, T::from_buf_bounds(buf, orig_bounds)) } async fn read_exact_slice_at( &self, mut buf: Slice, mut pos: u64, ) -> crate::BufResult<(), T> { if pos.checked_add(buf.bytes_total() as u64).is_none() { return ( Err(io::Error::new( io::ErrorKind::InvalidInput, "buffer too large for file", )), buf.into_inner(), ); } while buf.bytes_total() != 0 { let (res, slice) = self.read_at(buf, pos).await; match res { Ok(0) => { return ( Err(io::Error::new( io::ErrorKind::UnexpectedEof, "failed to fill whole buffer", )), slice.into_inner(), ) } Ok(n) => { pos += n as u64; buf = slice.slice(n..); } // No match on an EINTR error is performed because this // crate's design ensures we are not calling the 'wait' option // in the ENTER syscall. Only an Enter with 'wait' can generate // an EINTR according to the io_uring man pages. Err(e) => return (Err(e), slice.into_inner()), }; } (Ok(()), buf.into_inner()) } /// Like [`read_at`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. 
/// /// [`read_at`]: Self::read_at /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `read_at`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. /// /// # Examples /// /// ```no_run ///# fn main() -> Result<(), Box> { /// use tokio_uring::fs::File; /// use tokio_uring::buf::fixed::FixedBufRegistry; /// use tokio_uring::buf::BoundedBuf; /// use std::iter; /// /// tokio_uring::start(async { /// let registry = FixedBufRegistry::new(iter::repeat(vec![0; 10]).take(10)); /// registry.register()?; /// /// let f = File::open("foo.txt").await?; /// let buffer = registry.check_out(2).unwrap(); /// /// // Read up to 10 bytes /// let (res, buffer) = f.read_fixed_at(buffer, 0).await; /// let n = res?; /// /// println!("The bytes: {:?}", &buffer[..n]); /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) ///# } /// ``` pub async fn read_fixed_at(&self, buf: T, pos: u64) -> crate::BufResult where T: BoundedBufMut, { // Submit the read operation let op = Op::read_fixed_at(&self.fd, buf, pos).unwrap(); op.await } /// Write a buffer into this file at the specified offset, returning how /// many bytes were written. /// /// This function will attempt to write the entire contents of `buf`, but /// the entire write may not succeed, or the write may also generate an /// error. The bytes will be written starting at the specified offset. /// /// # Return /// /// The method returns the operation result and the same buffer value passed /// in as an argument. A return value of `0` typically means that the /// underlying file is no longer able to accept bytes and will likely not be /// able to in the future as well, or that the buffer provided is empty. /// /// # Errors /// /// Each call to `write` may generate an I/O error indicating that the /// operation could not be completed. 
If an error is returned then no bytes /// in the buffer were written to this writer. /// /// It is **not** considered an error if the entire buffer could not be /// written to this writer. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = File::create("foo.txt").await?; /// /// // Writes some prefix of the byte string, not necessarily all of it. /// let (res, _) = file.write_at(&b"some bytes"[..], 0).submit().await; /// let n = res?; /// /// println!("wrote {} bytes", n); /// /// // Close the file /// file.close().await?; /// Ok(()) /// }) /// } /// ``` /// /// [`Ok(n)`]: Ok pub fn write_at(&self, buf: T, pos: u64) -> UnsubmittedWrite { UnsubmittedOneshot::write_at(&self.fd, buf, pos) } /// Attempts to write an entire buffer into this file at the specified offset. /// /// This method will continuously call [`write_at`] until there is no more data /// to be written or an error is returned. /// This method will not return until the entire buffer has been successfully /// written or an error occurs. /// /// If the buffer contains no data, this will never call [`write_at`]. /// /// # Return /// /// The method returns the operation result and the same buffer value passed /// in as an argument. /// /// # Errors /// /// This function will return the first error that [`write_at`] returns. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = File::create("foo.txt").await?; /// /// // Writes some prefix of the byte string, not necessarily all of it. 
/// let (res, _) = file.write_all_at(&b"some bytes"[..], 0).await; /// res?; /// /// println!("wrote all bytes"); /// /// // Close the file /// file.close().await?; /// Ok(()) /// }) /// } /// ``` /// /// [`write_at`]: File::write_at pub async fn write_all_at(&self, buf: T, pos: u64) -> crate::BufResult<(), T> where T: BoundedBuf, { let orig_bounds = buf.bounds(); let (res, buf) = self.write_all_slice_at(buf.slice_full(), pos).await; (res, T::from_buf_bounds(buf, orig_bounds)) } async fn write_all_slice_at( &self, mut buf: Slice, mut pos: u64, ) -> crate::BufResult<(), T> { if pos.checked_add(buf.bytes_init() as u64).is_none() { return ( Err(io::Error::new( io::ErrorKind::InvalidInput, "buffer too large for file", )), buf.into_inner(), ); } while buf.bytes_init() != 0 { let (res, slice) = self.write_at(buf, pos).submit().await; match res { Ok(0) => { return ( Err(io::Error::new( io::ErrorKind::WriteZero, "failed to write whole buffer", )), slice.into_inner(), ) } Ok(n) => { pos += n as u64; buf = slice.slice(n..); } // No match on an EINTR error is performed because this // crate's design ensures we are not calling the 'wait' option // in the ENTER syscall. Only an Enter with 'wait' can generate // an EINTR according to the io_uring man pages. Err(e) => return (Err(e), slice.into_inner()), }; } (Ok(()), buf.into_inner()) } /// Like [`write_at`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. /// /// [`write_at`]: Self::write_at /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `write_at`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. 
/// /// # Examples /// /// ```no_run ///# fn main() -> Result<(), Box> { /// use tokio_uring::fs::File; /// use tokio_uring::buf::fixed::FixedBufRegistry; /// use tokio_uring::buf::BoundedBuf; /// /// tokio_uring::start(async { /// let registry = FixedBufRegistry::new([b"some bytes".to_vec()]); /// registry.register()?; /// /// let file = File::create("foo.txt").await?; /// /// let buffer = registry.check_out(0).unwrap(); /// /// // Writes some prefix of the buffer content, /// // not necessarily all of it. /// let (res, _) = file.write_fixed_at(buffer, 0).await; /// let n = res?; /// /// println!("wrote {} bytes", n); /// /// // Close the file /// file.close().await?; /// Ok(()) /// }) ///# } /// ``` pub async fn write_fixed_at(&self, buf: T, pos: u64) -> crate::BufResult where T: BoundedBuf, { let op = Op::write_fixed_at(&self.fd, buf, pos).unwrap(); op.await } /// Attempts to write an entire buffer into this file at the specified offset. /// /// This method will continuously call [`write_fixed_at`] until there is no more data /// to be written or an error is returned. /// This method will not return until the entire buffer has been successfully /// written or an error occurs. /// /// If the buffer contains no data, this will never call [`write_fixed_at`]. /// /// # Return /// /// The method returns the operation result and the same buffer value passed /// in as an argument. /// /// # Errors /// /// This function will return the first error that [`write_fixed_at`] returns. 
/// /// [`write_fixed_at`]: Self::write_fixed_at pub async fn write_fixed_all_at(&self, buf: T, pos: u64) -> crate::BufResult<(), T> where T: BoundedBuf, { let orig_bounds = buf.bounds(); let (res, buf) = self.write_fixed_all_at_slice(buf.slice_full(), pos).await; (res, T::from_buf_bounds(buf, orig_bounds)) } async fn write_fixed_all_at_slice( &self, mut buf: Slice, mut pos: u64, ) -> crate::BufResult<(), FixedBuf> { if pos.checked_add(buf.bytes_init() as u64).is_none() { return ( Err(io::Error::new( io::ErrorKind::InvalidInput, "buffer too large for file", )), buf.into_inner(), ); } while buf.bytes_init() != 0 { let (res, slice) = self.write_fixed_at(buf, pos).await; match res { Ok(0) => { return ( Err(io::Error::new( io::ErrorKind::WriteZero, "failed to write whole buffer", )), slice.into_inner(), ) } Ok(n) => { pos += n as u64; buf = slice.slice(n..); } // No match on an EINTR error is performed because this // crate's design ensures we are not calling the 'wait' option // in the ENTER syscall. Only an Enter with 'wait' can generate // an EINTR according to the io_uring man pages. Err(e) => return (Err(e), slice.into_inner()), }; } (Ok(()), buf.into_inner()) } /// Attempts to sync all OS-internal metadata to disk. /// /// This function will attempt to ensure that all in-memory data reaches the /// filesystem before completing. /// /// This can be used to handle errors that would otherwise only be caught /// when the `File` is closed. Dropping a file will ignore errors in /// synchronizing this in-memory data. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::create("foo.txt").await?; /// let (res, buf) = f.write_at(&b"Hello, world!"[..], 0).submit().await; /// let n = res?; /// /// f.sync_all().await?; /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn sync_all(&self) -> io::Result<()> { Op::fsync(&self.fd)?.await } /// Attempts to sync file data to disk. /// /// This method is similar to [`sync_all`], except that it may not /// synchronize file metadata to the filesystem. /// /// This is intended for use cases that must synchronize content, but don't /// need the metadata on disk. The goal of this method is to reduce disk /// operations. /// /// Note that some platforms may simply implement this in terms of /// [`sync_all`]. /// /// [`sync_all`]: File::sync_all /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::create("foo.txt").await?; /// let (res, buf) = f.write_at(&b"Hello, world!"[..], 0).submit().await; /// let n = res?; /// /// f.sync_data().await?; /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } /// ``` pub async fn sync_data(&self) -> io::Result<()> { Op::datasync(&self.fd)?.await } /// Manipulate the allocated disk space of the file. /// /// The manipulated range starts at the `offset` and continues for `len` bytes. /// /// The specific manipulation to the allocated disk space are specified by /// the `flags`, to understand what are the possible values here check /// the `fallocate(2)` man page. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let f = File::create("foo.txt").await?; /// /// // Allocate a 1024 byte file setting all the bytes to zero /// f.fallocate(0, 1024, libc::FALLOC_FL_ZERO_RANGE).await?; /// /// // Close the file /// f.close().await?; /// Ok(()) /// }) /// } pub async fn fallocate(&self, offset: u64, len: u64, flags: i32) -> io::Result<()> { Op::fallocate(&self.fd, offset, len, flags)?.await } /// Closes the file using the uring asynchronous close operation and returns the possible error /// as described in the close(2) man page. /// /// The programmer has the choice of calling this asynchronous close and waiting for the result /// or letting the library close the file automatically and simply letting the file go out of /// scope and having the library close the file descriptor automatically and synchronously. /// /// Calling this asynchronous close is to be preferred because it returns the close result /// which as the man page points out, should not be ignored. This asynchronous close also /// avoids the synchronous close system call and may result in better throughput as the thread /// is not blocked during the close. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// // Open the file /// let f = File::open("foo.txt").await?; /// // Close the file /// f.close().await?; /// /// Ok(()) /// }) /// } /// ``` pub async fn close(mut self) -> io::Result<()> { self.fd.close().await } } impl FromRawFd for File { unsafe fn from_raw_fd(fd: RawFd) -> Self { File::from_shared_fd(SharedFd::new(fd)) } } impl AsRawFd for File { fn as_raw_fd(&self) -> RawFd { self.fd.raw_fd() } } impl fmt::Debug for File { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("File") .field("fd", &self.fd.raw_fd()) .finish() } } /// Removes a File /// /// This function will return an error in the following situations, but is not /// limited to just these cases: /// /// * `path` doesn't exist. /// * [`io::ErrorKind`] would be set to `NotFound` /// * The user lacks permissions to modify/remove the file at the provided `path`. /// * [`io::ErrorKind`] would be set to `PermissionDenied` /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::remove_file; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// remove_file("/some/file.txt").await?; /// Ok::<(), std::io::Error>(()) /// })?; /// Ok(()) /// } /// ``` pub async fn remove_file>(path: P) -> io::Result<()> { Op::unlink_file(path.as_ref())?.await } /// Renames a file or directory to a new name, replacing the original file if /// `to` already exists. /// /// #Errors /// /// * `path` doesn't exist. /// * [`io::ErrorKind`] would be set to `NotFound` /// * The user lacks permissions to modify/remove the file at the provided `path`. /// * [`io::ErrorKind`] would be set to `PermissionDenied` /// * The new name/path is on a different mount point. 
/// * [`io::ErrorKind`] would be set to `CrossesDevices` /// /// # Example /// /// ```no_run /// use tokio_uring::fs::rename; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// rename("a.txt", "b.txt").await?; // Rename a.txt to b.txt /// Ok::<(), std::io::Error>(()) /// })?; /// Ok(()) /// } /// ``` pub async fn rename(from: impl AsRef, to: impl AsRef) -> io::Result<()> { Op::rename_at(from.as_ref(), to.as_ref(), 0)?.await } tokio-uring-0.5.0/src/fs/mod.rs000064400000000000000000000006731046102023000144270ustar 00000000000000//! Filesystem manipulation operations. mod directory; pub use directory::create_dir; pub use directory::remove_dir; mod create_dir_all; pub use create_dir_all::create_dir_all; pub use create_dir_all::DirBuilder; mod file; pub use file::remove_file; pub use file::rename; pub use file::File; mod open_options; pub use open_options::OpenOptions; mod statx; pub use statx::is_dir_regfile; pub use statx::statx; pub use statx::StatxBuilder; tokio-uring-0.5.0/src/fs/open_options.rs000064400000000000000000000303511046102023000163600ustar 00000000000000use crate::fs::File; use crate::runtime::driver::op::Op; use std::io; use std::os::unix::fs::OpenOptionsExt; use std::path::Path; /// Options and flags which can be used to configure how a file is opened. /// /// This builder exposes the ability to configure how a [`File`] is opened and /// what operations are permitted on the open file. The [`File::open`] and /// [`File::create`] methods are aliases for commonly used options using this /// builder. /// /// Generally speaking, when using `OpenOptions`, you'll first call /// [`OpenOptions::new`], then chain calls to methods to set each option, then /// call [`OpenOptions::open`], passing the path of the file you're trying to /// open. This will give you a [`io::Result`] with a [`File`] inside that you /// can further operate on. 
/// /// # Examples /// /// Opening a file to read: /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .read(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` /// /// Opening a file for both reading and writing, as well as creating it if it /// doesn't exist: /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .read(true) /// .write(true) /// .create(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` #[derive(Debug, Clone)] pub struct OpenOptions { read: bool, write: bool, append: bool, truncate: bool, create: bool, create_new: bool, pub(crate) mode: libc::mode_t, pub(crate) custom_flags: libc::c_int, } impl OpenOptions { /// Creates a blank new set of options ready for configuration. /// /// All options are initially set to `false`. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .read(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn new() -> OpenOptions { OpenOptions { // generic read: false, write: false, append: false, truncate: false, create: false, create_new: false, mode: 0o666, custom_flags: 0, } } /// Sets the option for read access. /// /// This option, when true, will indicate that the file should be /// `read`-able if opened. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .read(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn read(&mut self, read: bool) -> &mut OpenOptions { self.read = read; self } /// Sets the option for write access. 
/// /// This option, when true, will indicate that the file should be /// `write`-able if opened. /// /// If the file already exists, any write calls on it will overwrite its /// contents, without truncating it. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .write(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn write(&mut self, write: bool) -> &mut OpenOptions { self.write = write; self } /// Sets the option for the append mode. /// /// This option, when true, means that writes will append to a file instead /// of overwriting previous contents. Note that setting /// `.write(true).append(true)` has the same effect as setting only /// `.append(true)`. /// /// For most filesystems, the operating system guarantees that all writes /// are atomic: no writes get mangled because another process writes at the /// same time. /// /// ## Note /// /// This function doesn't create the file if it doesn't exist. Use the /// [`OpenOptions::create`] method to do so. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .append(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn append(&mut self, append: bool) -> &mut OpenOptions { self.append = append; self } /// Sets the option for truncating a previous file. /// /// If a file is successfully opened with this option set it will truncate /// the file to 0 length if it already exists. /// /// The file must be opened with write access for truncate to work. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .write(true) /// .truncate(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions { self.truncate = truncate; self } /// Sets the option to create a new file, or open it if it already exists. /// /// In order for the file to be created, [`OpenOptions::write`] or /// [`OpenOptions::append`] access must be used. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .write(true) /// .create(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn create(&mut self, create: bool) -> &mut OpenOptions { self.create = create; self } /// Sets the option to create a new file, failing if it already exists. /// /// No file is allowed to exist at the target location, also no (dangling) symlink. In this /// way, if the call succeeds, the file returned is guaranteed to be new. /// /// This option is useful because it is atomic. Otherwise between checking /// whether a file exists and creating a new one, the file may have been /// created by another process (a TOCTOU race condition / attack). /// /// If `.create_new(true)` is set, [`.create()`] and [`.truncate()`] are /// ignored. /// /// The file must be opened with write or append access in order to create /// a new file. 
/// /// [`.create()`]: OpenOptions::create /// [`.truncate()`]: OpenOptions::truncate /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .write(true) /// .create_new(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions { self.create_new = create_new; self } /// Opens a file at `path` with the options specified by `self`. /// /// # Errors /// /// This function will return an error under a number of different /// circumstances. Some of these error conditions are listed here, together /// with their [`io::ErrorKind`]. The mapping to [`io::ErrorKind`]s is not /// part of the compatibility contract of the function, especially the /// [`Other`] kind might change to more specific kinds in the future. /// /// * [`NotFound`]: The specified file does not exist and neither `create` /// or `create_new` is set. /// * [`NotFound`]: One of the directory components of the file path does /// not exist. /// * [`PermissionDenied`]: The user lacks permission to get the specified /// access rights for the file. /// * [`PermissionDenied`]: The user lacks permission to open one of the /// directory components of the specified path. /// * [`AlreadyExists`]: `create_new` was specified and the file already /// exists. /// * [`InvalidInput`]: Invalid combinations of open options (truncate /// without write access, no access mode set, etc.). /// * [`Other`]: One of the directory components of the specified file path /// was not, in fact, a directory. /// * [`Other`]: Filesystem-level errors: full disk, write permission /// requested on a read-only file system, exceeded disk quota, too many /// open files, too long filename, too many symbolic links in the /// specified path (Unix-like systems only), etc. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::OpenOptions; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let file = OpenOptions::new() /// .read(true) /// .open("foo.txt") /// .await?; /// Ok(()) /// }) /// } /// ``` /// /// [`AlreadyExists`]: io::ErrorKind::AlreadyExists /// [`InvalidInput`]: io::ErrorKind::InvalidInput /// [`NotFound`]: io::ErrorKind::NotFound /// [`Other`]: io::ErrorKind::Other /// [`PermissionDenied`]: io::ErrorKind::PermissionDenied pub async fn open(&self, path: impl AsRef) -> io::Result { Op::open(path.as_ref(), self)?.await } pub(crate) fn access_mode(&self) -> io::Result { match (self.read, self.write, self.append) { (true, false, false) => Ok(libc::O_RDONLY), (false, true, false) => Ok(libc::O_WRONLY), (true, true, false) => Ok(libc::O_RDWR), (false, _, true) => Ok(libc::O_WRONLY | libc::O_APPEND), (true, _, true) => Ok(libc::O_RDWR | libc::O_APPEND), (false, false, false) => Err(io::Error::from_raw_os_error(libc::EINVAL)), } } pub(crate) fn creation_mode(&self) -> io::Result { match (self.write, self.append) { (true, false) => {} (false, false) => { if self.truncate || self.create || self.create_new { return Err(io::Error::from_raw_os_error(libc::EINVAL)); } } (_, true) => { if self.truncate && !self.create_new { return Err(io::Error::from_raw_os_error(libc::EINVAL)); } } } Ok(match (self.create, self.truncate, self.create_new) { (false, false, false) => 0, (true, false, false) => libc::O_CREAT, (false, true, false) => libc::O_TRUNC, (true, true, false) => libc::O_CREAT | libc::O_TRUNC, (_, _, true) => libc::O_CREAT | libc::O_EXCL, }) } } impl Default for OpenOptions { fn default() -> Self { Self::new() } } impl OpenOptionsExt for OpenOptions { fn mode(&mut self, mode: u32) -> &mut OpenOptions { self.mode = mode; self } fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions { self.custom_flags = flags; self } } 
tokio-uring-0.5.0/src/fs/statx.rs000064400000000000000000000235641046102023000150170ustar 00000000000000use super::File; use crate::io::{cstr, SharedFd}; use crate::runtime::driver::op::Op; use std::{ffi::CString, io, path::Path}; impl File { /// Returns statx(2) metadata for an open file via a uring call. /// /// The libc::statx structure returned is described in the statx(2) man page. /// /// This high level version of the statx function uses `flags` set to libc::AT_EMPTY_PATH and /// `mask` set to libc::STATX_ALL which are described in the same man page. /// /// More specific uring statx(2) calls can be made with the StatxBuilder. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// tokio_uring::start(async { /// let f = File::create("foo.txt").await.unwrap(); /// /// // Fetch file metadata /// let statx = f.statx().await.unwrap(); /// /// // Close the file /// f.close().await.unwrap(); /// }) /// ``` pub async fn statx(&self) -> io::Result { let flags = libc::AT_EMPTY_PATH; let mask = libc::STATX_ALL; Op::statx(Some(self.fd.clone()), None, flags, mask)?.await } /// Returns a builder that can return statx(2) metadata for an open file using the uring /// device. /// /// `flags` and `mask` can be changed from their defaults and a `path` that is absolule or /// relative can also be provided. /// /// `flags` defaults to libc::AT_EMPTY_PATH. /// /// `mask` defaults to libc::STATX_ALL. /// /// Refer to statx(2) for details on the arguments and the returned value. /// /// A little from the man page: /// /// - statx(2) uses path, dirfd, and flags to identify the target file. /// - statx(2) uses mask to tell the kernel which fields the caller is interested in. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// tokio_uring::start(async { /// let f = File::create("foo.txt").await.unwrap(); /// /// // Fetch file metadata /// let statx = f.statx_builder() /// .flags(libc::AT_NO_AUTOMOUNT) /// .statx().await.unwrap(); /// /// // Close the file /// f.close().await.unwrap(); /// }) /// ``` pub fn statx_builder(&self) -> StatxBuilder { StatxBuilder { file: Some(self.fd.clone()), path: None, flags: libc::AT_EMPTY_PATH, mask: libc::STATX_ALL, } } } /// Returns statx(2) metadata for a path via a uring call. /// /// The libc::statx structure returned is described in the statx(2) man page. /// /// This high level version of the statx function uses `flags` set to libc::AT_EMPTY_PATH and /// `mask` set to libc::STATX_ALL which are described in the same man page. /// /// And this version of the function does not work on an open file descriptor can be more expedient /// when an open file descriptor isn't necessary for other reasons anyway. /// /// The path can be absolute or relative. A relative path is interpreted against the current /// working direcgtory. /// /// More specific uring statx(2) calls can be made with the StatxBuilder. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// /// // Fetch file metadata /// let statx = tokio_uring::fs::statx("foo.txt").await.unwrap(); /// }) /// ``` pub async fn statx>(path: P) -> io::Result { StatxBuilder::new().pathname(path).unwrap().statx().await } /// A builder used to make a uring statx(2) call. /// /// This builder supports the `flags` and `mask` options and can be finished with a call to /// `statx()`. /// /// See StatxBuilder::new for more details. pub struct StatxBuilder { file: Option, path: Option, flags: i32, mask: u32, } impl Default for StatxBuilder { fn default() -> Self { Self::new() } } impl StatxBuilder { /// Returns a builder to fully specify the arguments to the uring statx(2) operation. 
/// /// The libc::statx structure returned in described in the statx(2) man page. /// /// This builder defaults to having no open file descriptor and defaults `flags` to /// libc::AT_EMPTY_PATH and `mask` to libc::STATX_ALL. /// /// Refer to the man page for details about the `flags`, `mask` values and returned structure, /// libc::statx. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// let want_mode: u16 = 0o775; /// /// // Fetch file metadata /// let statx = tokio_uring::fs::StatxBuilder::new() /// .mask(libc::STATX_MODE) /// .pathname("foo.txt").unwrap() /// .statx().await.unwrap(); /// let got_mode = statx.stx_mode & 0o7777; /// /// if want_mode == got_mode { /// println!("Success: wanted mode {want_mode:#o}, got mode {got_mode:#o}"); /// } else { /// println!("Fail: wanted mode {want_mode:#o}, got mode {got_mode:#o}"); /// } /// }) /// ``` #[must_use] pub fn new() -> StatxBuilder { StatxBuilder { file: None, path: None, flags: libc::AT_EMPTY_PATH, mask: libc::STATX_ALL, } } /// Sets the `dirfd` option, setting or replacing the file descriptor which may be for a /// directory but doesn't have to be. When used with a path, it should be a directory but when /// used without a path, can be any file type. So `dirfd` is a bit of a misnomer but it is what /// the statx(2) man page calls it. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::{self, File}; /// /// tokio_uring::start(async { /// let dir = fs::OpenOptions::new() /// .open("/home/linux") /// .await.unwrap(); /// /// // Fetch file metadata /// let statx = fs::StatxBuilder::new() /// .dirfd(&dir) /// .mask(libc::STATX_TYPE) /// .pathname(".cargo").unwrap() /// .statx().await.unwrap(); /// /// dir.close().await.unwrap(); /// }) /// ``` #[must_use] pub fn dirfd(&mut self, file: &File) -> &mut Self { self.file = Some(file.fd.clone()); self } /// Sets the `path` option, setting or replacing the path option to the command. /// The path may be absolute or relative. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::{self, File}; /// /// tokio_uring::start(async { /// let dir = fs::OpenOptions::new() /// .open("/home/linux") /// .await.unwrap(); /// /// // Fetch file metadata /// let statx = fs::StatxBuilder::new() /// .dirfd(&dir) /// .pathname(".cargo").unwrap() /// .mask(libc::STATX_TYPE) /// .statx().await.unwrap(); /// /// dir.close().await.unwrap(); /// }) /// ``` pub fn pathname>(&mut self, path: P) -> io::Result<&mut Self> { self.path = Some(cstr(path.as_ref())?); Ok(self) } /// Sets the `flags` option, replacing the default. /// /// See statx(2) for a full description of `flags`. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// // Fetch file metadata /// let statx = tokio_uring::fs::StatxBuilder::new() /// .flags(libc::AT_NO_AUTOMOUNT) /// .pathname("foo.txt").unwrap() /// .statx().await.unwrap(); /// }) /// ``` #[must_use] pub fn flags(&mut self, flags: i32) -> &mut Self { self.flags = flags; self } /// Sets the `mask` option, replacing the default. /// /// # Examples /// /// ```no_run /// tokio_uring::start(async { /// // Fetch file metadata /// let statx = tokio_uring::fs::StatxBuilder::new() /// .mask(libc::STATX_BASIC_STATS) /// .pathname("foo.txt").unwrap() /// .statx().await.unwrap(); /// }) /// ``` #[must_use] pub fn mask(&mut self, mask: u32) -> &mut Self { self.mask = mask; self } /// Returns the metadata requested for the optional open file. If no open file was provided, /// the metadata for the current working directory is returned. 
/// /// # Examples /// /// ```no_run /// use tokio_uring::fs::{self, File}; /// /// tokio_uring::start(async { /// let dir = fs::OpenOptions::new() /// .open("/home/linux") /// .await.unwrap(); /// /// // Fetch file metadata /// let statx = fs::StatxBuilder::new() /// .dirfd(&dir) /// .pathname(".cargo").unwrap() /// .mask(libc::STATX_TYPE) /// .statx().await.unwrap(); /// /// dir.close().await.unwrap(); /// }) /// ``` pub async fn statx(&mut self) -> io::Result { // TODO should the statx() terminator be renamed to something like submit()? let fd = self.file.take(); let path = self.path.take(); Op::statx(fd, path, self.flags, self.mask)?.await } } // TODO consider replacing this with a Statx struct with useful helper methods. /// Returns two bools, is_dir and is_regfile. /// /// They both can't be true at the same time and there are many reasons they may both be false. #[allow(dead_code)] pub async fn is_dir_regfile>(path: P) -> (bool, bool) { let mut builder = crate::fs::StatxBuilder::new(); if builder.mask(libc::STATX_TYPE).pathname(path).is_err() { return (false, false); } let res = builder.statx().await; match res { Ok(statx) => ( (u32::from(statx.stx_mode) & libc::S_IFMT) == libc::S_IFDIR, (u32::from(statx.stx_mode) & libc::S_IFMT) == libc::S_IFREG, ), Err(_) => (false, false), } } tokio-uring-0.5.0/src/future.rs000064400000000000000000000004241046102023000145440ustar 00000000000000// TODO see about removing or just commenting out. #[allow(unused_macros)] macro_rules! ready { ($e:expr $(,)?) 
=> { match $e { std::task::Poll::Ready(t) => t, std::task::Poll::Pending => return std::task::Poll::Pending, } }; } tokio-uring-0.5.0/src/io/accept.rs000064400000000000000000000033631046102023000151050ustar 00000000000000use crate::io::{SharedFd, Socket}; use crate::runtime::driver::op; use crate::runtime::driver::op::{Completable, Op}; use crate::runtime::CONTEXT; use std::net::SocketAddr; use std::{boxed::Box, io}; pub(crate) struct Accept { fd: SharedFd, pub(crate) socketaddr: Box<(libc::sockaddr_storage, libc::socklen_t)>, } impl Op { pub(crate) fn accept(fd: &SharedFd) -> io::Result> { use io_uring::{opcode, types}; let socketaddr = Box::new(( unsafe { std::mem::zeroed() }, std::mem::size_of::() as libc::socklen_t, )); CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Accept { fd: fd.clone(), socketaddr, }, |accept| { opcode::Accept::new( types::Fd(accept.fd.raw_fd()), &mut accept.socketaddr.0 as *mut _ as *mut _, &mut accept.socketaddr.1, ) .flags(libc::O_CLOEXEC) .build() }, ) }) } } impl Completable for Accept { type Output = io::Result<(Socket, Option)>; fn complete(self, cqe: op::CqeResult) -> Self::Output { let fd = cqe.result?; let fd = SharedFd::new(fd as i32); let socket = Socket { fd }; let (_, addr) = unsafe { socket2::SockAddr::init(move |addr_storage, len| { self.socketaddr.0.clone_into(&mut *addr_storage); *len = self.socketaddr.1; Ok(()) })? 
}; Ok((socket, addr.as_socket())) } } tokio-uring-0.5.0/src/io/bind.rs000064400000000000000000000000001046102023000145430ustar 00000000000000tokio-uring-0.5.0/src/io/close.rs000064400000000000000000000013641046102023000147520ustar 00000000000000use crate::runtime::driver::op; use crate::runtime::driver::op::{Completable, Op}; use crate::runtime::CONTEXT; use std::io; use std::os::unix::io::RawFd; pub(crate) struct Close { fd: RawFd, } impl Op { pub(crate) fn close(fd: RawFd) -> io::Result> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(Close { fd }, |close| { opcode::Close::new(types::Fd(close.fd)).build() }) }) } } impl Completable for Close { type Output = io::Result<()>; fn complete(self, cqe: op::CqeResult) -> Self::Output { let _ = cqe.result?; Ok(()) } } tokio-uring-0.5.0/src/io/connect.rs000064400000000000000000000024671046102023000153030ustar 00000000000000use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use socket2::SockAddr; use std::io; /// Open a file pub(crate) struct Connect { fd: SharedFd, // this avoids a UAF (UAM?) if the future is moved, but not if the future is // dropped. no Op can be dropped before completion in tokio-uring land right now. socket_addr: Box, } impl Op { /// Submit a request to connect. 
pub(crate) fn connect(fd: &SharedFd, socket_addr: SockAddr) -> io::Result> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Connect { fd: fd.clone(), socket_addr: Box::new(socket_addr), }, |connect| { opcode::Connect::new( types::Fd(connect.fd.raw_fd()), connect.socket_addr.as_ptr(), connect.socket_addr.len(), ) .build() }, ) }) } } impl Completable for Connect { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/fallocate.rs000064400000000000000000000017241046102023000155770ustar 00000000000000use std::io; use io_uring::{opcode, types}; use crate::{ io::SharedFd, runtime::{ driver::op::{Completable, CqeResult, Op}, CONTEXT, }, }; pub(crate) struct Fallocate { fd: SharedFd, } impl Op { pub(crate) fn fallocate( fd: &SharedFd, offset: u64, len: u64, flags: i32, ) -> io::Result> { CONTEXT.with(|x| { x.handle().expect("not in a runtime context").submit_op( Fallocate { fd: fd.clone() }, |fallocate| { opcode::Fallocate::new(types::Fd(fallocate.fd.raw_fd()), len as _) .offset(offset as _) .mode(flags) .build() }, ) }) } } impl Completable for Fallocate { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/fsync.rs000064400000000000000000000022321046102023000147620ustar 00000000000000use std::io; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use io_uring::{opcode, types}; pub(crate) struct Fsync { fd: SharedFd, } impl Op { pub(crate) fn fsync(fd: &SharedFd) -> io::Result> { CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(Fsync { fd: fd.clone() }, |fsync| { opcode::Fsync::new(types::Fd(fsync.fd.raw_fd())).build() }) }) } pub(crate) fn datasync(fd: &SharedFd) -> io::Result> { CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Fsync 
{ fd: fd.clone() }, |fsync| { opcode::Fsync::new(types::Fd(fsync.fd.raw_fd())) .flags(types::FsyncFlags::DATASYNC) .build() }, ) }) } } impl Completable for Fsync { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/mkdir_at.rs000064400000000000000000000021341046102023000154330ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use super::util::cstr; use std::ffi::CString; use std::io; use std::path::Path; /// Create a directory at path relative to the current working directory /// of the caller's process. pub(crate) struct Mkdir { pub(crate) _path: CString, } impl Op { /// Submit a request to create a directory pub(crate) fn make_dir(path: &Path, mode: u32) -> io::Result> { use io_uring::{opcode, types}; let _path = cstr(path)?; CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(Mkdir { _path }, |mkdir| { let p_ref = mkdir._path.as_c_str().as_ptr(); opcode::MkDirAt::new(types::Fd(libc::AT_FDCWD), p_ref) .mode(mode) .build() }) }) } } impl Completable for Mkdir { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/mod.rs000064400000000000000000000010461046102023000144210ustar 00000000000000mod accept; mod close; mod connect; mod fallocate; mod fsync; mod mkdir_at; mod noop; pub(crate) use noop::NoOp; mod open; mod read; mod read_fixed; mod readv; mod recv_from; mod recvmsg; mod rename_at; mod send_to; mod send_zc; mod sendmsg; mod sendmsg_zc; mod shared_fd; pub(crate) use shared_fd::SharedFd; mod socket; pub(crate) use socket::Socket; mod statx; mod unlink_at; mod util; pub(crate) use util::cstr; pub(crate) mod write; mod write_fixed; mod writev; mod writev_all; pub(crate) use writev_all::writev_at_all; tokio-uring-0.5.0/src/io/noop.rs000064400000000000000000000015431046102023000146170ustar 00000000000000use 
crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use std::io; /// No operation. Just posts a completion event, nothing else. /// /// Has a place in benchmarking. pub struct NoOp {} impl Op { pub fn no_op() -> io::Result> { use io_uring::opcode; CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(NoOp {}, |_| opcode::Nop::new().build()) }) } } impl Completable for NoOp { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } #[cfg(test)] mod test { use crate as tokio_uring; #[test] fn perform_no_op() -> () { tokio_uring::start(async { tokio_uring::no_op().await.unwrap(); }) } } tokio-uring-0.5.0/src/io/open.rs000064400000000000000000000030701046102023000146020ustar 00000000000000use crate::fs::{File, OpenOptions}; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use std::ffi::CString; use std::io; use std::path::Path; /// Open a file #[allow(dead_code)] pub(crate) struct Open { pub(crate) path: CString, pub(crate) flags: libc::c_int, } impl Op { /// Submit a request to open a file. pub(crate) fn open(path: &Path, options: &OpenOptions) -> io::Result> { use io_uring::{opcode, types}; let path = super::util::cstr(path)?; let flags = libc::O_CLOEXEC | options.access_mode()? | options.creation_mode()? | (options.custom_flags & !libc::O_ACCMODE); CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(Open { path, flags }, |open| { // Get a reference to the memory. The string will be held by the // operation state and will not be accessed again until the operation // completes. 
let p_ref = open.path.as_c_str().as_ptr(); opcode::OpenAt::new(types::Fd(libc::AT_FDCWD), p_ref) .flags(flags) .mode(options.mode) .build() }) }) } } impl Completable for Open { type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { Ok(File::from_shared_fd(SharedFd::new(cqe.result? as _))) } } tokio-uring-0.5.0/src/io/pool.rs000064400000000000000000000034451046102023000146200ustar 00000000000000use crate::driver; use io_uring::{opcode, IoUring}; use std::io; use std::mem::ManuallyDrop; /// Buffer pool shared with kernel pub(crate) struct Pool { mem: *mut u8, num: usize, size: usize, } pub(crate) struct ProvidedBuf { buf: ManuallyDrop>, driver: driver::Handle, } impl Pool { pub(super) fn new(num: usize, size: usize) -> Pool { let total = num * size; let mut mem = ManuallyDrop::new(Vec::::with_capacity(total)); assert_eq!(mem.capacity(), total); Pool { mem: mem.as_mut_ptr(), num, size, } } pub(super) fn provide_buffers(&self, uring: &mut IoUring) -> io::Result<()> { let op = opcode::ProvideBuffers::new(self.mem, self.size as _, self.num as _, 0, 0) .build() .user_data(0); // Scoped to ensure `sq` drops before trying to submit { let mut sq = uring.submission(); if unsafe { sq.push(&op) }.is_err() { unimplemented!("when is this hit?"); } } uring.submit_and_wait(1)?; let mut cq = uring.completion(); for cqe in &mut cq { assert_eq!(cqe.user_data(), 0); } Ok(()) } } impl ProvidedBuf {} impl Drop for ProvidedBuf { fn drop(&mut self) { let mut driver = self.driver.borrow_mut(); let pool = &driver.pool; let ptr = self.buf.as_mut_ptr(); let bid = (ptr as usize - pool.mem as usize) / pool.size; let op = opcode::ProvideBuffers::new(ptr, pool.size as _, 1, 0, bid as _) .build() .user_data(u64::MAX); let mut sq = driver.uring.submission(); if unsafe { sq.push(&op) }.is_err() { unimplemented!(); } } } tokio-uring-0.5.0/src/io/read.rs000064400000000000000000000034021046102023000145530ustar 00000000000000use crate::buf::BoundedBufMut; use crate::io::SharedFd; 
use crate::BufResult; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use std::io; pub(crate) struct Read { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. #[allow(dead_code)] fd: SharedFd, /// Reference to the in-flight buffer. pub(crate) buf: T, } impl Op> { pub(crate) fn read_at(fd: &SharedFd, buf: T, offset: u64) -> io::Result>> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Read { fd: fd.clone(), buf, }, |read| { // Get raw buffer info let ptr = read.buf.stable_mut_ptr(); let len = read.buf.bytes_total(); opcode::Read::new(types::Fd(fd.raw_fd()), ptr, len as _) .offset(offset as _) .build() }, ) }) } } impl Completable for Read where T: BoundedBufMut, { type Output = BufResult; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let mut buf = self.buf; // If the operation was successful, advance the initialized cursor. if let Ok(n) = res { // Safety: the kernel wrote `n` bytes to the buffer. unsafe { buf.set_init(n); } } (res, buf) } } tokio-uring-0.5.0/src/io/read_fixed.rs000064400000000000000000000037471046102023000157460ustar 00000000000000use crate::buf::fixed::FixedBuf; use crate::buf::BoundedBufMut; use crate::io::SharedFd; use crate::runtime::driver::op::{self, Completable, Op}; use crate::BufResult; use crate::runtime::CONTEXT; use std::io; pub(crate) struct ReadFixed { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. #[allow(dead_code)] fd: SharedFd, /// The in-flight buffer. 
buf: T, } impl Op> where T: BoundedBufMut, { pub(crate) fn read_fixed_at( fd: &SharedFd, buf: T, offset: u64, ) -> io::Result>> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( ReadFixed { fd: fd.clone(), buf, }, |read_fixed| { // Get raw buffer info let ptr = read_fixed.buf.stable_mut_ptr(); let len = read_fixed.buf.bytes_total(); let buf_index = read_fixed.buf.get_buf().buf_index(); opcode::ReadFixed::new(types::Fd(fd.raw_fd()), ptr, len as _, buf_index) .offset(offset as _) .build() }, ) }) } } impl Completable for ReadFixed where T: BoundedBufMut, { type Output = BufResult; fn complete(self, cqe: op::CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let mut buf = self.buf; // If the operation was successful, advance the initialized cursor. if let Ok(n) = res { // Safety: the kernel wrote `n` bytes to the buffer. unsafe { buf.set_init(n); } } (res, buf) } } tokio-uring-0.5.0/src/io/readv.rs000064400000000000000000000053431046102023000147470ustar 00000000000000use crate::buf::BoundedBufMut; use crate::BufResult; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use libc::iovec; use std::io; pub(crate) struct Readv { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. #[allow(dead_code)] fd: SharedFd, /// Reference to the in-flight buffer. pub(crate) bufs: Vec, /// Parameter for `io_uring::op::readv`, referring `bufs`. iovs: Vec, } impl Op> { pub(crate) fn readv_at( fd: &SharedFd, mut bufs: Vec, offset: u64, ) -> io::Result>> { use io_uring::{opcode, types}; // Build `iovec` objects referring the provided `bufs` for `io_uring::opcode::Readv`. let iovs: Vec = bufs .iter_mut() .map(|b| iovec { // Safety guaranteed by `BoundedBufMut`. 
iov_base: unsafe { b.stable_mut_ptr().add(b.bytes_init()) as *mut libc::c_void }, iov_len: b.bytes_total() - b.bytes_init(), }) .collect(); CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Readv { fd: fd.clone(), bufs, iovs, }, |read| { opcode::Readv::new( types::Fd(fd.raw_fd()), read.iovs.as_ptr(), read.iovs.len() as u32, ) .offset(offset as _) .build() }, ) }) } } impl Completable for Readv where T: BoundedBufMut, { type Output = BufResult>; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let mut bufs = self.bufs; // If the operation was successful, advance the initialized cursor. if let Ok(n) = res { let mut count = n; for b in bufs.iter_mut() { let sz = std::cmp::min(count, b.bytes_total() - b.bytes_init()); let pos = b.bytes_init() + sz; // Safety: the kernel returns bytes written, and we have ensured that `pos` is // valid for current buffer. unsafe { b.set_init(pos) }; count -= sz; if count == 0 { break; } } assert_eq!(count, 0); } (res, bufs) } } tokio-uring-0.5.0/src/io/recv_from.rs000064400000000000000000000046521046102023000156320ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use crate::{buf::BoundedBufMut, io::SharedFd, BufResult}; use socket2::SockAddr; use std::{ io::IoSliceMut, {boxed::Box, io, net::SocketAddr}, }; #[allow(dead_code)] pub(crate) struct RecvFrom { fd: SharedFd, pub(crate) buf: T, io_slices: Vec>, pub(crate) socket_addr: Box, pub(crate) msghdr: Box, } impl Op> { pub(crate) fn recv_from(fd: &SharedFd, mut buf: T) -> io::Result>> { use io_uring::{opcode, types}; let mut io_slices = vec![IoSliceMut::new(unsafe { std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_total()) })]; let socket_addr = Box::new(unsafe { SockAddr::init(|_, _| Ok(()))?.1 }); let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); msghdr.msg_iov = 
io_slices.as_mut_ptr().cast(); msghdr.msg_iovlen = io_slices.len() as _; msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; msghdr.msg_namelen = socket_addr.len(); CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( RecvFrom { fd: fd.clone(), buf, io_slices, socket_addr, msghdr, }, |recv_from| { opcode::RecvMsg::new( types::Fd(recv_from.fd.raw_fd()), recv_from.msghdr.as_mut() as *mut _, ) .build() }, ) }) } } impl Completable for RecvFrom where T: BoundedBufMut, { type Output = BufResult<(usize, SocketAddr), T>; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let mut buf = self.buf; let socket_addr = (*self.socket_addr).as_socket(); let res = res.map(|n| { let socket_addr: SocketAddr = socket_addr.unwrap(); // Safety: the kernel wrote `n` bytes to the buffer. unsafe { buf.set_init(n); } (n, socket_addr) }); (res, buf) } } tokio-uring-0.5.0/src/io/recvmsg.rs000064400000000000000000000057331046102023000153170ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use crate::{buf::BoundedBufMut, io::SharedFd, BufResult}; use socket2::SockAddr; use std::{ io::IoSliceMut, {boxed::Box, io, net::SocketAddr}, }; pub(crate) struct RecvMsg { #[allow(dead_code)] fd: SharedFd, pub(crate) buf: Vec, #[allow(dead_code)] io_slices: Vec>, pub(crate) socket_addr: Box, pub(crate) msghdr: Box, } impl Op> { pub(crate) fn recvmsg(fd: &SharedFd, mut bufs: Vec) -> io::Result>> { use io_uring::{opcode, types}; let mut io_slices = Vec::with_capacity(bufs.len()); for buf in &mut bufs { io_slices.push(IoSliceMut::new(unsafe { std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_total()) })); } let socket_addr = Box::new(unsafe { SockAddr::init(|_, _| Ok(()))?.1 }); let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); msghdr.msg_iov = io_slices.as_mut_ptr().cast(); 
msghdr.msg_iovlen = io_slices.len() as _; msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; msghdr.msg_namelen = socket_addr.len(); CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( RecvMsg { fd: fd.clone(), buf: bufs, io_slices, socket_addr, msghdr, }, |recv_from| { opcode::RecvMsg::new( types::Fd(recv_from.fd.raw_fd()), recv_from.msghdr.as_mut() as *mut _, ) .build() }, ) }) } } impl Completable for RecvMsg where T: BoundedBufMut, { type Output = BufResult<(usize, SocketAddr), Vec>; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffers let mut bufs = self.buf; let socket_addr = (*self.socket_addr).as_socket(); let res = res.map(|n| { let socket_addr: SocketAddr = socket_addr.unwrap(); let mut bytes = n; for buf in &mut bufs { // Safety: the kernel wrote `n` bytes to the buffer. unsafe { buf.set_init(bytes); } let total = buf.bytes_total(); if bytes > total { bytes -= total; } else { // In the current API bytes_init is a watermark, // so remaining don't need zeroing. break; } } (n, socket_addr) }); (res, bufs) } } tokio-uring-0.5.0/src/io/rename_at.rs000064400000000000000000000033461046102023000156020ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use std::ffi::CString; use std::io; use std::path::Path; /// Renames a file, moving it between directories if required. /// /// The given paths are interpreted relative to the current working directory /// of the calling process. pub(crate) struct RenameAt { pub(crate) from: CString, pub(crate) to: CString, } impl Op { /// Submit a request to rename a specified path to a new name with /// the provided flags. 
pub(crate) fn rename_at(from: &Path, to: &Path, flags: u32) -> io::Result> { use io_uring::{opcode, types}; let from = super::util::cstr(from)?; let to = super::util::cstr(to)?; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( RenameAt { from, to }, |rename| { // Get a reference to the memory. The string will be held by the // operation state and will not be accessed again until the operation // completes. let from_ref = rename.from.as_c_str().as_ptr(); let to_ref = rename.to.as_c_str().as_ptr(); opcode::RenameAt::new( types::Fd(libc::AT_FDCWD), from_ref, types::Fd(libc::AT_FDCWD), to_ref, ) .flags(flags) .build() }, ) }) } } impl Completable for RenameAt { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/send_to.rs000064400000000000000000000046561046102023000153070ustar 00000000000000use crate::buf::BoundedBuf; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use crate::BufResult; use socket2::SockAddr; use std::io::IoSlice; use std::{boxed::Box, io, net::SocketAddr}; pub(crate) struct SendTo { #[allow(dead_code)] fd: SharedFd, pub(crate) buf: T, #[allow(dead_code)] io_slices: Vec>, #[allow(dead_code)] socket_addr: Option>, pub(crate) msghdr: Box, } impl Op> { pub(crate) fn send_to( fd: &SharedFd, buf: T, socket_addr: Option, ) -> io::Result>> { use io_uring::{opcode, types}; let io_slices = vec![IoSlice::new(unsafe { std::slice::from_raw_parts(buf.stable_ptr(), buf.bytes_init()) })]; let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); msghdr.msg_iov = io_slices.as_ptr() as *mut _; msghdr.msg_iovlen = io_slices.len() as _; let socket_addr = match socket_addr { Some(_socket_addr) => { let socket_addr = Box::new(SockAddr::from(_socket_addr)); msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; msghdr.msg_namelen = socket_addr.len(); Some(socket_addr) } None => { 
msghdr.msg_name = std::ptr::null_mut(); msghdr.msg_namelen = 0; None } }; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( SendTo { fd: fd.clone(), buf, io_slices, socket_addr, msghdr, }, |send_to| { opcode::SendMsg::new( types::Fd(send_to.fd.raw_fd()), send_to.msghdr.as_ref() as *const _, ) .build() }, ) }) } } impl Completable for SendTo { type Output = BufResult; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let buf = self.buf; (res, buf) } } tokio-uring-0.5.0/src/io/send_zc.rs000064400000000000000000000033101046102023000152630ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, MultiCQEFuture, Op, Updateable}; use crate::runtime::CONTEXT; use crate::{buf::BoundedBuf, io::SharedFd, BufResult}; use std::io; pub(crate) struct SendZc { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. 
#[allow(dead_code)] fd: SharedFd, pub(crate) buf: T, /// Hold the number of transmitted bytes bytes: usize, } impl Op, MultiCQEFuture> { pub(crate) fn send_zc(fd: &SharedFd, buf: T) -> io::Result { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( SendZc { fd: fd.clone(), buf, bytes: 0, }, |send| { // Get raw buffer info let ptr = send.buf.stable_ptr(); let len = send.buf.bytes_init(); opcode::SendZc::new(types::Fd(fd.raw_fd()), ptr, len as _).build() }, ) }) } } impl Completable for SendZc { type Output = BufResult; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| self.bytes + v as usize); // Recover the buffer let buf = self.buf; (res, buf) } } impl Updateable for SendZc { fn update(&mut self, cqe: CqeResult) { // uring send_zc promises there will be no error on CQE's marked more self.bytes += *cqe.result.as_ref().unwrap() as usize; } } tokio-uring-0.5.0/src/io/sendmsg.rs000064400000000000000000000062231046102023000153040ustar 00000000000000use crate::buf::BoundedBuf; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use socket2::SockAddr; use std::io; use std::io::IoSlice; use std::net::SocketAddr; pub(crate) struct SendMsg { _fd: SharedFd, _io_bufs: Vec, _io_slices: Vec>, _socket_addr: Option>, msg_control: Option, msghdr: Box, } impl Op> { pub(crate) fn sendmsg( fd: &SharedFd, io_bufs: Vec, socket_addr: Option, msg_control: Option, ) -> io::Result { use io_uring::{opcode, types}; let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); let mut io_slices: Vec> = Vec::with_capacity(io_bufs.len()); for io_buf in &io_bufs { io_slices.push(IoSlice::new(unsafe { std::slice::from_raw_parts(io_buf.stable_ptr(), io_buf.bytes_init()) })) } msghdr.msg_iov = io_slices.as_ptr() as *mut _; msghdr.msg_iovlen = io_slices.len() as _; let socket_addr = match socket_addr { 
Some(_socket_addr) => { let socket_addr = Box::new(SockAddr::from(_socket_addr)); msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; msghdr.msg_namelen = socket_addr.len(); Some(socket_addr) } None => { msghdr.msg_name = std::ptr::null_mut(); msghdr.msg_namelen = 0; None } }; match msg_control { Some(ref _msg_control) => { msghdr.msg_control = _msg_control.stable_ptr() as *mut _; msghdr.msg_controllen = _msg_control.bytes_init(); } None => { msghdr.msg_control = std::ptr::null_mut(); msghdr.msg_controllen = 0_usize; } } CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( SendMsg { _fd: fd.clone(), _io_bufs: io_bufs, _socket_addr: socket_addr, _io_slices: io_slices, msg_control, msghdr, }, |sendmsg| { opcode::SendMsg::new( types::Fd(sendmsg._fd.raw_fd()), &*sendmsg.msghdr as *const _, ) .build() }, ) }) } } impl Completable for SendMsg { type Output = (io::Result, Vec, Option); fn complete(self, cqe: CqeResult) -> (io::Result, Vec, Option) { // Convert the operation result to `usize` let res = cqe.result.map(|n| n as usize); // Recover the data buffers. let io_bufs = self._io_bufs; // Recover the ancillary data buffer. 
let msg_control = self.msg_control; (res, io_bufs, msg_control) } } tokio-uring-0.5.0/src/io/sendmsg_zc.rs000064400000000000000000000072051046102023000160010ustar 00000000000000use crate::buf::BoundedBuf; use crate::io::SharedFd; use crate::runtime::driver::op::{Completable, CqeResult, MultiCQEFuture, Op, Updateable}; use crate::runtime::CONTEXT; use socket2::SockAddr; use std::io; use std::io::IoSlice; use std::net::SocketAddr; pub(crate) struct SendMsgZc { #[allow(dead_code)] fd: SharedFd, #[allow(dead_code)] io_bufs: Vec, #[allow(dead_code)] io_slices: Vec>, #[allow(dead_code)] socket_addr: Option>, msg_control: Option, msghdr: Box, /// Hold the number of transmitted bytes bytes: usize, } impl Op, MultiCQEFuture> { pub(crate) fn sendmsg_zc( fd: &SharedFd, io_bufs: Vec, socket_addr: Option, msg_control: Option, ) -> io::Result { use io_uring::{opcode, types}; let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); let mut io_slices: Vec> = Vec::with_capacity(io_bufs.len()); for io_buf in &io_bufs { io_slices.push(IoSlice::new(unsafe { std::slice::from_raw_parts(io_buf.stable_ptr(), io_buf.bytes_init()) })) } msghdr.msg_iov = io_slices.as_ptr() as *mut _; msghdr.msg_iovlen = io_slices.len() as _; let socket_addr = match socket_addr { Some(_socket_addr) => { let socket_addr = Box::new(SockAddr::from(_socket_addr)); msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; msghdr.msg_namelen = socket_addr.len(); Some(socket_addr) } None => { msghdr.msg_name = std::ptr::null_mut(); msghdr.msg_namelen = 0; None } }; match msg_control { Some(ref _msg_control) => { msghdr.msg_control = _msg_control.stable_ptr() as *mut _; msghdr.msg_controllen = _msg_control.bytes_init(); } None => { msghdr.msg_control = std::ptr::null_mut(); msghdr.msg_controllen = 0_usize; } } CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( SendMsgZc { fd: fd.clone(), io_bufs, socket_addr, io_slices, msg_control, msghdr, bytes: 0, }, |sendmsg_zc| { 
opcode::SendMsgZc::new( types::Fd(sendmsg_zc.fd.raw_fd()), sendmsg_zc.msghdr.as_mut() as *const _, ) .build() }, ) }) } } impl Completable for SendMsgZc { type Output = (io::Result, Vec, Option); fn complete(self, cqe: CqeResult) -> (io::Result, Vec, Option) { // Convert the operation result to `usize`, and add previous byte count let res = cqe.result.map(|v| self.bytes + v as usize); // Recover the data buffers. let io_bufs = self.io_bufs; // Recover the ancillary data buffer. let msg_control = self.msg_control; (res, io_bufs, msg_control) } } impl Updateable for SendMsgZc { fn update(&mut self, cqe: CqeResult) { // uring send_zc promises there will be no error on CQE's marked more self.bytes += *cqe.result.as_ref().unwrap() as usize; } } tokio-uring-0.5.0/src/io/shared_fd.rs000064400000000000000000000112311046102023000155560ustar 00000000000000use std::future::poll_fn; use std::{ cell::RefCell, io, os::unix::io::{FromRawFd, RawFd}, rc::Rc, task::Waker, }; use crate::runtime::driver::op::Op; // Tracks in-flight operations on a file descriptor. Ensures all in-flight // operations complete before submitting the close. // // When closing the file descriptor because it is going out of scope, a synchronous close is // employed. // // The closed state is tracked so close calls after the first are ignored. // Only the first close call returns the true result of closing the file descriptor. #[derive(Clone)] pub(crate) struct SharedFd { inner: Rc, } struct Inner { // Open file descriptor fd: RawFd, // Track the sharing state of the file descriptor: // normal, being waited on to allow a close by the parent's owner, or already closed. state: RefCell, } enum State { /// Initial state Init, /// Waiting for the number of strong Rc pointers to drop to 1. WaitingForUniqueness(Waker), /// The close has been triggered by the parent owner. 
Closed, } impl SharedFd { pub(crate) fn new(fd: RawFd) -> SharedFd { SharedFd { inner: Rc::new(Inner { fd, state: RefCell::new(State::Init), }), } } /// Returns the RawFd pub(crate) fn raw_fd(&self) -> RawFd { self.inner.fd } /// An FD cannot be closed until all in-flight operation have completed. /// This prevents bugs where in-flight reads could operate on the incorrect /// file descriptor. /// pub(crate) async fn close(&mut self) -> io::Result<()> { loop { // Get a mutable reference to Inner, indicating there are no // in-flight operations on the FD. if let Some(inner) = Rc::get_mut(&mut self.inner) { // Wait for the close operation. return inner.async_close_op().await; } self.sharedfd_is_unique().await; } } /// Completes when the SharedFd's Inner Rc strong count is 1. /// Gets polled any time a SharedFd is dropped. async fn sharedfd_is_unique(&self) { use std::task::Poll; poll_fn(|cx| { if Rc::::strong_count(&self.inner) == 1 { return Poll::Ready(()); } let mut state = self.inner.state.borrow_mut(); match &mut *state { State::Init => { *state = State::WaitingForUniqueness(cx.waker().clone()); Poll::Pending } State::WaitingForUniqueness(waker) => { if !waker.will_wake(cx.waker()) { waker.clone_from(cx.waker()); } Poll::Pending } State::Closed => Poll::Ready(()), } }) .await; } } impl Inner { async fn async_close_op(&mut self) -> io::Result<()> { // &mut self implies there are no outstanding operations. // If state already closed, the user closed multiple times; simply return Ok. // Otherwise, set state to closed and then submit and await the uring close operation. { // Release state guard before await. let state = RefCell::get_mut(&mut self.state); if let State::Closed = *state { return Ok(()); } *state = State::Closed; } Op::close(self.fd)?.await } } impl Drop for SharedFd { fn drop(&mut self) { // If the SharedFd state is Waiting // The job of the SharedFd's drop is to possibly wake a task that is waiting for the // reference count to go down. 
use std::mem; let mut state = self.inner.state.borrow_mut(); if let State::WaitingForUniqueness(_) = *state { let state = &mut *state; if let State::WaitingForUniqueness(waker) = mem::replace(state, State::Init) { // Wake the task wanting to close this SharedFd and let it try again. If it finds // there are no more outstanding clones, it will succeed. Otherwise it will start a new // Future, waiting for another SharedFd to be dropped. waker.wake() } } } } impl Drop for Inner { fn drop(&mut self) { // If the inner state isn't `Closed`, the user hasn't called close().await // so do it synchronously. let state = self.state.borrow_mut(); if let State::Closed = *state { return; } let _ = unsafe { std::fs::File::from_raw_fd(self.fd) }; } } tokio-uring-0.5.0/src/io/socket.rs000064400000000000000000000224701046102023000151360ustar 00000000000000use crate::io::write::UnsubmittedWrite; use crate::runtime::driver::op::Op; use crate::{ buf::fixed::FixedBuf, buf::{BoundedBuf, BoundedBufMut, IoBuf, Slice}, io::SharedFd, UnsubmittedOneshot, }; use std::{ io, net::SocketAddr, os::unix::io::{AsRawFd, IntoRawFd, RawFd}, path::Path, }; #[derive(Clone)] pub(crate) struct Socket { /// Open file descriptor pub(crate) fd: SharedFd, } pub(crate) fn get_domain(socket_addr: SocketAddr) -> libc::c_int { match socket_addr { SocketAddr::V4(_) => libc::AF_INET, SocketAddr::V6(_) => libc::AF_INET6, } } impl Socket { pub(crate) fn new(socket_addr: SocketAddr, socket_type: libc::c_int) -> io::Result { let socket_type = socket_type | libc::SOCK_CLOEXEC; let domain = get_domain(socket_addr); let fd = socket2::Socket::new(domain.into(), socket_type.into(), None)?.into_raw_fd(); let fd = SharedFd::new(fd); Ok(Socket { fd }) } pub(crate) fn new_unix(socket_type: libc::c_int) -> io::Result { let socket_type = socket_type | libc::SOCK_CLOEXEC; let domain = libc::AF_UNIX; let fd = socket2::Socket::new(domain.into(), socket_type.into(), None)?.into_raw_fd(); let fd = SharedFd::new(fd); Ok(Socket { fd }) } 
pub(crate) fn write(&self, buf: T) -> UnsubmittedWrite { UnsubmittedOneshot::write_at(&self.fd, buf, 0) } pub async fn write_all(&self, buf: T) -> crate::BufResult<(), T> { let orig_bounds = buf.bounds(); let (res, buf) = self.write_all_slice(buf.slice_full()).await; (res, T::from_buf_bounds(buf, orig_bounds)) } async fn write_all_slice(&self, mut buf: Slice) -> crate::BufResult<(), T> { while buf.bytes_init() != 0 { let res = self.write(buf).submit().await; match res { (Ok(0), slice) => { return ( Err(std::io::Error::new( std::io::ErrorKind::WriteZero, "failed to write whole buffer", )), slice.into_inner(), ) } (Ok(n), slice) => { buf = slice.slice(n..); } // No match on an EINTR error is performed because this // crate's design ensures we are not calling the 'wait' option // in the ENTER syscall. Only an Enter with 'wait' can generate // an EINTR according to the io_uring man pages. (Err(e), slice) => return (Err(e), slice.into_inner()), } } (Ok(()), buf.into_inner()) } pub(crate) async fn write_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBuf, { let op = Op::write_fixed_at(&self.fd, buf, 0).unwrap(); op.await } pub(crate) async fn write_fixed_all(&self, buf: T) -> crate::BufResult<(), T> where T: BoundedBuf, { let orig_bounds = buf.bounds(); let (res, buf) = self.write_fixed_all_slice(buf.slice_full()).await; (res, T::from_buf_bounds(buf, orig_bounds)) } async fn write_fixed_all_slice( &self, mut buf: Slice, ) -> crate::BufResult<(), FixedBuf> { while buf.bytes_init() != 0 { let res = self.write_fixed(buf).await; match res { (Ok(0), slice) => { return ( Err(std::io::Error::new( std::io::ErrorKind::WriteZero, "failed to write whole buffer", )), slice.into_inner(), ) } (Ok(n), slice) => { buf = slice.slice(n..); } // No match on an EINTR error is performed because this // crate's design ensures we are not calling the 'wait' option // in the ENTER syscall. Only an Enter with 'wait' can generate // an EINTR according to the io_uring man pages. 
(Err(e), slice) => return (Err(e), slice.into_inner()), } } (Ok(()), buf.into_inner()) } pub async fn writev(&self, buf: Vec) -> crate::BufResult> { let op = Op::writev_at(&self.fd, buf, 0).unwrap(); op.await } pub(crate) async fn send_to( &self, buf: T, socket_addr: Option, ) -> crate::BufResult { let op = Op::send_to(&self.fd, buf, socket_addr).unwrap(); op.await } pub(crate) async fn send_zc(&self, buf: T) -> crate::BufResult { let op = Op::send_zc(&self.fd, buf).unwrap(); op.await } pub(crate) async fn sendmsg( &self, io_slices: Vec, socket_addr: Option, msg_control: Option, ) -> (io::Result, Vec, Option) { let op = Op::sendmsg(&self.fd, io_slices, socket_addr, msg_control).unwrap(); op.await } pub(crate) async fn sendmsg_zc( &self, io_slices: Vec, socket_addr: Option, msg_control: Option, ) -> (io::Result, Vec, Option) { let op = Op::sendmsg_zc(&self.fd, io_slices, socket_addr, msg_control).unwrap(); op.await } pub(crate) async fn read(&self, buf: T) -> crate::BufResult { let op = Op::read_at(&self.fd, buf, 0).unwrap(); op.await } pub(crate) async fn read_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBufMut, { let op = Op::read_fixed_at(&self.fd, buf, 0).unwrap(); op.await } pub(crate) async fn recv_from( &self, buf: T, ) -> crate::BufResult<(usize, SocketAddr), T> { let op = Op::recv_from(&self.fd, buf).unwrap(); op.await } pub(crate) async fn recvmsg( &self, buf: Vec, ) -> crate::BufResult<(usize, SocketAddr), Vec> { let op = Op::recvmsg(&self.fd, buf).unwrap(); op.await } pub(crate) async fn accept(&self) -> io::Result<(Socket, Option)> { let op = Op::accept(&self.fd)?; op.await } pub(crate) async fn connect(&self, socket_addr: socket2::SockAddr) -> io::Result<()> { let op = Op::connect(&self.fd, socket_addr)?; op.await } pub(crate) fn bind(socket_addr: SocketAddr, socket_type: libc::c_int) -> io::Result { Self::bind_internal( socket_addr.into(), get_domain(socket_addr).into(), socket_type.into(), ) } pub(crate) fn bind_unix>( path: P, 
socket_type: libc::c_int, ) -> io::Result { let addr = socket2::SockAddr::unix(path.as_ref())?; Self::bind_internal(addr, libc::AF_UNIX.into(), socket_type.into()) } pub(crate) fn from_std(socket: T) -> Socket { let fd = SharedFd::new(socket.into_raw_fd()); Self::from_shared_fd(fd) } pub(crate) fn from_shared_fd(fd: SharedFd) -> Socket { Self { fd } } fn bind_internal( socket_addr: socket2::SockAddr, domain: socket2::Domain, socket_type: socket2::Type, ) -> io::Result { let sys_listener = socket2::Socket::new(domain, socket_type, None)?; sys_listener.set_reuse_port(true)?; sys_listener.set_reuse_address(true)?; // TODO: config for buffer sizes // sys_listener.set_send_buffer_size(send_buf_size)?; // sys_listener.set_recv_buffer_size(recv_buf_size)?; sys_listener.bind(&socket_addr)?; let fd = SharedFd::new(sys_listener.into_raw_fd()); Ok(Self { fd }) } pub(crate) fn listen(&self, backlog: libc::c_int) -> io::Result<()> { syscall!(listen(self.as_raw_fd(), backlog))?; Ok(()) } /// Shuts down the read, write, or both halves of this connection. /// /// This function will cause all pending and future I/O on the specified portions to return /// immediately with an appropriate value. pub fn shutdown(&self, how: std::net::Shutdown) -> io::Result<()> { let socket_ref = socket2::SockRef::from(self); socket_ref.shutdown(how) } /// Set the value of the `TCP_NODELAY` option on this socket. /// /// If set, this option disables the Nagle algorithm. This means that /// segments are always sent as soon as possible, even if there is only a /// small amount of data. When not set, data is buffered until there is a /// sufficient amount to send out, thereby avoiding the frequent sending of /// small packets. 
pub fn set_nodelay(&self, nodelay: bool) -> io::Result<()> { let socket_ref = socket2::SockRef::from(self); socket_ref.set_nodelay(nodelay) } } impl AsRawFd for Socket { fn as_raw_fd(&self) -> RawFd { self.fd.raw_fd() } } tokio-uring-0.5.0/src/io/statx.rs000064400000000000000000000043731046102023000150130ustar 00000000000000use std::ffi::CString; use std::{ffi::CStr, io}; use io_uring::{opcode, types}; use crate::runtime::{ driver::op::{Completable, CqeResult, Op}, CONTEXT, }; use super::SharedFd; pub(crate) struct Statx { #[allow(dead_code)] fd: Option, #[allow(dead_code)] path: CString, // TODO consider returning this type when the operation is complete so the caller has the boxed value. // The builder could even recycle an old boxed value and pass it in here. statx: Box, } impl Op { // If we are passed a reference to a shared fd, clone it so we keep it live during the // Future. If we aren't, use the libc::AT_FDCWD value. // If Path is None, the flags is combined with libc::AT_EMPTY_PATH automatically. pub(crate) fn statx( fd: Option, path: Option, flags: i32, mask: u32, ) -> io::Result> { let raw = fd.as_ref().map_or(libc::AT_FDCWD, |fd| fd.raw_fd()); let mut flags = flags; let path = match path { Some(path) => path, None => { // If there is no path, add appropriate bit to flags. flags |= libc::AT_EMPTY_PATH; CStr::from_bytes_with_nul(b"\0").unwrap().into() // TODO Is there a constant CString we // could use here. 
} }; CONTEXT.with(|x| { x.handle().expect("not in a runtime context").submit_op( Statx { fd, path, statx: Box::new(unsafe { std::mem::zeroed() }), }, |statx| { opcode::Statx::new( types::Fd(raw), statx.path.as_ptr(), &mut *statx.statx as *mut libc::statx as *mut types::statx, ) .flags(flags) .mask(mask) .build() }, ) }) } } impl Completable for Statx { type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result?; Ok(*self.statx) } } tokio-uring-0.5.0/src/io/unlink_at.rs000064400000000000000000000032311046102023000156240ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use std::ffi::CString; use std::io; use std::path::Path; /// Unlink a path relative to the current working directory of the caller's process. pub(crate) struct Unlink { pub(crate) path: CString, } impl Op { /// Submit a request to unlink a directory with provided flags. pub(crate) fn unlink_dir(path: &Path) -> io::Result> { Self::unlink(path, libc::AT_REMOVEDIR) } /// Submit a request to unlink a file with provided flags. pub(crate) fn unlink_file(path: &Path) -> io::Result> { Self::unlink(path, 0) } /// Submit a request to unlink a specified path with provided flags. pub(crate) fn unlink(path: &Path, flags: i32) -> io::Result> { use io_uring::{opcode, types}; let path = super::util::cstr(path)?; CONTEXT.with(|x| { x.handle() .expect("Not in a runtime context") .submit_op(Unlink { path }, |unlink| { // Get a reference to the memory. The string will be held by the // operation state and will not be accessed again until the operation // completes. 
let p_ref = unlink.path.as_c_str().as_ptr(); opcode::UnlinkAt::new(types::Fd(libc::AT_FDCWD), p_ref) .flags(flags) .build() }) }) } } impl Completable for Unlink { type Output = io::Result<()>; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result.map(|_| ()) } } tokio-uring-0.5.0/src/io/util.rs000064400000000000000000000003101046102023000146100ustar 00000000000000use std::ffi::CString; use std::io; use std::path::Path; pub(crate) fn cstr(p: &Path) -> io::Result { use std::os::unix::ffi::OsStrExt; Ok(CString::new(p.as_os_str().as_bytes())?) } tokio-uring-0.5.0/src/io/write.rs000064400000000000000000000031141046102023000147720ustar 00000000000000use crate::{buf::BoundedBuf, io::SharedFd, BufResult, OneshotOutputTransform, UnsubmittedOneshot}; use io_uring::cqueue::Entry; use std::io; use std::marker::PhantomData; /// An unsubmitted write operation. pub type UnsubmittedWrite = UnsubmittedOneshot, WriteTransform>; #[allow(missing_docs)] pub struct WriteData { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. 
_fd: SharedFd, buf: T, } #[allow(missing_docs)] pub struct WriteTransform { _phantom: PhantomData, } impl OneshotOutputTransform for WriteTransform { type Output = BufResult; type StoredData = WriteData; fn transform_oneshot_output(self, data: Self::StoredData, cqe: Entry) -> Self::Output { let res = if cqe.result() >= 0 { Ok(cqe.result() as usize) } else { Err(io::Error::from_raw_os_error(-cqe.result())) }; (res, data.buf) } } impl UnsubmittedWrite { pub(crate) fn write_at(fd: &SharedFd, buf: T, offset: u64) -> Self { use io_uring::{opcode, types}; // Get raw buffer info let ptr = buf.stable_ptr(); let len = buf.bytes_init(); Self::new( WriteData { _fd: fd.clone(), buf, }, WriteTransform { _phantom: PhantomData, }, opcode::Write::new(types::Fd(fd.raw_fd()), ptr, len as _) .offset(offset as _) .build(), ) } } tokio-uring-0.5.0/src/io/write_fixed.rs000064400000000000000000000032321046102023000161520ustar 00000000000000use crate::buf::fixed::FixedBuf; use crate::buf::BoundedBuf; use crate::io::SharedFd; use crate::runtime::driver::op::{self, Completable, Op}; use crate::BufResult; use crate::runtime::CONTEXT; use std::io; pub(crate) struct WriteFixed { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. 
#[allow(dead_code)] fd: SharedFd, buf: T, } impl Op> where T: BoundedBuf, { pub(crate) fn write_fixed_at( fd: &SharedFd, buf: T, offset: u64, ) -> io::Result>> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( WriteFixed { fd: fd.clone(), buf, }, |write_fixed| { // Get raw buffer info let ptr = write_fixed.buf.stable_ptr(); let len = write_fixed.buf.bytes_init(); let buf_index = write_fixed.buf.get_buf().buf_index(); opcode::WriteFixed::new(types::Fd(fd.raw_fd()), ptr, len as _, buf_index) .offset(offset as _) .build() }, ) }) } } impl Completable for WriteFixed { type Output = BufResult; fn complete(self, cqe: op::CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let buf = self.buf; (res, buf) } } tokio-uring-0.5.0/src/io/writev.rs000064400000000000000000000036451046102023000151710ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use crate::{buf::BoundedBuf, io::SharedFd, BufResult}; use libc::iovec; use std::io; pub(crate) struct Writev { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. #[allow(dead_code)] fd: SharedFd, pub(crate) bufs: Vec, /// Parameter for `io_uring::op::readv`, referring `bufs`. iovs: Vec, } impl Op> { pub(crate) fn writev_at( fd: &SharedFd, mut bufs: Vec, offset: u64, ) -> io::Result>> { use io_uring::{opcode, types}; // Build `iovec` objects referring the provided `bufs` for `io_uring::opcode::Readv`. 
let iovs: Vec = bufs .iter_mut() .map(|b| iovec { iov_base: b.stable_ptr() as *mut libc::c_void, iov_len: b.bytes_init(), }) .collect(); CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( Writev { fd: fd.clone(), bufs, iovs, }, |write| { opcode::Writev::new( types::Fd(fd.raw_fd()), write.iovs.as_ptr(), write.iovs.len() as u32, ) .offset(offset as _) .build() }, ) }) } } impl Completable for Writev where T: BoundedBuf, { type Output = BufResult>; fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); // Recover the buffer let buf = self.bufs; (res, buf) } } tokio-uring-0.5.0/src/io/writev_all.rs000064400000000000000000000124111046102023000160100ustar 00000000000000use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use crate::{buf::BoundedBuf, io::SharedFd}; use libc::iovec; use std::io; // This provides a common write-all implementation for writev and is fairly efficient by allocating // the Vec just once, and computing the individual iovec entries just once, at the cost of // some unsafe calls to advance the iovec array pointer and the iovec_base pointer from time to // time when a further call to `writev` is necessary. // // The fd, bufs, and iovecs resources are passed to and from the operation's future to ensure they // stay live while the operation is active, even if the future returned by this call is cancelled. // The SharedFd is only cloned once but at the cost of also passing it back and forth within this // module. pub(crate) async fn writev_at_all( fd: &SharedFd, mut bufs: Vec, offset: Option, ) -> crate::BufResult> { // TODO decide if the function should return immediately if all the buffer lengths // were to sum to zero. That would save an allocation and one call into writev. // The fd is cloned once. let mut fd = fd.clone(); // iovs is allocated once. 
let mut iovs: Vec = bufs .iter_mut() .map(|b| iovec { iov_base: b.stable_ptr() as *mut libc::c_void, iov_len: b.bytes_init(), }) .collect(); let mut iovs_ptr = iovs.as_ptr(); let mut iovs_len: u32 = iovs.len() as _; let mut total: usize = 0; // Loop until all the bytes have been written or an error has been returned by the io_uring // device. loop { // If caller provided some offset, pass an updated offset to writev // else keep passing zero. let o = match offset { Some(m) => m + (total as u64), None => 0, }; // Call the Op that is internal to this module. let op = Op::writev_at_all2(fd, bufs, iovs, iovs_ptr, iovs_len, o).unwrap(); let res; (res, fd, bufs, iovs) = op.await; let mut n: usize = match res { Ok(m) => m, // On error, there is no indication how many bytes were written. This is standard. // The device doesn't tell us that either. Err(e) => return (Err(e), bufs), }; // TODO if n is zero, while there was more data to be written, should this be interpreted // as the file is closed so an error should be returned? Otherwise we reach the // unreachable! panic below. // // if n == 0 { return Err(..); } total += n; // Consume n and iovs_len until one or the other is exhausted. while n != 0 && iovs_len > 0 { // safety: iovs_len > 0, so safe to dereference the const *. let mut iovec = unsafe { *iovs_ptr }; let iov_len = iovec.iov_len; if n >= iov_len { n -= iov_len; // safety: iovs_len > 0, so safe to add 1 as iovs_len is decremented by 1. iovs_ptr = unsafe { iovs_ptr.add(1) }; iovs_len -= 1; } else { // safety: n was found to be less than iov_len, so adding to base and keeping // iov_len updated by decrementing maintains the invariant of the iovec // representing how much of the buffer remains to be written to. iovec.iov_base = unsafe { (iovec.iov_base as *const u8).add(n) } as _; iovec.iov_len -= n; n = 0; } } // Assert that both n and iovs_len become exhausted simultaneously. 
if (iovs_len == 0 && n != 0) || (iovs_len > 0 && n == 0) { unreachable!(); } // We are done when n and iovs_len have been consumed. if n == 0 { break; } } (Ok(total), bufs) } struct WritevAll { /// Holds a strong ref to the FD, preventing the file from being closed /// while the operation is in-flight. fd: SharedFd, bufs: Vec, iovs: Vec, } impl Op> { fn writev_at_all2( // Three values to share to keep live. fd: SharedFd, bufs: Vec, iovs: Vec, // Three values to use for this invocation. iovs_ptr: *const iovec, iovs_len: u32, offset: u64, ) -> io::Result>> { use io_uring::{opcode, types}; CONTEXT.with(|x| { x.handle().expect("Not in a runtime context").submit_op( WritevAll { fd, bufs, iovs }, // So this wouldn't need to be a function. Just pass in the entry. |write| { opcode::Writev::new(types::Fd(write.fd.raw_fd()), iovs_ptr, iovs_len) .offset(offset as _) .build() }, ) }) } } impl Completable for WritevAll where T: BoundedBuf, { type Output = (Result, SharedFd, Vec, Vec); fn complete(self, cqe: CqeResult) -> Self::Output { // Convert the operation result to `usize` let res = cqe.result.map(|v| v as usize); (res, self.fd, self.bufs, self.iovs) } } tokio-uring-0.5.0/src/lib.rs000064400000000000000000000220411046102023000137770ustar 00000000000000//! Tokio-uring provides a safe [io-uring] interface for the Tokio runtime. The //! library requires Linux kernel 5.10 or later. //! //! [io-uring]: https://kernel.dk/io_uring.pdf //! //! # Getting started //! //! Using `tokio-uring` requires starting a [`tokio-uring`] runtime. This //! runtime internally manages the main Tokio runtime and a `io-uring` driver. //! //! ```no_run //! use tokio_uring::fs::File; //! //! fn main() -> Result<(), Box> { //! tokio_uring::start(async { //! // Open a file //! let file = File::open("hello.txt").await?; //! //! let buf = vec![0; 4096]; //! // Read some data, the buffer is passed by ownership and //! // submitted to the kernel. When the operation completes, //! // we get the buffer back. 
//! let (res, buf) = file.read_at(buf, 0).await; //! let n = res?; //! //! // Display the contents //! println!("{:?}", &buf[..n]); //! //! Ok(()) //! }) //! } //! ``` //! //! Under the hood, `tokio_uring::start` starts a [`current-thread`] Runtime. //! For concurrency, spawn multiple threads, each with a `tokio-uring` runtime. //! The `tokio-uring` resource types are optimized for single-threaded usage and //! most are `!Sync`. //! //! # Submit-based operations //! //! Unlike Tokio proper, `io-uring` is based on submission based operations. //! Ownership of resources are passed to the kernel, which then performs the //! operation. When the operation completes, ownership is passed back to the //! caller. Because of this difference, the `tokio-uring` APIs diverge. //! //! For example, in the above example, reading from a `File` requires passing //! ownership of the buffer. //! //! # Closing resources //! //! With `io-uring`, closing a resource (e.g. a file) is an asynchronous //! operation. Because Rust does not support asynchronous drop yet, resource //! types provide an explicit `close()` function. If the `close()` function is //! not called, the resource will still be closed on drop, but the operation //! will happen in the background. There is no guarantee as to **when** the //! implicit close-on-drop operation happens, so it is recommended to explicitly //! call `close()`. #![warn(missing_docs)] #![allow(clippy::thread_local_initializer_can_be_made_const)] macro_rules! 
syscall { ($fn: ident ( $($arg: expr),* $(,)* ) ) => {{ let res = unsafe { libc::$fn($($arg, )*) }; if res == -1 { Err(std::io::Error::last_os_error()) } else { Ok(res) } }}; } #[macro_use] mod future; mod io; mod runtime; pub mod buf; pub mod fs; pub mod net; pub use io::write::*; pub use runtime::driver::op::{InFlightOneshot, OneshotOutputTransform, UnsubmittedOneshot}; pub use runtime::spawn; pub use runtime::Runtime; use crate::runtime::driver::op::Op; use std::future::Future; /// Starts an `io_uring` enabled Tokio runtime. /// /// All `tokio-uring` resource types must be used from within the context of a /// runtime. The `start` method initializes the runtime and runs it for the /// duration of `future`. /// /// The `tokio-uring` runtime is compatible with all Tokio, so it is possible to /// run Tokio based libraries (e.g. hyper) from within the tokio-uring runtime. /// A `tokio-uring` runtime consists of a Tokio `current_thread` runtime and an /// `io-uring` driver. All tasks spawned on the `tokio-uring` runtime are /// executed on the current thread. To add concurrency, spawn multiple threads, /// each with a `tokio-uring` runtime. /// /// # Examples /// /// Basic usage /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// // Open a file /// let file = File::open("hello.txt").await?; /// /// let buf = vec![0; 4096]; /// // Read some data, the buffer is passed by ownership and /// // submitted to the kernel. When the operation completes, /// // we get the buffer back. 
/// let (res, buf) = file.read_at(buf, 0).await; /// let n = res?; /// /// // Display the contents /// println!("{:?}", &buf[..n]); /// /// Ok(()) /// }) /// } /// ``` /// /// Using Tokio types from the `tokio-uring` runtime /// /// /// ```no_run /// use tokio::net::TcpListener; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// let listener = TcpListener::bind("127.0.0.1:8080").await?; /// /// loop { /// let (socket, _) = listener.accept().await?; /// // process socket /// } /// }) /// } /// ``` pub fn start(future: F) -> F::Output { let rt = runtime::Runtime::new(&builder()).unwrap(); rt.block_on(future) } /// Creates and returns an io_uring::Builder that can then be modified /// through its implementation methods. /// /// This function is provided to avoid requiring the user of this crate from /// having to use the io_uring crate as well. Refer to Builder::start example /// for its intended usage. pub fn uring_builder() -> io_uring::Builder { io_uring::IoUring::builder() } /// Builder API that can create and start the `io_uring` runtime with non-default parameters, /// while abstracting away the underlying io_uring crate. // #[derive(Clone, Default)] pub struct Builder { entries: u32, urb: io_uring::Builder, } /// Constructs a [`Builder`] with default settings. /// /// Use this to alter submission and completion queue parameters, and to create the io_uring /// Runtime. /// /// Refer to [`Builder::start`] for an example. pub fn builder() -> Builder { Builder { entries: 256, urb: io_uring::IoUring::builder(), } } impl Builder { /// Sets the number of Submission Queue entries in uring. /// /// The default value is 256. /// The kernel requires the number of submission queue entries to be a power of two, /// and that it be less than the number of completion queue entries. 
/// This function will adjust the `cq_entries` value to be at least 2 times `sq_entries` pub fn entries(&mut self, sq_entries: u32) -> &mut Self { self.entries = sq_entries; self } /// Replaces the default [`io_uring::Builder`], which controls the settings for the /// inner `io_uring` API. /// /// Refer to the [`io_uring::Builder`] documentation for all the supported methods. pub fn uring_builder(&mut self, b: &io_uring::Builder) -> &mut Self { self.urb = b.clone(); self } /// Starts an `io_uring` enabled Tokio runtime. /// /// # Examples /// /// Creating a uring driver with only 64 submission queue entries but /// many more completion queue entries. /// /// ```no_run /// use tokio::net::TcpListener; /// /// fn main() -> Result<(), Box> { /// tokio_uring::builder() /// .entries(64) /// .uring_builder(tokio_uring::uring_builder() /// .setup_cqsize(1024) /// ) /// .start(async { /// let listener = TcpListener::bind("127.0.0.1:8080").await?; /// /// loop { /// let (socket, _) = listener.accept().await?; /// // process socket /// } /// } /// ) /// } /// ``` pub fn start(&self, future: F) -> F::Output { let rt = runtime::Runtime::new(self).unwrap(); rt.block_on(future) } } /// A specialized `Result` type for `io-uring` operations with buffers. /// /// This type is used as a return value for asynchronous `io-uring` methods that /// require passing ownership of a buffer to the runtime. When the operation /// completes, the buffer is returned whether or not the operation completed /// successfully. /// /// # Examples /// /// ```no_run /// use tokio_uring::fs::File; /// /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// // Open a file /// let file = File::open("hello.txt").await?; /// /// let buf = vec![0; 4096]; /// // Read some data, the buffer is passed by ownership and /// // submitted to the kernel. When the operation completes, /// // we get the buffer back. 
/// let (res, buf) = file.read_at(buf, 0).await; /// let n = res?; /// /// // Display the contents /// println!("{:?}", &buf[..n]); /// /// Ok(()) /// }) /// } /// ``` pub type BufResult = (std::io::Result, B); /// The simplest possible operation. Just posts a completion event, nothing else. /// /// This has a place in benchmarking and sanity checking uring. /// /// # Examples /// /// ```no_run /// fn main() -> Result<(), Box> { /// tokio_uring::start(async { /// // Place a NoOp on the ring, and await completion event /// tokio_uring::no_op().await?; /// Ok(()) /// }) /// } /// ``` pub async fn no_op() -> std::io::Result<()> { let op = Op::::no_op().unwrap(); op.await } tokio-uring-0.5.0/src/net/mod.rs000064400000000000000000000011361046102023000146000ustar 00000000000000//! TCP/UDP bindings for `tokio-uring`. //! //! This module contains the TCP/UDP networking types, similar to the standard //! library, which can be used to implement networking protocols. //! //! # Organization //! //! * [`TcpListener`] and [`TcpStream`] provide functionality for communication over TCP //! * [`UdpSocket`] provides functionality for communication over UDP //! //! [`TcpListener`]: TcpListener //! [`TcpStream`]: TcpStream //! [`UdpSocket`]: UdpSocket mod tcp; mod udp; mod unix; pub use tcp::{TcpListener, TcpStream}; pub use udp::UdpSocket; pub use unix::{UnixListener, UnixStream}; tokio-uring-0.5.0/src/net/tcp/listener.rs000064400000000000000000000122651046102023000164410ustar 00000000000000use super::TcpStream; use crate::io::{SharedFd, Socket}; use std::{ io, net::SocketAddr, os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, }; /// A TCP socket server, listening for connections. /// /// You can accept a new connection by using the [`accept`](`TcpListener::accept`) /// method. 
/// /// # Examples /// /// ``` /// use tokio_uring::net::TcpListener; /// use tokio_uring::net::TcpStream; /// /// let listener = TcpListener::bind("127.0.0.1:2345".parse().unwrap()).unwrap(); /// /// tokio_uring::start(async move { /// let (tx_ch, rx_ch) = tokio::sync::oneshot::channel(); /// /// tokio_uring::spawn(async move { /// let (rx, _) = listener.accept().await.unwrap(); /// if let Err(_) = tx_ch.send(rx) { /// panic!("The receiver dropped"); /// } /// }); /// tokio::task::yield_now().await; // Ensure the listener.accept().await has been kicked off. /// /// let tx = TcpStream::connect("127.0.0.1:2345".parse().unwrap()).await.unwrap(); /// let rx = rx_ch.await.expect("The spawned task expected to send a TcpStream"); /// /// tx.write(b"test" as &'static [u8]).submit().await.0.unwrap(); /// /// let (_, buf) = rx.read(vec![0; 4]).await; /// /// assert_eq!(buf, b"test"); /// }); /// ``` pub struct TcpListener { inner: Socket, } impl TcpListener { /// Creates a new TcpListener, which will be bound to the specified address. /// /// The returned listener is ready for accepting connections. /// /// Binding with a port number of 0 will request that the OS assigns a port /// to this listener. pub fn bind(addr: SocketAddr) -> io::Result { let socket = Socket::bind(addr, libc::SOCK_STREAM)?; socket.listen(1024)?; Ok(TcpListener { inner: socket }) } /// Creates new `TcpListener` from a previously bound `std::net::TcpListener`. /// /// This function is intended to be used to wrap a TCP listener from the /// standard library in the tokio-uring equivalent. The conversion assumes nothing /// about the underlying socket; it is left up to the user to decide what socket /// options are appropriate for their use case. /// /// This can be used in conjunction with socket2's `Socket` interface to /// configure a socket before it's handed off, such as setting options like /// `reuse_address` or binding to multiple addresses. 
/// /// # Example /// /// ``` /// tokio_uring::start(async { /// let address: std::net::SocketAddr = "[::0]:8443".parse().unwrap(); /// let socket = tokio::net::TcpSocket::new_v6().unwrap(); /// socket.set_reuseaddr(true).unwrap(); /// socket.set_reuseport(true).unwrap(); /// socket.bind(address).unwrap(); /// /// let listener = socket.listen(1024).unwrap(); /// /// let listener = tokio_uring::net::TcpListener::from_std(listener.into_std().unwrap()); /// }) /// ``` pub fn from_std(socket: std::net::TcpListener) -> Self { let inner = Socket::from_std(socket); Self { inner } } pub(crate) fn from_socket(inner: Socket) -> Self { Self { inner } } /// Returns the local address that this listener is bound to. /// /// This can be useful, for example, when binding to port 0 to /// figure out which port was actually bound. /// /// # Examples /// /// ``` /// use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; /// use tokio_uring::net::TcpListener; /// /// let listener = TcpListener::bind("127.0.0.1:8080".parse().unwrap()).unwrap(); /// /// let addr = listener.local_addr().expect("Couldn't get local address"); /// assert_eq!(addr, SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 8080))); /// ``` pub fn local_addr(&self) -> io::Result { let fd = self.inner.as_raw_fd(); // SAFETY: Our fd is the handle the kernel has given us for a TcpListener. // Create a std::net::TcpListener long enough to call its local_addr method // and then forget it so the socket is not closed here. let l = unsafe { std::net::TcpListener::from_raw_fd(fd) }; let local_addr = l.local_addr(); std::mem::forget(l); local_addr } /// Accepts a new incoming connection from this listener. /// /// This function will yield once a new TCP connection is established. When /// established, the corresponding [`TcpStream`] and the remote peer's /// address will be returned. 
/// /// [`TcpStream`]: struct@crate::net::TcpStream pub async fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> { let (socket, socket_addr) = self.inner.accept().await?; let stream = TcpStream { inner: socket }; let socket_addr = socket_addr.ok_or_else(|| { io::Error::new(io::ErrorKind::Other, "Could not get socket IP address") })?; Ok((stream, socket_addr)) } } impl FromRawFd for TcpListener { unsafe fn from_raw_fd(fd: RawFd) -> Self { TcpListener::from_socket(Socket::from_shared_fd(SharedFd::new(fd))) } } impl AsRawFd for TcpListener { fn as_raw_fd(&self) -> RawFd { self.inner.as_raw_fd() } } tokio-uring-0.5.0/src/net/tcp/mod.rs000064400000000000000000000001251046102023000153630ustar 00000000000000mod listener; pub use listener::TcpListener; mod stream; pub use stream::TcpStream; tokio-uring-0.5.0/src/net/tcp/stream.rs000064400000000000000000000216571046102023000161140ustar 00000000000000use std::{ io, net::SocketAddr, os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, }; use crate::{ buf::fixed::FixedBuf, buf::{BoundedBuf, BoundedBufMut}, io::{SharedFd, Socket}, UnsubmittedWrite, }; /// A TCP stream between a local and a remote socket. /// /// A TCP stream can either be created by connecting to an endpoint, via the /// [`connect`] method, or by [`accepting`] a connection from a [`listener`]. /// /// # Examples /// /// ```no_run /// use tokio_uring::net::TcpStream; /// use std::net::ToSocketAddrs; /// /// fn main() -> std::io::Result<()> { /// tokio_uring::start(async { /// // Connect to a peer /// let mut stream = TcpStream::connect("127.0.0.1:8080".parse().unwrap()).await?; /// /// // Write some data. 
/// let (result, _) = stream.write(b"hello world!".as_slice()).submit().await; /// result.unwrap(); /// /// Ok(()) /// }) /// } /// ``` /// /// [`connect`]: TcpStream::connect /// [`accepting`]: crate::net::TcpListener::accept /// [`listener`]: crate::net::TcpListener pub struct TcpStream { pub(super) inner: Socket, } impl TcpStream { /// Opens a TCP connection to a remote host at the given `SocketAddr` pub async fn connect(addr: SocketAddr) -> io::Result { let socket = Socket::new(addr, libc::SOCK_STREAM)?; socket.connect(socket2::SockAddr::from(addr)).await?; let tcp_stream = TcpStream { inner: socket }; Ok(tcp_stream) } /// Creates new `TcpStream` from a previously bound `std::net::TcpStream`. /// /// This function is intended to be used to wrap a TCP stream from the /// standard library in the tokio-uring equivalent. The conversion assumes nothing /// about the underlying socket; it is left up to the user to decide what socket /// options are appropriate for their use case. /// /// This can be used in conjunction with socket2's `Socket` interface to /// configure a socket before it's handed off, such as setting options like /// `reuse_address` or binding to multiple addresses. pub fn from_std(socket: std::net::TcpStream) -> Self { let inner = Socket::from_std(socket); Self { inner } } pub(crate) fn from_socket(inner: Socket) -> Self { Self { inner } } /// Read some data from the stream into the buffer. /// /// Returns the original buffer and quantity of data read. pub async fn read(&self, buf: T) -> crate::BufResult { self.inner.read(buf).await } /// Read some data from the stream into a registered buffer. /// /// Like [`read`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. 
/// /// [`read`]: Self::read /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `read`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn read_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBufMut, { self.inner.read_fixed(buf).await } /// Write some data to the stream from the buffer. /// /// Returns the original buffer and quantity of data written. pub fn write(&self, buf: T) -> UnsubmittedWrite { self.inner.write(buf) } /// Attempts to write an entire buffer to the stream. /// /// This method will continuously call [`write`] until there is no more data to be /// written or an error is returned. This method will not return until the entire /// buffer has been successfully written or an error has occurred. /// /// If the buffer contains no data, this will never call [`write`]. /// /// # Errors /// /// This function will return the first error that [`write`] returns. 
/// /// # Examples /// /// ```no_run /// use std::net::SocketAddr; /// use tokio_uring::net::TcpListener; /// use tokio_uring::buf::BoundedBuf; /// /// let addr: SocketAddr = "127.0.0.1:0".parse().unwrap(); /// /// tokio_uring::start(async { /// let listener = TcpListener::bind(addr).unwrap(); /// /// println!("Listening on {}", listener.local_addr().unwrap()); /// /// loop { /// let (stream, _) = listener.accept().await.unwrap(); /// tokio_uring::spawn(async move { /// let mut n = 0; /// let mut buf = vec![0u8; 4096]; /// loop { /// let (result, nbuf) = stream.read(buf).await; /// buf = nbuf; /// let read = result.unwrap(); /// if read == 0 { /// break; /// } /// /// let (res, slice) = stream.write_all(buf.slice(..read)).await; /// let _ = res.unwrap(); /// buf = slice.into_inner(); /// n += read; /// } /// }); /// } /// }); /// ``` /// /// [`write`]: Self::write pub async fn write_all(&self, buf: T) -> crate::BufResult<(), T> { self.inner.write_all(buf).await } /// Writes data into the socket from a registered buffer. /// /// Like [`write`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. /// /// [`write`]: Self::write /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `write`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn write_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBuf, { self.inner.write_fixed(buf).await } /// Attempts to write an entire buffer to the stream. /// /// This method will continuously call [`write_fixed`] until there is no more data to be /// written or an error is returned. This method will not return until the entire /// buffer has been successfully written or an error has occurred. /// /// If the buffer contains no data, this will never call [`write_fixed`]. /// /// # Errors /// /// This function will return the first error that [`write_fixed`] returns. 
/// /// [`write_fixed`]: Self::write_fixed pub async fn write_fixed_all(&self, buf: T) -> crate::BufResult<(), T> where T: BoundedBuf, { self.inner.write_fixed_all(buf).await } /// Writes data from multiple buffers into this socket using the scatter/gather IO style. /// /// This function will attempt to write the entire contents of `bufs`, but /// the entire write may not succeed, or the write may also generate an /// error. The bytes will be written starting at the specified offset. /// /// # Return /// /// The method returns the operation result and the same array of buffers /// passed in as an argument. A return value of `0` typically means that the /// underlying socket is no longer able to accept bytes and will likely not /// be able to in the future as well, or that the buffer provided is empty. /// /// # Errors /// /// Each call to `write` may generate an I/O error indicating that the /// operation could not be completed. If an error is returned then no bytes /// in the buffer were written to this writer. /// /// It is **not** considered an error if the entire buffer could not be /// written to this writer. /// /// [`Ok(n)`]: Ok pub async fn writev(&self, buf: Vec) -> crate::BufResult> { self.inner.writev(buf).await } /// Shuts down the read, write, or both halves of this connection. /// /// This function will cause all pending and future I/O on the specified portions to return /// immediately with an appropriate value. pub fn shutdown(&self, how: std::net::Shutdown) -> io::Result<()> { self.inner.shutdown(how) } /// Sets the value of the TCP_NODELAY option on this socket. /// /// If set, this option disables the Nagle algorithm. This means that segments are always sent /// as soon as possible, even if there is only a small amount of data. When not set, data is /// buffered until there is a sufficient amount to send out, thereby avoiding the frequent /// sending of small packets. 
pub fn set_nodelay(&self, nodelay: bool) -> io::Result<()> { self.inner.set_nodelay(nodelay) } } impl FromRawFd for TcpStream { unsafe fn from_raw_fd(fd: RawFd) -> Self { TcpStream::from_socket(Socket::from_shared_fd(SharedFd::new(fd))) } } impl AsRawFd for TcpStream { fn as_raw_fd(&self) -> RawFd { self.inner.as_raw_fd() } } tokio-uring-0.5.0/src/net/udp.rs000064400000000000000000000340021046102023000146070ustar 00000000000000use crate::{ buf::fixed::FixedBuf, buf::{BoundedBuf, BoundedBufMut}, io::{SharedFd, Socket}, UnsubmittedWrite, }; use socket2::SockAddr; use std::{ io, net::SocketAddr, os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, }; /// A UDP socket. /// /// UDP is "connectionless", unlike TCP. Meaning, regardless of what address you've bound to, a `UdpSocket` /// is free to communicate with many different remotes. In tokio there are basically two main ways to use `UdpSocket`: /// /// * one to many: [`bind`](`UdpSocket::bind`) and use [`send_to`](`UdpSocket::send_to`) /// and [`recv_from`](`UdpSocket::recv_from`) to communicate with many different addresses /// * one to one: [`connect`](`UdpSocket::connect`) and associate with a single address, using [`write`](`UdpSocket::write`) /// and [`read`](`UdpSocket::read`) to communicate only with that remote address /// /// # Examples /// Bind and connect a pair of sockets and send a packet: /// /// ``` /// use tokio_uring::net::UdpSocket; /// use std::net::SocketAddr; /// fn main() -> std::io::Result<()> { /// tokio_uring::start(async { /// let first_addr: SocketAddr = "127.0.0.1:2401".parse().unwrap(); /// let second_addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); /// /// // bind sockets /// let socket = UdpSocket::bind(first_addr.clone()).await?; /// let other_socket = UdpSocket::bind(second_addr.clone()).await?; /// /// // connect sockets /// socket.connect(second_addr).await.unwrap(); /// other_socket.connect(first_addr).await.unwrap(); /// /// let buf = vec![0; 32]; /// /// // write data /// let 
(result, _) = socket.write(b"hello world".as_slice()).submit().await; /// result.unwrap(); /// /// // read data /// let (result, buf) = other_socket.read(buf).await; /// let n_bytes = result.unwrap(); /// /// assert_eq!(b"hello world", &buf[..n_bytes]); /// /// // write data using send on connected socket /// let (result, _) = socket.send(b"hello world via send".as_slice()).await; /// result.unwrap(); /// /// // read data /// let (result, buf) = other_socket.read(buf).await; /// let n_bytes = result.unwrap(); /// /// assert_eq!(b"hello world via send", &buf[..n_bytes]); /// /// Ok(()) /// }) /// } /// ``` /// Send and receive packets without connecting: /// /// ``` /// use tokio_uring::net::UdpSocket; /// use std::net::SocketAddr; /// fn main() -> std::io::Result<()> { /// tokio_uring::start(async { /// let first_addr: SocketAddr = "127.0.0.1:2401".parse().unwrap(); /// let second_addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); /// /// // bind sockets /// let socket = UdpSocket::bind(first_addr.clone()).await?; /// let other_socket = UdpSocket::bind(second_addr.clone()).await?; /// /// let buf = vec![0; 32]; /// /// // write data /// let (result, _) = socket.send_to(b"hello world".as_slice(), second_addr).await; /// result.unwrap(); /// /// // read data /// let (result, buf) = other_socket.recv_from(buf).await; /// let (n_bytes, addr) = result.unwrap(); /// /// assert_eq!(addr, first_addr); /// assert_eq!(b"hello world", &buf[..n_bytes]); /// /// Ok(()) /// }) /// } /// ``` pub struct UdpSocket { pub(super) inner: Socket, } impl UdpSocket { /// Creates a new UDP socket and attempt to bind it to the addr provided. /// /// Returns a new instance of [`UdpSocket`] on success, /// or an [`io::Error`](std::io::Error) on failure. pub async fn bind(socket_addr: SocketAddr) -> io::Result { let socket = Socket::bind(socket_addr, libc::SOCK_DGRAM)?; Ok(UdpSocket { inner: socket }) } /// Returns the local address to which this UDP socket is bound. 
/// /// This can be useful, for example, when binding to port 0 to /// figure out which port was actually bound. /// /// # Examples /// /// ``` /// use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; /// use tokio_uring::net::UdpSocket; /// /// tokio_uring::start(async { /// let socket = UdpSocket::bind("127.0.0.1:8080".parse().unwrap()).await.unwrap(); /// let addr = socket.local_addr().expect("Couldn't get local address"); /// assert_eq!(addr, SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 8080))); /// }); /// ``` pub fn local_addr(&self) -> io::Result { let fd = self.inner.as_raw_fd(); // SAFETY: Our fd is the handle the kernel has given us for a UdpSocket. // Create a std::net::UdpSocket long enough to call its local_addr method // and then forget it so the socket is not closed here. let s = unsafe { std::net::UdpSocket::from_raw_fd(fd) }; let local_addr = s.local_addr(); std::mem::forget(s); local_addr } /// Creates new `UdpSocket` from a previously bound `std::net::UdpSocket`. /// /// This function is intended to be used to wrap a UDP socket from the /// standard library in the tokio-uring equivalent. The conversion assumes nothing /// about the underlying socket; it is left up to the user to decide what socket /// options are appropriate for their use case. /// /// This can be used in conjunction with socket2's `Socket` interface to /// configure a socket before it's handed off, such as setting options like /// `reuse_address` or binding to multiple addresses. 
/// /// # Example /// /// ``` /// use socket2::{Protocol, Socket, Type}; /// use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; /// use tokio_uring::net::UdpSocket; /// /// fn main() -> std::io::Result<()> { /// tokio_uring::start(async { /// let std_addr: SocketAddr = "127.0.0.1:2401".parse().unwrap(); /// let second_addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); /// let sock = Socket::new(socket2::Domain::IPV4, Type::DGRAM, Some(Protocol::UDP))?; /// sock.set_reuse_port(true)?; /// sock.set_nonblocking(true)?; /// sock.bind(&std_addr.into())?; /// /// let std_socket = UdpSocket::from_std(sock.into()); /// let other_socket = UdpSocket::bind(second_addr).await?; /// /// let buf = vec![0; 32]; /// /// // write data /// let (result, _) = std_socket /// .send_to(b"hello world".as_slice(), second_addr) /// .await; /// result.unwrap(); /// /// // read data /// let (result, buf) = other_socket.recv_from(buf).await; /// let (n_bytes, addr) = result.unwrap(); /// /// assert_eq!(addr, std_addr); /// assert_eq!(b"hello world", &buf[..n_bytes]); /// /// Ok(()) /// }) /// } /// ``` pub fn from_std(socket: std::net::UdpSocket) -> Self { let inner = Socket::from_std(socket); Self { inner } } pub(crate) fn from_socket(inner: Socket) -> Self { Self { inner } } /// "Connects" this UDP socket to a remote address. /// /// This enables `write` and `read` syscalls to be used on this instance. /// It also constrains the `read` to receive data only from the specified remote peer. /// /// Note: UDP is connectionless, so a successful `connect` call does not execute /// a handshake or validation of the remote peer of any kind. /// Any errors would not be detected until the first send. pub async fn connect(&self, socket_addr: SocketAddr) -> io::Result<()> { self.inner.connect(SockAddr::from(socket_addr)).await } /// Sends data on the connected socket /// /// On success, returns the number of bytes written. 
pub async fn send(&self, buf: T) -> crate::BufResult { self.inner.send_to(buf, None).await } /// Sends data on the socket to the given address. /// /// On success, returns the number of bytes written. pub async fn send_to( &self, buf: T, socket_addr: SocketAddr, ) -> crate::BufResult { self.inner.send_to(buf, Some(socket_addr)).await } /// Sends data on the socket. Will attempt to do so without intermediate copies. /// /// On success, returns the number of bytes written. /// /// See the linux [kernel docs](https://www.kernel.org/doc/html/latest/networking/msg_zerocopy.html) /// for a discussion on when this might be appropriate. In particular: /// /// > Copy avoidance is not a free lunch. As implemented, with page pinning, /// > it replaces per byte copy cost with page accounting and completion /// > notification overhead. As a result, zero copy is generally only effective /// > at writes over around 10 KB. /// /// Note: Using fixed buffers [#54](https://github.com/tokio-rs/tokio-uring/pull/54), avoids the page-pinning overhead pub async fn send_zc(&self, buf: T) -> crate::BufResult { self.inner.send_zc(buf).await } /// Sends a message on the socket using a msghdr. /// /// Returns a tuple of: /// /// * Result containing bytes written on success /// * The original `io_slices` `Vec` /// * The original `msg_contol` `Option` /// /// Consider using [`Self::sendmsg_zc`] for a zero-copy alternative. pub async fn sendmsg( &self, io_slices: Vec, socket_addr: Option, msg_control: Option, ) -> (io::Result, Vec, Option) { self.inner .sendmsg(io_slices, socket_addr, msg_control) .await } /// Sends a message on the socket using a msghdr. /// /// Returns a tuple of: /// /// * Result containing bytes written on success /// * The original `io_slices` `Vec` /// * The original `msg_contol` `Option` /// /// See the linux [kernel docs](https://www.kernel.org/doc/html/latest/networking/msg_zerocopy.html) /// for a discussion on when this might be appropriate. 
In particular: /// /// > Copy avoidance is not a free lunch. As implemented, with page pinning, /// > it replaces per byte copy cost with page accounting and completion /// > notification overhead. As a result, zero copy is generally only effective /// > at writes over around 10 KB. /// /// Can be used with socket_addr: None on connected sockets, which can have performance /// benefits if multiple datagrams are sent to the same destination address. pub async fn sendmsg_zc( &self, io_slices: Vec, socket_addr: Option, msg_control: Option, ) -> (io::Result, Vec, Option) { self.inner .sendmsg_zc(io_slices, socket_addr, msg_control) .await } /// Receives a single datagram message on the socket. /// /// On success, returns the number of bytes read and the origin. pub async fn recv_from( &self, buf: T, ) -> crate::BufResult<(usize, SocketAddr), T> { self.inner.recv_from(buf).await } /// Receives a single datagram message on the socket, into multiple buffers /// /// On success, returns the number of bytes read and the origin. pub async fn recvmsg( &self, buf: Vec, ) -> crate::BufResult<(usize, SocketAddr), Vec> { self.inner.recvmsg(buf).await } /// Reads a packet of data from the socket into the buffer. /// /// Returns the original buffer and quantity of data read. pub async fn read(&self, buf: T) -> crate::BufResult { self.inner.read(buf).await } /// Receives a single datagram message into a registered buffer. /// /// Like [`read`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. /// /// [`read`]: Self::read /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `read`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn read_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBufMut, { self.inner.read_fixed(buf).await } /// Writes data into the socket from the specified buffer. 
/// /// Returns the original buffer and quantity of data written. pub fn write(&self, buf: T) -> UnsubmittedWrite { self.inner.write(buf) } /// Writes data into the socket from a registered buffer. /// /// Like [`write`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. /// /// [`write`]: Self::write /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `write`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn write_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBuf, { self.inner.write_fixed(buf).await } /// Shuts down the read, write, or both halves of this connection. /// /// This function causes all pending and future I/O on the specified portions to return /// immediately with an appropriate value. pub fn shutdown(&self, how: std::net::Shutdown) -> io::Result<()> { self.inner.shutdown(how) } } impl FromRawFd for UdpSocket { unsafe fn from_raw_fd(fd: RawFd) -> Self { UdpSocket::from_socket(Socket::from_shared_fd(SharedFd::new(fd))) } } impl AsRawFd for UdpSocket { fn as_raw_fd(&self) -> RawFd { self.inner.as_raw_fd() } } tokio-uring-0.5.0/src/net/unix/listener.rs000064400000000000000000000064251046102023000166370ustar 00000000000000use super::UnixStream; use crate::io::Socket; use std::{io, path::Path}; /// A Unix socket server, listening for connections. /// /// You can accept a new connection by using the [`accept`](`UnixListener::accept`) /// method. 
/// /// # Examples /// /// ``` /// use tokio_uring::net::UnixListener; /// use tokio_uring::net::UnixStream; /// /// let sock_file = "/tmp/tokio-uring-unix-test.sock"; /// let listener = UnixListener::bind(&sock_file).unwrap(); /// /// tokio_uring::start(async move { /// let (tx_ch, rx_ch) = tokio::sync::oneshot::channel(); /// /// tokio_uring::spawn(async move { /// let rx = listener.accept().await.unwrap(); /// if let Err(_) = tx_ch.send(rx) { /// panic!("The receiver dropped"); /// } /// }); /// tokio::task::yield_now().await; // Ensure the listener.accept().await has been kicked off. /// /// let tx = UnixStream::connect(&sock_file).await.unwrap(); /// let rx = rx_ch.await.expect("The spawned task expected to send a UnixStream"); /// /// tx.write(b"test" as &'static [u8]).submit().await.0.unwrap(); /// /// let (_, buf) = rx.read(vec![0; 4]).await; /// /// assert_eq!(buf, b"test"); /// }); /// /// std::fs::remove_file(&sock_file).unwrap(); /// ``` pub struct UnixListener { inner: Socket, } impl UnixListener { /// Creates a new UnixListener, which will be bound to the specified file path. /// The file path cannnot yet exist, and will be cleaned up upon dropping `UnixListener` pub fn bind>(path: P) -> io::Result { let socket = Socket::bind_unix(path, libc::SOCK_STREAM)?; socket.listen(1024)?; Ok(UnixListener { inner: socket }) } /// Returns the local address that this listener is bound to. 
/// /// # Examples /// /// ``` /// use tokio_uring::net::UnixListener; /// use std::path::Path; /// /// let sock_file = "/tmp/tokio-uring-unix-test.sock"; /// let listener = UnixListener::bind(&sock_file).unwrap(); /// /// let addr = listener.local_addr().expect("Couldn't get local address"); /// assert_eq!(addr.as_pathname(), Some(Path::new(sock_file))); /// /// std::fs::remove_file(&sock_file).unwrap(); /// ``` pub fn local_addr(&self) -> io::Result { use std::os::unix::io::{AsRawFd, FromRawFd}; let fd = self.inner.as_raw_fd(); // SAFETY: Our fd is the handle the kernel has given us for a UnixListener. // Create a std::net::UnixListener long enough to call its local_addr method // and then forget it so the socket is not closed here. let l = unsafe { std::os::unix::net::UnixListener::from_raw_fd(fd) }; let local_addr = l.local_addr(); std::mem::forget(l); local_addr } /// Accepts a new incoming connection from this listener. /// /// This function will yield once a new Unix domain socket connection /// is established. When established, the corresponding [`UnixStream`] and /// will be returned. /// /// [`UnixStream`]: struct@crate::net::UnixStream pub async fn accept(&self) -> io::Result { let (socket, _) = self.inner.accept().await?; let stream = UnixStream { inner: socket }; Ok(stream) } } tokio-uring-0.5.0/src/net/unix/mod.rs000064400000000000000000000001271046102023000155620ustar 00000000000000mod listener; pub use listener::UnixListener; mod stream; pub use stream::UnixStream; tokio-uring-0.5.0/src/net/unix/stream.rs000064400000000000000000000164071046102023000163060ustar 00000000000000use crate::{ buf::fixed::FixedBuf, buf::{BoundedBuf, BoundedBufMut}, io::{SharedFd, Socket}, UnsubmittedWrite, }; use socket2::SockAddr; use std::{ io, os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, path::Path, }; /// A Unix stream between two local sockets on a Unix OS. 
/// /// A Unix stream can either be created by connecting to an endpoint, via the /// [`connect`] method, or by [`accepting`] a connection from a [`listener`]. /// /// # Examples /// /// ```no_run /// use tokio_uring::net::UnixStream; /// use std::net::ToSocketAddrs; /// /// fn main() -> std::io::Result<()> { /// tokio_uring::start(async { /// // Connect to a peer /// let mut stream = UnixStream::connect("/tmp/tokio-uring-unix-test.sock").await?; /// /// // Write some data. /// let (result, _) = stream.write(b"hello world!".as_slice()).submit().await; /// result.unwrap(); /// /// Ok(()) /// }) /// } /// ``` /// /// [`connect`]: UnixStream::connect /// [`accepting`]: crate::net::UnixListener::accept /// [`listener`]: crate::net::UnixListener pub struct UnixStream { pub(super) inner: Socket, } impl UnixStream { /// Opens a Unix connection to the specified file path. There must be a /// `UnixListener` or equivalent listening on the corresponding Unix domain socket /// to successfully connect and return a `UnixStream`. pub async fn connect>(path: P) -> io::Result { let socket = Socket::new_unix(libc::SOCK_STREAM)?; socket.connect(SockAddr::unix(path)?).await?; let unix_stream = UnixStream { inner: socket }; Ok(unix_stream) } /// Creates new `UnixStream` from a previously bound `std::os::unix::net::UnixStream`. /// /// This function is intended to be used to wrap a TCP stream from the /// standard library in the tokio-uring equivalent. The conversion assumes nothing /// about the underlying socket; it is left up to the user to decide what socket /// options are appropriate for their use case. /// /// This can be used in conjunction with socket2's `Socket` interface to /// configure a socket before it's handed off, such as setting options like /// `reuse_address` or binding to multiple addresses. 
pub fn from_std(socket: std::os::unix::net::UnixStream) -> UnixStream { let inner = Socket::from_std(socket); Self { inner } } pub(crate) fn from_socket(inner: Socket) -> Self { Self { inner } } /// Read some data from the stream into the buffer, returning the original buffer and /// quantity of data read. pub async fn read(&self, buf: T) -> crate::BufResult { self.inner.read(buf).await } /// Like [`read`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. /// /// [`read`]: Self::read /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `read`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn read_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBufMut, { self.inner.read_fixed(buf).await } /// Write some data to the stream from the buffer, returning the original buffer and /// quantity of data written. pub fn write(&self, buf: T) -> UnsubmittedWrite { self.inner.write(buf) } /// Attempts to write an entire buffer to the stream. /// /// This method will continuously call [`write`] until there is no more data to be /// written or an error is returned. This method will not return until the entire /// buffer has been successfully written or an error has occurred. /// /// If the buffer contains no data, this will never call [`write`]. /// /// # Errors /// /// This function will return the first error that [`write`] returns. /// /// [`write`]: Self::write pub async fn write_all(&self, buf: T) -> crate::BufResult<(), T> { self.inner.write_all(buf).await } /// Like [`write`], but using a pre-mapped buffer /// registered with [`FixedBufRegistry`]. 
/// /// [`write`]: Self::write /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry /// /// # Errors /// /// In addition to errors that can be reported by `write`, /// this operation fails if the buffer is not registered in the /// current `tokio-uring` runtime. pub async fn write_fixed(&self, buf: T) -> crate::BufResult where T: BoundedBuf, { self.inner.write_fixed(buf).await } /// Attempts to write an entire buffer to the stream. /// /// This method will continuously call [`write_fixed`] until there is no more data to be /// written or an error is returned. This method will not return until the entire /// buffer has been successfully written or an error has occurred. /// /// If the buffer contains no data, this will never call [`write_fixed`]. /// /// # Errors /// /// This function will return the first error that [`write_fixed`] returns. /// /// [`write_fixed`]: Self::write pub async fn write_fixed_all(&self, buf: T) -> crate::BufResult<(), T> where T: BoundedBuf, { self.inner.write_fixed_all(buf).await } /// Write data from buffers into this socket returning how many bytes were /// written. /// /// This function will attempt to write the entire contents of `bufs`, but /// the entire write may not succeed, or the write may also generate an /// error. The bytes will be written starting at the specified offset. /// /// # Return /// /// The method returns the operation result and the same array of buffers /// passed in as an argument. A return value of `0` typically means that the /// underlying socket is no longer able to accept bytes and will likely not /// be able to in the future as well, or that the buffer provided is empty. /// /// # Errors /// /// Each call to `write` may generate an I/O error indicating that the /// operation could not be completed. If an error is returned then no bytes /// in the buffer were written to this writer. /// /// It is **not** considered an error if the entire buffer could not be /// written to this writer. 
/// /// [`Ok(n)`]: Ok pub async fn writev(&self, buf: Vec) -> crate::BufResult> { self.inner.writev(buf).await } /// Shuts down the read, write, or both halves of this connection. /// /// This function will cause all pending and future I/O on the specified portions to return /// immediately with an appropriate value. pub fn shutdown(&self, how: std::net::Shutdown) -> io::Result<()> { self.inner.shutdown(how) } } impl FromRawFd for UnixStream { unsafe fn from_raw_fd(fd: RawFd) -> Self { UnixStream::from_socket(Socket::from_shared_fd(SharedFd::new(fd))) } } impl AsRawFd for UnixStream { fn as_raw_fd(&self) -> RawFd { self.inner.as_raw_fd() } } tokio-uring-0.5.0/src/runtime/context.rs000064400000000000000000000025011046102023000163770ustar 00000000000000use crate::runtime::driver; use crate::runtime::driver::{Handle, WeakHandle}; use std::cell::RefCell; /// Owns the driver and resides in thread-local storage. pub(crate) struct RuntimeContext { driver: RefCell>, } impl RuntimeContext { /// Construct the context with an uninitialized driver. pub(crate) const fn new() -> Self { Self { driver: RefCell::new(None), } } /// Initialize the driver. pub(crate) fn set_handle(&self, handle: Handle) { let mut guard = self.driver.borrow_mut(); assert!(guard.is_none(), "Attempted to initialize the driver twice"); *guard = Some(handle); } pub(crate) fn unset_driver(&self) { let mut guard = self.driver.borrow_mut(); assert!(guard.is_some(), "Attempted to clear nonexistent driver"); *guard = None; } /// Check if driver is initialized #[allow(dead_code)] pub(crate) fn is_set(&self) -> bool { self.driver .try_borrow() .map(|b| b.is_some()) .unwrap_or(false) } pub(crate) fn handle(&self) -> Option { self.driver.borrow().clone() } #[allow(dead_code)] pub(crate) fn weak(&self) -> Option { self.driver.borrow().as_ref().map(Into::into) } } tokio-uring-0.5.0/src/runtime/driver/handle.rs000064400000000000000000000072551046102023000174540ustar 00000000000000//! 
Internal, reference-counted handle to the driver. //! //! The driver was previously managed exclusively by thread-local context, but this proved //! untenable. //! //! The new system uses a handle which reference-counts the driver to track ownership and access to //! the driver. //! //! There are two handles. //! The strong handle is owning, and the weak handle is non-owning. //! This is important for avoiding reference cycles. //! The weak handle should be used by anything which is stored in the driver or does not need to //! keep the driver alive for it's duration. use io_uring::{cqueue, squeue}; use std::cell::RefCell; use std::io; use std::ops::Deref; use std::os::unix::io::{AsRawFd, RawFd}; use std::rc::{Rc, Weak}; use std::task::{Context, Poll}; use crate::buf::fixed::FixedBuffers; use crate::runtime::driver::op::{Completable, MultiCQEFuture, Op, Updateable}; use crate::runtime::driver::Driver; #[derive(Clone)] pub(crate) struct Handle { pub(super) inner: Rc>, } #[derive(Clone)] pub(crate) struct WeakHandle { inner: Weak>, } impl Handle { pub(crate) fn new(b: &crate::Builder) -> io::Result { Ok(Self { inner: Rc::new(RefCell::new(Driver::new(b)?)), }) } pub(crate) fn dispatch_completions(&self) { self.inner.borrow_mut().dispatch_completions() } pub(crate) fn flush(&self) -> io::Result { self.inner.borrow_mut().uring.submit() } pub(crate) fn register_buffers( &self, buffers: Rc>, ) -> io::Result<()> { self.inner.borrow_mut().register_buffers(buffers) } pub(crate) fn unregister_buffers( &self, buffers: Rc>, ) -> io::Result<()> { self.inner.borrow_mut().unregister_buffers(buffers) } pub(crate) fn submit_op_2(&self, sqe: squeue::Entry) -> usize { self.inner.borrow_mut().submit_op_2(sqe) } pub(crate) fn submit_op(&self, data: T, f: F) -> io::Result> where T: Completable, F: FnOnce(&mut T) -> squeue::Entry, { self.inner.borrow_mut().submit_op(data, f, self.into()) } pub(crate) fn poll_op(&self, op: &mut Op, cx: &mut Context<'_>) -> Poll where T: Unpin + 'static + 
Completable, { self.inner.borrow_mut().poll_op(op, cx) } pub(crate) fn poll_op_2(&self, index: usize, cx: &mut Context<'_>) -> Poll { self.inner.borrow_mut().poll_op_2(index, cx) } pub(crate) fn poll_multishot_op( &self, op: &mut Op, cx: &mut Context<'_>, ) -> Poll where T: Unpin + 'static + Completable + Updateable, { self.inner.borrow_mut().poll_multishot_op(op, cx) } pub(crate) fn remove_op(&self, op: &mut Op) { self.inner.borrow_mut().remove_op(op) } pub(crate) fn remove_op_2(&self, index: usize, data: T) { self.inner.borrow_mut().remove_op_2(index, data) } } impl WeakHandle { pub(crate) fn upgrade(&self) -> Option { Some(Handle { inner: self.inner.upgrade()?, }) } } impl AsRawFd for Handle { fn as_raw_fd(&self) -> RawFd { self.inner.borrow().uring.as_raw_fd() } } impl From for Handle { fn from(driver: Driver) -> Self { Self { inner: Rc::new(RefCell::new(driver)), } } } impl From for WeakHandle where T: Deref, { fn from(handle: T) -> Self { Self { inner: Rc::downgrade(&handle.inner), } } } tokio-uring-0.5.0/src/runtime/driver/mod.rs000064400000000000000000000526041046102023000167760ustar 00000000000000use crate::buf::fixed::FixedBuffers; use crate::runtime::driver::op::{Completable, Lifecycle, MultiCQEFuture, Op, Updateable}; use io_uring::opcode::AsyncCancel; use io_uring::{cqueue, squeue, IoUring}; use slab::Slab; use std::cell::RefCell; use std::os::unix::io::{AsRawFd, RawFd}; use std::rc::Rc; use std::task::{Context, Poll}; use std::{io, mem}; pub(crate) use handle::*; mod handle; pub(crate) mod op; pub(crate) struct Driver { /// In-flight operations ops: Ops, /// IoUring bindings uring: IoUring, /// Reference to the currently registered buffers. /// Ensures that the buffers are not dropped until /// after the io-uring runtime has terminated. fixed_buffers: Option>>, } struct Ops { // When dropping the driver, all in-flight operations must have completed. This // type wraps the slab and ensures that, on drop, the slab is empty. 
lifecycle: Slab, /// Received but unserviced Op completions completions: Slab, } impl Driver { pub(crate) fn new(b: &crate::Builder) -> io::Result { let uring = b.urb.build(b.entries)?; Ok(Driver { ops: Ops::new(), uring, fixed_buffers: None, }) } fn wait(&self) -> io::Result { self.uring.submit_and_wait(1) } // only used in tests rn #[allow(unused)] pub(super) fn num_operations(&self) -> usize { self.ops.lifecycle.len() } pub(crate) fn submit(&mut self) -> io::Result<()> { loop { match self.uring.submit() { Ok(_) => { self.uring.submission().sync(); return Ok(()); } Err(ref e) if e.raw_os_error() == Some(libc::EBUSY) => { self.dispatch_completions(); } Err(e) if e.raw_os_error() != Some(libc::EINTR) => { return Err(e); } _ => continue, } } } pub(crate) fn dispatch_completions(&mut self) { let mut cq = self.uring.completion(); cq.sync(); for cqe in cq { if cqe.user_data() == u64::MAX { // Result of the cancellation action. There isn't anything we // need to do here. We must wait for the CQE for the operation // that was canceled. 
continue; } let index = cqe.user_data() as _; self.ops.complete(index, cqe); } } pub(crate) fn register_buffers( &mut self, buffers: Rc>, ) -> io::Result<()> { unsafe { self.uring .submitter() .register_buffers(buffers.borrow().iovecs()) }?; self.fixed_buffers = Some(buffers); Ok(()) } pub(crate) fn unregister_buffers( &mut self, buffers: Rc>, ) -> io::Result<()> { if let Some(currently_registered) = &self.fixed_buffers { if Rc::ptr_eq(&buffers, currently_registered) { self.uring.submitter().unregister_buffers()?; self.fixed_buffers = None; return Ok(()); } } Err(io::Error::new( io::ErrorKind::Other, "fixed buffers are not currently registered", )) } pub(crate) fn submit_op_2(&mut self, sqe: squeue::Entry) -> usize { let index = self.ops.insert(); // Configure the SQE let sqe = sqe.user_data(index as _); // Push the new operation while unsafe { self.uring.submission().push(&sqe).is_err() } { // If the submission queue is full, flush it to the kernel self.submit().expect("Internal error, failed to submit ops"); } index } pub(crate) fn submit_op( &mut self, mut data: T, f: F, handle: WeakHandle, ) -> io::Result> where T: Completable, F: FnOnce(&mut T) -> squeue::Entry, { let index = self.ops.insert(); // Configure the SQE let sqe = f(&mut data).user_data(index as _); // Create the operation let op = Op::new(handle, data, index); // Push the new operation while unsafe { self.uring.submission().push(&sqe).is_err() } { // If the submission queue is full, flush it to the kernel self.submit()?; } Ok(op) } pub(crate) fn remove_op(&mut self, op: &mut Op) { // Get the Op Lifecycle state from the driver let (lifecycle, completions) = match self.ops.get_mut(op.index()) { Some(val) => val, None => { // Op dropped after the driver return; } }; match mem::replace(lifecycle, Lifecycle::Submitted) { Lifecycle::Submitted | Lifecycle::Waiting(_) => { *lifecycle = Lifecycle::Ignored(Box::new(op.take_data())); } Lifecycle::Completed(..) 
=> { self.ops.remove(op.index()); } Lifecycle::CompletionList(indices) => { // Deallocate list entries, recording if more CQE's are expected let more = { let mut list = indices.into_list(completions); cqueue::more(list.peek_end().unwrap().flags) // Dropping list deallocates the list entries }; if more { // If more are expected, we have to keep the op around *lifecycle = Lifecycle::Ignored(Box::new(op.take_data())); } else { self.ops.remove(op.index()); } } Lifecycle::Ignored(..) => unreachable!(), } } pub(crate) fn remove_op_2(&mut self, index: usize, data: T) { // Get the Op Lifecycle state from the driver let (lifecycle, completions) = match self.ops.get_mut(index) { Some(val) => val, None => { // Op dropped after the driver return; } }; match mem::replace(lifecycle, Lifecycle::Submitted) { Lifecycle::Submitted | Lifecycle::Waiting(_) => { *lifecycle = Lifecycle::Ignored(Box::new(data)); } Lifecycle::Completed(..) => { self.ops.remove(index); } Lifecycle::CompletionList(indices) => { // Deallocate list entries, recording if more CQE's are expected let more = { let mut list = indices.into_list(completions); cqueue::more(list.peek_end().unwrap().flags) // Dropping list deallocates the list entries }; if more { // If more are expected, we have to keep the op around *lifecycle = Lifecycle::Ignored(Box::new(data)); } else { self.ops.remove(index); } } Lifecycle::Ignored(..) => unreachable!(), } } pub(crate) fn poll_op_2(&mut self, index: usize, cx: &mut Context<'_>) -> Poll { let (lifecycle, _) = self.ops.get_mut(index).expect("invalid internal state"); match mem::replace(lifecycle, Lifecycle::Submitted) { Lifecycle::Submitted => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) if !waker.will_wake(cx.waker()) => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) => { *lifecycle = Lifecycle::Waiting(waker); Poll::Pending } Lifecycle::Ignored(..) 
=> unreachable!(), Lifecycle::Completed(cqe) => { self.ops.remove(index); Poll::Ready(cqe) } Lifecycle::CompletionList(..) => { unreachable!("No `more` flag set for SingleCQE") } } } pub(crate) fn poll_op(&mut self, op: &mut Op, cx: &mut Context<'_>) -> Poll where T: Unpin + 'static + Completable, { let (lifecycle, _) = self .ops .get_mut(op.index()) .expect("invalid internal state"); match mem::replace(lifecycle, Lifecycle::Submitted) { Lifecycle::Submitted => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) if !waker.will_wake(cx.waker()) => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) => { *lifecycle = Lifecycle::Waiting(waker); Poll::Pending } Lifecycle::Ignored(..) => unreachable!(), Lifecycle::Completed(cqe) => { self.ops.remove(op.index()); Poll::Ready(op.take_data().unwrap().complete(cqe.into())) } Lifecycle::CompletionList(..) => { unreachable!("No `more` flag set for SingleCQE") } } } pub(crate) fn poll_multishot_op( &mut self, op: &mut Op, cx: &mut Context<'_>, ) -> Poll where T: Unpin + 'static + Completable + Updateable, { let (lifecycle, completions) = self .ops .get_mut(op.index()) .expect("invalid internal state"); match mem::replace(lifecycle, Lifecycle::Submitted) { Lifecycle::Submitted => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) if !waker.will_wake(cx.waker()) => { *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Lifecycle::Waiting(waker) => { *lifecycle = Lifecycle::Waiting(waker); Poll::Pending } Lifecycle::Ignored(..) => unreachable!(), Lifecycle::Completed(cqe) => { // This is possible. 
We may have previously polled a CompletionList, // and the final CQE registered as Completed self.ops.remove(op.index()); Poll::Ready(op.take_data().unwrap().complete(cqe.into())) } Lifecycle::CompletionList(indices) => { let mut data = op.take_data().unwrap(); let mut status = Poll::Pending; // Consume the CqeResult list, calling update on the Op on all Cqe's flagged `more` // If the final Cqe is present, clean up and return Poll::Ready for cqe in indices.into_list(completions) { if cqueue::more(cqe.flags) { data.update(cqe); } else { status = Poll::Ready(cqe); break; } } match status { Poll::Pending => { // We need more CQE's. Restore the op state op.insert_data(data); *lifecycle = Lifecycle::Waiting(cx.waker().clone()); Poll::Pending } Poll::Ready(cqe) => { self.ops.remove(op.index()); Poll::Ready(data.complete(cqe)) } } } } } } impl AsRawFd for Driver { fn as_raw_fd(&self) -> RawFd { self.uring.as_raw_fd() } } /// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. /// /// This first cancels all ops and then waits for them to be moved to the completed lifecycle phase. /// /// It is possible for this to be run without previously dropping the runtime, but this should only /// be possible in the case of [`std::process::exit`]. /// /// This depends on us knowing when ops are completed and done firing. /// When multishot ops are added (support exists but none are implemented), a way to know if such /// an op is finished MUST be added, otherwise our shutdown process is unsound. 
impl Drop for Driver { fn drop(&mut self) { // get all ops in flight for cancellation while !self.uring.submission().is_empty() { self.submit().expect("Internal error when dropping driver"); } // Pre-determine what to cancel // After this pass, all LifeCycles will be marked either as Completed or Ignored, as appropriate for (_, cycle) in self.ops.lifecycle.iter_mut() { match std::mem::replace(cycle, Lifecycle::Ignored(Box::new(()))) { lc @ Lifecycle::Completed(_) => { // don't cancel completed items *cycle = lc; } Lifecycle::CompletionList(indices) => { let mut list = indices.clone().into_list(&mut self.ops.completions); if !io_uring::cqueue::more(list.peek_end().unwrap().flags) { // This op is complete. Replace with a null Completed entry // safety: zeroed memory is entirely valid with this underlying // representation *cycle = Lifecycle::Completed(unsafe { mem::zeroed() }); } } _ => { // All other states need cancelling. // The mem::replace means these are now marked Ignored. } } } // Submit cancellation for all ops marked Ignored for (id, cycle) in self.ops.lifecycle.iter_mut() { if let Lifecycle::Ignored(..) = cycle { unsafe { while self .uring .submission() .push(&AsyncCancel::new(id as u64).build().user_data(u64::MAX)) .is_err() { self.uring .submit_and_wait(1) .expect("Internal error when dropping driver"); } } } } // Wait until all Lifetimes have been removed from the slab. // // Ignored entries will be removed from the Lifecycle slab // by the complete logic called by `tick()` // // Completed Entries are removed here directly let mut id = 0; loop { if self.ops.lifecycle.is_empty() { break; } // Cycles are either all ignored or complete // If there is at least one Ignored still to process, call wait match self.ops.lifecycle.get(id) { Some(Lifecycle::Ignored(..)) => { // If waiting fails, ignore the error. The wait will be attempted // again on the next loop. 
let _ = self.wait(); self.dispatch_completions(); } Some(_) => { // Remove Completed entries let _ = self.ops.lifecycle.remove(id); id += 1; } None => { id += 1; } } } } } impl Ops { fn new() -> Ops { Ops { lifecycle: Slab::with_capacity(64), completions: Slab::with_capacity(64), } } fn get_mut(&mut self, index: usize) -> Option<(&mut op::Lifecycle, &mut Slab)> { let completions = &mut self.completions; self.lifecycle .get_mut(index) .map(|lifecycle| (lifecycle, completions)) } // Insert a new operation fn insert(&mut self) -> usize { self.lifecycle.insert(op::Lifecycle::Submitted) } // Remove an operation fn remove(&mut self, index: usize) { self.lifecycle.remove(index); } fn complete(&mut self, index: usize, cqe: cqueue::Entry) { let completions = &mut self.completions; if self.lifecycle[index].complete(completions, cqe) { self.lifecycle.remove(index); } } } impl Drop for Ops { fn drop(&mut self) { assert!(self .lifecycle .iter() .all(|(_, cycle)| matches!(cycle, Lifecycle::Completed(_)))) } } #[cfg(test)] mod test { use std::rc::Rc; use crate::runtime::driver::op::{Completable, CqeResult, Op}; use crate::runtime::CONTEXT; use tokio_test::{assert_pending, assert_ready, task}; use super::*; #[derive(Debug)] pub(crate) struct Completion { result: io::Result, flags: u32, data: Rc<()>, } impl Completable for Rc<()> { type Output = Completion; fn complete(self, cqe: CqeResult) -> Self::Output { Completion { result: cqe.result, flags: cqe.flags, data: self.clone(), } } } #[test] fn op_stays_in_slab_on_drop() { let (op, data) = init(); drop(op); assert_eq!(2, Rc::strong_count(&data)); assert_eq!(1, num_operations()); release(); } #[test] fn poll_op_once() { let (op, data) = init(); let mut op = task::spawn(op); assert_pending!(op.poll()); assert_eq!(2, Rc::strong_count(&data)); complete(&op); assert_eq!(1, num_operations()); assert_eq!(2, Rc::strong_count(&data)); assert!(op.is_woken()); let Completion { result, flags, data: d, } = assert_ready!(op.poll()); 
assert_eq!(2, Rc::strong_count(&data)); assert_eq!(0, result.unwrap()); assert_eq!(0, flags); drop(d); assert_eq!(1, Rc::strong_count(&data)); drop(op); assert_eq!(0, num_operations()); release(); } #[test] fn poll_op_twice() { { let (op, ..) = init(); let mut op = task::spawn(op); assert_pending!(op.poll()); assert_pending!(op.poll()); complete(&op); assert!(op.is_woken()); let Completion { result, flags, .. } = assert_ready!(op.poll()); assert_eq!(0, result.unwrap()); assert_eq!(0, flags); } release(); } #[test] fn poll_change_task() { { let (op, ..) = init(); let mut op = task::spawn(op); assert_pending!(op.poll()); let op = op.into_inner(); let mut op = task::spawn(op); assert_pending!(op.poll()); complete(&op); assert!(op.is_woken()); let Completion { result, flags, .. } = assert_ready!(op.poll()); assert_eq!(0, result.unwrap()); assert_eq!(0, flags); } release(); } #[test] fn complete_before_poll() { let (op, data) = init(); let mut op = task::spawn(op); complete(&op); assert_eq!(1, num_operations()); assert_eq!(2, Rc::strong_count(&data)); let Completion { result, flags, .. 
} = assert_ready!(op.poll()); assert_eq!(0, result.unwrap()); assert_eq!(0, flags); drop(op); assert_eq!(0, num_operations()); release(); } #[test] fn complete_after_drop() { let (op, data) = init(); let index = op.index(); drop(op); assert_eq!(2, Rc::strong_count(&data)); assert_eq!(1, num_operations()); CONTEXT.with(|cx| { cx.handle() .unwrap() .inner .borrow_mut() .ops .complete(index, unsafe { mem::zeroed() }) }); assert_eq!(1, Rc::strong_count(&data)); assert_eq!(0, num_operations()); release(); } fn init() -> (Op>, Rc<()>) { let driver = Driver::new(&crate::builder()).unwrap(); let data = Rc::new(()); let op = CONTEXT.with(|cx| { cx.set_handle(driver.into()); let driver = cx.handle().unwrap(); let index = driver.inner.borrow_mut().ops.insert(); Op::new((&driver).into(), data.clone(), index) }); (op, data) } fn num_operations() -> usize { CONTEXT.with(|cx| cx.handle().unwrap().inner.borrow().num_operations()) } fn complete(op: &Op>) { let cqe = unsafe { mem::zeroed() }; CONTEXT.with(|cx| { let driver = cx.handle().unwrap(); driver.inner.borrow_mut().ops.complete(op.index(), cqe); }); } fn release() { CONTEXT.with(|cx| { let driver = cx.handle().unwrap(); driver.inner.borrow_mut().ops.lifecycle.clear(); driver.inner.borrow_mut().ops.completions.clear(); cx.unset_driver(); }); } } tokio-uring-0.5.0/src/runtime/driver/op/mod.rs000064400000000000000000000227351046102023000174160ustar 00000000000000use std::future::Future; use std::io; use std::marker::PhantomData; use std::pin::Pin; use std::task::{Context, Poll, Waker}; use io_uring::{cqueue, squeue}; mod slab_list; use slab::Slab; use slab_list::{SlabListEntry, SlabListIndices}; use crate::runtime::{driver, CONTEXT}; /// A SlabList is used to hold unserved completions. /// /// This is relevant to multi-completion Operations, /// which require an unknown number of CQE events to be /// captured before completion. pub(crate) type Completion = SlabListEntry; /// An unsubmitted oneshot operation. 
pub struct UnsubmittedOneshot> { stable_data: D, post_op: T, sqe: squeue::Entry, } impl> UnsubmittedOneshot { /// Construct a new operation for later submission. pub fn new(stable_data: D, post_op: T, sqe: squeue::Entry) -> Self { Self { stable_data, post_op, sqe, } } /// Submit an operation to the driver for batched entry to the kernel. pub fn submit(self) -> InFlightOneshot { let handle = CONTEXT .with(|x| x.handle()) .expect("Could not submit op; not in runtime context"); self.submit_with_driver(&handle) } fn submit_with_driver(self, driver: &driver::Handle) -> InFlightOneshot { let index = driver.submit_op_2(self.sqe); let driver = driver.into(); let inner = InFlightOneshotInner { index, driver, stable_data: self.stable_data, post_op: self.post_op, }; InFlightOneshot { inner: Some(inner) } } } /// An in-progress oneshot operation which can be polled for completion. pub struct InFlightOneshot> { inner: Option>, } struct InFlightOneshotInner> { driver: driver::WeakHandle, index: usize, stable_data: D, post_op: T, } impl + Unpin> Future for InFlightOneshot { type Output = T::Output; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.get_mut(); let inner = this .inner .as_mut() .expect("Cannot poll already-completed operation"); let index = inner.index; let upgraded = inner .driver .upgrade() .expect("Failed to poll op: driver no longer exists"); let cqe = ready!(upgraded.poll_op_2(index, cx)); let inner = this.inner.take().unwrap(); Poll::Ready( inner .post_op .transform_oneshot_output(inner.stable_data, cqe), ) } } impl> Drop for InFlightOneshot { fn drop(&mut self) { if let Some(inner) = self.inner.take() { if let Some(driver) = inner.driver.upgrade() { driver.remove_op_2(inner.index, inner.stable_data) } } } } /// Transforms the output of a oneshot operation into a more user-friendly format. pub trait OneshotOutputTransform { /// The final output after the transformation. type Output; /// The stored data within the op. 
type StoredData; /// Transform the stored data and the cqe into the final output. fn transform_oneshot_output(self, data: Self::StoredData, cqe: cqueue::Entry) -> Self::Output; } /// In-flight operation pub(crate) struct Op { driver: driver::WeakHandle, // Operation index in the slab index: usize, // Per-operation data data: Option, // CqeType marker _cqe_type: PhantomData, } /// A Marker for Ops which expect only a single completion event pub(crate) struct SingleCQE; /// A Marker for Operations will process multiple completion events, /// which combined resolve to a single Future value pub(crate) struct MultiCQEFuture; pub(crate) trait Completable { type Output; /// `complete` will be called for cqe's do not have the `more` flag set fn complete(self, cqe: CqeResult) -> Self::Output; } pub(crate) trait Updateable: Completable { /// Update will be called for cqe's which have the `more` flag set. /// The Op should update any internal state as required. fn update(&mut self, cqe: CqeResult); } #[allow(dead_code)] pub(crate) enum Lifecycle { /// The operation has been submitted to uring and is currently in-flight Submitted, /// The submitter is waiting for the completion of the operation Waiting(Waker), /// The submitter no longer has interest in the operation result. The state /// must be passed to the driver and held until the operation completes. 
Ignored(Box), /// The operation has completed with a single cqe result Completed(cqueue::Entry), /// One or more completion results have been recieved /// This holds the indices uniquely identifying the list within the slab CompletionList(SlabListIndices), } /// A single CQE entry pub(crate) struct CqeResult { pub(crate) result: io::Result, pub(crate) flags: u32, } impl From for CqeResult { fn from(cqe: cqueue::Entry) -> Self { let res = cqe.result(); let flags = cqe.flags(); let result = if res >= 0 { Ok(res as u32) } else { Err(io::Error::from_raw_os_error(-res)) }; CqeResult { result, flags } } } impl Op { /// Create a new operation pub(super) fn new(driver: driver::WeakHandle, data: T, index: usize) -> Self { Op { driver, index, data: Some(data), _cqe_type: PhantomData, } } pub(super) fn index(&self) -> usize { self.index } pub(super) fn take_data(&mut self) -> Option { self.data.take() } pub(super) fn insert_data(&mut self, data: T) { self.data = Some(data); } } impl Future for Op where T: Unpin + 'static + Completable, { type Output = T::Output; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { self.driver .upgrade() .expect("Not in runtime context") .poll_op(self.get_mut(), cx) } } impl Future for Op where T: Unpin + 'static + Completable + Updateable, { type Output = T::Output; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { self.driver .upgrade() .expect("Not in runtime context") .poll_multishot_op(self.get_mut(), cx) } } /// The operation may have pending cqe's not yet processed. /// To manage this, the lifecycle associated with the Op may if required /// be placed in LifeCycle::Ignored state to handle cqe's which arrive after /// the Op has been dropped. 
impl Drop for Op { fn drop(&mut self) { self.driver .upgrade() .expect("Not in runtime context") .remove_op(self) } } impl Lifecycle { pub(crate) fn complete( &mut self, completions: &mut Slab, cqe: cqueue::Entry, ) -> bool { use std::mem; match mem::replace(self, Lifecycle::Submitted) { x @ Lifecycle::Submitted | x @ Lifecycle::Waiting(..) => { if io_uring::cqueue::more(cqe.flags()) { let mut list = SlabListIndices::new().into_list(completions); list.push(cqe.into()); *self = Lifecycle::CompletionList(list.into_indices()); } else { *self = Lifecycle::Completed(cqe); } if let Lifecycle::Waiting(waker) = x { // waker is woken to notify cqe has arrived // Note: Maybe defer calling until cqe with !`more` flag set? waker.wake(); } false } lifecycle @ Lifecycle::Ignored(..) => { if io_uring::cqueue::more(cqe.flags()) { // Not yet complete. The Op has been dropped, so we can drop the CQE // but we must keep the lifecycle alive until no more CQE's expected *self = lifecycle; false } else { // This Op has completed, we can drop true } } Lifecycle::Completed(..) => { // Completions with more flag set go straight onto the slab, // and are handled in Lifecycle::CompletionList. // To construct Lifecycle::Completed, a CQE with `more` flag unset was received // we shouldn't be receiving another. unreachable!("invalid operation state") } Lifecycle::CompletionList(indices) => { // A completion list may contain CQE's with and without `more` flag set. // Only the final one may have `more` unset, although we don't check. let mut list = indices.into_list(completions); list.push(cqe.into()); *self = Lifecycle::CompletionList(list.into_indices()); false } } } } tokio-uring-0.5.0/src/runtime/driver/op/slab_list.rs000064400000000000000000000112611046102023000206030ustar 00000000000000//! An indexed linked list, with entries held in slab storage. //! The slab may hold multiple independent lists concurrently. //! //! Each list is uniquely identified by a SlabListIndices, //! 
which holds the index of the first element of the list. //! It also holds the index of the last element, to support //! push operations without list traversal. use slab::Slab; use std::ops::{Deref, DerefMut}; /// A linked list backed by slab storage pub(crate) struct SlabList<'a, T> { index: SlabListIndices, slab: &'a mut Slab>, } // Indices to the head and tail of a single list held within a SlabList #[derive(Clone)] pub(crate) struct SlabListIndices { start: usize, end: usize, } /// Multi cycle operations may return an unbounded number of CQE's /// for a single cycle SQE. /// /// These are held in an indexed linked list pub(crate) struct SlabListEntry { entry: T, next: usize, } impl Deref for SlabListEntry { type Target = T; fn deref(&self) -> &Self::Target { &self.entry } } impl DerefMut for SlabListEntry { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.entry } } impl SlabListIndices { pub(crate) fn new() -> Self { let start = usize::MAX; SlabListIndices { start, end: start } } pub(crate) fn into_list(self, slab: &mut Slab>) -> SlabList<'_, T> { SlabList::from_indices(self, slab) } } impl<'a, T> SlabList<'a, T> { pub(crate) fn from_indices( index: SlabListIndices, slab: &'a mut Slab>, ) -> Self { SlabList { slab, index } } pub(crate) fn is_empty(&self) -> bool { self.index.start == usize::MAX } /// Peek at the end of the list (most recently pushed) /// This leaves the list unchanged pub(crate) fn peek_end(&mut self) -> Option<&T> { if self.index.end == usize::MAX { None } else { Some(&self.slab[self.index.end].entry) } } /// Pop from front of list pub(crate) fn pop(&mut self) -> Option { self.slab .try_remove(self.index.start) .map(|SlabListEntry { next, entry, .. 
}| { if next == usize::MAX { self.index.end = usize::MAX; } self.index.start = next; entry }) } /// Push to the end of the list pub(crate) fn push(&mut self, entry: T) { let prev = self.index.end; let entry = SlabListEntry { entry, next: usize::MAX, }; self.index.end = self.slab.insert(entry); if prev != usize::MAX { self.slab[prev].next = self.index.end; } else { self.index.start = self.index.end; } } /// Consume the list, without dropping entries, returning just the start and end indices pub(crate) fn into_indices(mut self) -> SlabListIndices { std::mem::replace(&mut self.index, SlabListIndices::new()) } } impl<'a, T> Drop for SlabList<'a, T> { fn drop(&mut self) { while !self.is_empty() { let removed = self.slab.remove(self.index.start); self.index.start = removed.next; } } } impl<'a, T> Iterator for SlabList<'a, T> { type Item = T; fn next(&mut self) -> Option { self.pop() } } #[cfg(test)] mod test { use super::*; #[test] fn push_pop() { let mut slab = Slab::with_capacity(8); let mut list = SlabListIndices::new().into_list(&mut slab); assert!(list.is_empty()); assert_eq!(list.pop(), None); for i in 0..5 { list.push(i); assert_eq!(list.peek_end(), Some(&i)); assert!(!list.is_empty()); assert!(!list.slab.is_empty()); } for i in 0..5 { assert_eq!(list.pop(), Some(i)) } assert!(list.is_empty()); assert!(list.slab.is_empty()); assert_eq!(list.pop(), None); } #[test] fn entries_freed_on_drop() { let mut slab = Slab::with_capacity(8); { let mut list = SlabListIndices::new().into_list(&mut slab); list.push(42); assert!(!list.is_empty()); } assert!(slab.is_empty()); } #[test] fn entries_kept_on_converion_to_index() { let mut slab = Slab::with_capacity(8); { let mut list = SlabListIndices::new().into_list(&mut slab); list.push(42); assert!(!list.is_empty()); // This forgets the entries let _ = list.into_indices(); } assert!(!slab.is_empty()); } } tokio-uring-0.5.0/src/runtime/mod.rs000064400000000000000000000122351046102023000154770ustar 00000000000000use 
std::future::Future; use std::io; use std::mem::ManuallyDrop; use tokio::io::unix::AsyncFd; use tokio::task::LocalSet; mod context; pub(crate) mod driver; pub(crate) use context::RuntimeContext; thread_local! { pub(crate) static CONTEXT: RuntimeContext = RuntimeContext::new(); } /// The Runtime Executor /// /// This is the Runtime for `tokio-uring`. /// It wraps the default [`Runtime`] using the platform-specific Driver. /// /// This executes futures and tasks within the current-thread only. /// /// [`Runtime`]: tokio::runtime::Runtime pub struct Runtime { /// Tokio runtime, always current-thread tokio_rt: ManuallyDrop, /// LocalSet for !Send tasks local: ManuallyDrop, /// Strong reference to the driver. driver: driver::Handle, } /// Spawns a new asynchronous task, returning a [`JoinHandle`] for it. /// /// Spawning a task enables the task to execute concurrently to other tasks. /// There is no guarantee that a spawned task will execute to completion. When a /// runtime is shutdown, all outstanding tasks are dropped, regardless of the /// lifecycle of that task. /// /// This function must be called from the context of a `tokio-uring` runtime. /// /// [`JoinHandle`]: tokio::task::JoinHandle /// /// # Examples /// /// In this example, a server is started and `spawn` is used to start a new task /// that processes each received connection. /// /// ```no_run /// tokio_uring::start(async { /// let handle = tokio_uring::spawn(async { /// println!("hello from a background task"); /// }); /// /// // Let the task complete /// handle.await.unwrap(); /// }); /// ``` pub fn spawn(task: T) -> tokio::task::JoinHandle { tokio::task::spawn_local(task) } impl Runtime { /// Creates a new tokio_uring runtime on the current thread. /// /// This takes the tokio-uring [`Builder`](crate::Builder) as a parameter. 
pub fn new(b: &crate::Builder) -> io::Result { let rt = tokio::runtime::Builder::new_current_thread() .on_thread_park(|| { CONTEXT.with(|x| { let _ = x .handle() .expect("Internal error, driver context not present when invoking hooks") .flush(); }); }) .enable_all() .build()?; let tokio_rt = ManuallyDrop::new(rt); let local = ManuallyDrop::new(LocalSet::new()); let driver = driver::Handle::new(b)?; start_uring_wakes_task(&tokio_rt, &local, driver.clone()); Ok(Runtime { local, tokio_rt, driver, }) } /// Runs a future to completion on the tokio-uring runtime. This is the /// runtime's entry point. /// /// This runs the given future on the current thread, blocking until it is /// complete, and yielding its resolved result. Any tasks, futures, or timers /// which the future spawns internally will be executed on this runtime. /// /// Any spawned tasks will be suspended after `block_on` returns. Calling /// `block_on` again will resume previously spawned tasks. /// /// # Panics /// /// This function panics if the provided future panics, or if called within an /// asynchronous execution context. /// Runs a future to completion on the current runtime. 
pub fn block_on(&self, future: F) -> F::Output where F: Future, { struct ContextGuard; impl Drop for ContextGuard { fn drop(&mut self) { CONTEXT.with(|cx| cx.unset_driver()); } } CONTEXT.with(|cx| cx.set_handle(self.driver.clone())); let _guard = ContextGuard; tokio::pin!(future); let res = self .tokio_rt .block_on(self.local.run_until(std::future::poll_fn(|cx| { // assert!(drive.as_mut().poll(cx).is_pending()); future.as_mut().poll(cx) }))); res } } impl Drop for Runtime { fn drop(&mut self) { // drop tasks in correct order unsafe { ManuallyDrop::drop(&mut self.local); ManuallyDrop::drop(&mut self.tokio_rt); } } } fn start_uring_wakes_task( tokio_rt: &tokio::runtime::Runtime, local: &LocalSet, driver: driver::Handle, ) { let _guard = tokio_rt.enter(); let async_driver_handle = AsyncFd::new(driver).unwrap(); local.spawn_local(drive_uring_wakes(async_driver_handle)); } async fn drive_uring_wakes(driver: AsyncFd) { loop { // Wait for read-readiness let mut guard = driver.readable().await.unwrap(); guard.get_inner().dispatch_completions(); guard.clear_ready(); } } #[cfg(test)] mod test { use super::*; use crate::builder; #[test] fn block_on() { let rt = Runtime::new(&builder()).unwrap(); rt.block_on(async move { () }); } #[test] fn block_on_twice() { let rt = Runtime::new(&builder()).unwrap(); rt.block_on(async move { () }); rt.block_on(async move { () }); } } tokio-uring-0.5.0/tests/buf.rs000064400000000000000000000154641046102023000143730ustar 00000000000000use tokio_uring::buf::{BoundedBuf, BoundedBufMut, Slice}; use std::mem; use std::ops::RangeBounds; use std::slice::SliceIndex; #[test] fn test_vec() { let mut v = vec![]; assert_eq!(v.as_ptr(), v.stable_ptr()); assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); assert_eq!(v.bytes_init(), 0); assert_eq!(v.bytes_total(), 0); v.reserve(100); assert_eq!(v.as_ptr(), v.stable_ptr()); assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); assert_eq!(v.bytes_init(), 0); assert_eq!(v.bytes_total(), v.capacity()); 
v.extend(b"hello"); assert_eq!(v.as_ptr(), v.stable_ptr()); assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); assert_eq!(v.bytes_init(), 5); assert_eq!(v.bytes_total(), v.capacity()); // Assume init does not go backwards unsafe { v.set_init(3); } assert_eq!(&v[..], b"hello"); // Initializing goes forward unsafe { std::ptr::copy(DATA.as_ptr(), v.stable_mut_ptr(), 10); v.set_init(10); } assert_eq!(&v[..], &DATA[..10]); } #[test] fn test_slice() { let v = &b""[..]; assert_eq!(v.as_ptr(), v.stable_ptr()); assert_eq!(v.bytes_init(), 0); assert_eq!(v.bytes_total(), 0); let v = &b"hello"[..]; assert_eq!(v.as_ptr(), v.stable_ptr()); assert_eq!(v.bytes_init(), 5); assert_eq!(v.bytes_total(), 5); } const DATA: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789!?"; macro_rules! test_slice { ( $( $name:ident => $buf:expr; )* ) => { $( mod $name { use super::*; #[test] fn test_slice_read() { let buf = $buf; let slice = buf.slice(..); assert_eq!(slice.begin(), 0); assert_eq!(slice.end(), DATA.len()); assert_eq!(&slice[..], DATA); assert_eq!(&slice[5..], &DATA[5..]); assert_eq!(&slice[10..15], &DATA[10..15]); assert_eq!(&slice[..15], &DATA[..15]); let buf = slice.into_inner(); let slice = buf.slice(10..); assert_eq!(slice.begin(), 10); assert_eq!(slice.end(), DATA.len()); assert_eq!(&slice[..], &DATA[10..]); assert_eq!(&slice[10..], &DATA[20..]); assert_eq!(&slice[5..15], &DATA[15..25]); assert_eq!(&slice[..15], &DATA[10..25]); let buf = slice.into_inner(); let slice = buf.slice(5..15); assert_eq!(slice.begin(), 5); assert_eq!(slice.end(), 15); assert_eq!(&slice[..], &DATA[5..15]); assert_eq!(&slice[5..], &DATA[10..15]); assert_eq!(&slice[5..8], &DATA[10..13]); assert_eq!(&slice[..5], &DATA[5..10]); let buf = slice.into_inner(); let slice = buf.slice(..15); assert_eq!(slice.begin(), 0); assert_eq!(slice.end(), 15); assert_eq!(&slice[..], &DATA[..15]); assert_eq!(&slice[5..], &DATA[5..15]); assert_eq!(&slice[5..10], &DATA[5..10]); assert_eq!(&slice[..5], &DATA[..5]); } #[test] fn 
test_subslice_read() { let buf = $buf; let buf = test_subslice_read_case(buf.slice(..), DATA, ..); let buf = test_subslice_read_case(buf.slice(..), DATA, 10..); let buf = test_subslice_read_case(buf.slice(..), DATA, 5..15); let buf = test_subslice_read_case(buf.slice(..), DATA, ..15); let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], ..); let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], 5..); let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], 5..15); let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], ..10); let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], ..); let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], 5..); let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], 5..15); let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], ..10); let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], ..); let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], 5..); let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], 5..15); let ___ = test_subslice_read_case(buf.slice(..25), &DATA[..25], ..10); } } )* }; } fn test_subslice_read_case(slice: Slice, expected: &[u8], range: R) -> B where B: tokio_uring::buf::IoBuf, R: RangeBounds + SliceIndex<[u8], Output = [u8]> + Clone, { use std::ops::{Bound, Index}; let buf_ptr = slice.get_ref().stable_ptr(); let buf_total = slice.get_ref().bytes_total(); let buf_init = slice.get_ref().bytes_init(); let begin = slice.begin(); let end = slice.end(); let subslice = slice.slice(range.clone()); let data = expected.index(range.clone()); match range.start_bound() { Bound::Included(&n) => { assert_eq!(subslice.begin(), begin + n); } Bound::Excluded(&n) => { assert_eq!(subslice.begin(), begin + n + 1); } Bound::Unbounded => { assert_eq!(subslice.begin(), begin); } } match range.end_bound() { Bound::Included(&n) => { assert_eq!(subslice.end(), begin + n + 1); } Bound::Excluded(&n) => { assert_eq!(subslice.end(), begin + 
n); } Bound::Unbounded => { assert_eq!(subslice.end(), end); } } assert_eq!(&subslice[..], data); let buf = subslice.into_inner(); assert_eq!(buf.stable_ptr(), buf_ptr); assert_eq!(buf.bytes_init(), buf_init); assert_eq!(buf.bytes_total(), buf_total); buf } test_slice! { vec => Vec::from(DATA); slice => DATA; } #[test] fn can_deref_slice_into_uninit_buf() { let buf = Vec::with_capacity(10).slice(..); let _ = buf.stable_ptr(); assert_eq!(buf.bytes_init(), 0); assert_eq!(buf.bytes_total(), 10); assert!(buf[..].is_empty()); let mut v = Vec::with_capacity(10); v.push(42); let mut buf = v.slice(..); let _ = buf.stable_mut_ptr(); assert_eq!(buf.bytes_init(), 1); assert_eq!(buf.bytes_total(), 10); assert_eq!(mem::replace(&mut buf[0], 0), 42); buf.copy_from_slice(&[43]); assert_eq!(&buf[..], &[43]); } tokio-uring-0.5.0/tests/driver.rs000064400000000000000000000065211046102023000151040ustar 00000000000000use tempfile::NamedTempFile; use tokio_uring::{buf::IoBuf, fs::File}; #[path = "../src/future.rs"] #[allow(warnings)] mod future; #[test] fn complete_ops_on_drop() { use std::sync::Arc; struct MyBuf { data: Vec, _ref_cnt: Arc<()>, } unsafe impl IoBuf for MyBuf { fn stable_ptr(&self) -> *const u8 { self.data.stable_ptr() } fn bytes_init(&self) -> usize { self.data.bytes_init() } fn bytes_total(&self) -> usize { self.data.bytes_total() } } unsafe impl tokio_uring::buf::IoBufMut for MyBuf { fn stable_mut_ptr(&mut self) -> *mut u8 { self.data.stable_mut_ptr() } unsafe fn set_init(&mut self, pos: usize) { self.data.set_init(pos); } } // Used to test if the buffer dropped. 
let ref_cnt = Arc::new(()); let tempfile = tempfile(); let vec = vec![0; 50 * 1024 * 1024]; let mut file = std::fs::File::create(tempfile.path()).unwrap(); std::io::Write::write_all(&mut file, &vec).unwrap(); let file = tokio_uring::start(async { let file = File::create(tempfile.path()).await.unwrap(); poll_once(async { file.read_at( MyBuf { data: vec![0; 64 * 1024], _ref_cnt: ref_cnt.clone(), }, 25 * 1024 * 1024, ) .await .0 .unwrap(); }) .await; file }); assert_eq!(Arc::strong_count(&ref_cnt), 1); // little sleep std::thread::sleep(std::time::Duration::from_millis(100)); drop(file); } #[test] fn too_many_submissions() { let tempfile = tempfile(); tokio_uring::start(async { let file = File::create(tempfile.path()).await.unwrap(); for _ in 0..600 { poll_once(async { file.write_at(b"hello world".to_vec(), 0) .submit() .await .0 .unwrap(); }) .await; } }); } #[test] fn completion_overflow() { use std::process; use std::{thread, time}; use tokio::task::JoinSet; let spawn_cnt = 50; let squeue_entries = 2; let cqueue_entries = 2 * squeue_entries; std::thread::spawn(|| { thread::sleep(time::Duration::from_secs(8)); // 1000 times longer than it takes on a slow machine eprintln!("Timeout reached. 
The uring completions are hung."); process::exit(1); }); tokio_uring::builder() .entries(squeue_entries) .uring_builder(tokio_uring::uring_builder().setup_cqsize(cqueue_entries)) .start(async move { let mut js = JoinSet::new(); for _ in 0..spawn_cnt { js.spawn_local(tokio_uring::no_op()); } while let Some(res) = js.join_next().await { res.unwrap().unwrap(); } }); } fn tempfile() -> NamedTempFile { NamedTempFile::new().unwrap() } async fn poll_once(future: impl std::future::Future) { // use std::future::Future; use std::task::Poll; use tokio::pin; pin!(future); std::future::poll_fn(|cx| { assert!(future.as_mut().poll(cx).is_pending()); Poll::Ready(()) }) .await; } tokio-uring-0.5.0/tests/fixed_buf.rs000064400000000000000000000145121046102023000155430ustar 00000000000000use tokio_test::assert_err; use tokio_uring::buf::fixed::{FixedBufPool, FixedBufRegistry}; use tokio_uring::buf::{BoundedBuf, BoundedBufMut}; use tokio_uring::fs::File; use std::fs::File as StdFile; use std::io::prelude::*; use std::iter; use std::mem; use tempfile::NamedTempFile; const HELLO: &[u8] = b"hello world..."; #[test] fn fixed_buf_turnaround() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); let buffers = FixedBufRegistry::new([30, 20, 10].iter().map(|&n| Vec::with_capacity(n))); buffers.register().unwrap(); let fixed_buf = buffers.check_out(0).unwrap(); assert_eq!(fixed_buf.bytes_total(), 30); // Can't check out the same buffer twice. assert!(buffers.check_out(0).is_none()); // Checking out another buffer from the same registry is possible, // but does not affect the status of the first buffer. 
let fixed_buf1 = buffers.check_out(1).unwrap(); assert_eq!(fixed_buf1.bytes_total(), 20); assert!(buffers.check_out(0).is_none()); mem::drop(fixed_buf1); assert!(buffers.check_out(0).is_none()); let op = file.read_fixed_at(fixed_buf, 0); // The buffer is used by the pending operation, can't check it out // for another instance. assert!(buffers.check_out(0).is_none()); let (res, buf) = op.await; let n = res.unwrap(); assert_eq!(n, HELLO.len()); // The buffer is owned by `buf`, can't check it out // for another instance. assert!(buffers.check_out(0).is_none()); mem::drop(buf); // The buffer has been released, check it out again. let fixed_buf = buffers.check_out(0).unwrap(); assert_eq!(fixed_buf.bytes_total(), 30); assert_eq!(fixed_buf.bytes_init(), HELLO.len()); }); } #[test] fn unregister_invalidates_checked_out_buffers() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); buffers.register().unwrap(); let fixed_buf = buffers.check_out(0).unwrap(); // The checked out handle keeps the buffer allocation alive. // Meanwhile, we replace buffer registration in the kernel: buffers.unregister().unwrap(); let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); buffers.register().unwrap(); // The old buffer's index no longer matches the memory area of the // currently registered buffer, so the read operation using the old // buffer's memory should fail. 
let (res, _) = file.read_fixed_at(fixed_buf, 0).await; assert_err!(res); let fixed_buf = buffers.check_out(0).unwrap(); let (res, buf) = file.read_fixed_at(fixed_buf, 0).await; let n = res.unwrap(); assert_eq!(n, HELLO.len()); assert_eq!(&buf[..], HELLO); }); } #[test] fn slicing() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::from_std( StdFile::options() .read(true) .write(true) .open(tempfile.path()) .unwrap(), ); let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); buffers.register().unwrap(); let fixed_buf = buffers.check_out(0).unwrap(); // Read no more than 8 bytes into the fixed buffer. let (res, slice) = file.read_fixed_at(fixed_buf.slice(..8), 3).await; let n = res.unwrap(); assert_eq!(n, 8); assert_eq!(slice[..], HELLO[3..11]); let fixed_buf = slice.into_inner(); // Write from the fixed buffer, starting at offset 1, // up to the end of the initialized bytes in the buffer. let (res, slice) = file .write_fixed_at(fixed_buf.slice(1..), HELLO.len() as u64) .await; let n = res.unwrap(); assert_eq!(n, 7); assert_eq!(slice[..], HELLO[4..11]); let fixed_buf = slice.into_inner(); // Read into the fixed buffer, overwriting bytes starting from offset 3 // and then extending the initialized part with as many bytes as // the operation can read. 
let (res, slice) = file.read_fixed_at(fixed_buf.slice(3..), 0).await; let n = res.unwrap(); assert_eq!(n, HELLO.len() + 7); assert_eq!(slice[..HELLO.len()], HELLO[..]); assert_eq!(slice[HELLO.len()..], HELLO[4..11]); }) } #[test] fn pool_next_as_concurrency_limit() { tokio_uring::start(async move { const BUF_SIZE: usize = 80; let mut tempfile = tempfile(); let file = StdFile::options() .write(true) .open(tempfile.path()) .unwrap(); let buffers = FixedBufPool::new(iter::repeat_with(|| Vec::with_capacity(BUF_SIZE)).take(2)); buffers.register().unwrap(); let mut join_handles = vec![]; for i in 0..10 { let mut buf = buffers.next(BUF_SIZE).await; println!( "[main] iteration {}: obtained buffer {}", i, buf.buf_index() ); let cloned_file = file.try_clone().unwrap(); let handle = tokio_uring::spawn(async move { let file = File::from_std(cloned_file); let data = [b'0' + i as u8; BUF_SIZE]; buf.put_slice(&data); let (res, buf) = file.write_fixed_all_at(buf, BUF_SIZE as u64 * i).await; res.unwrap(); println!("[worker {}]: dropping buffer {}", i, buf.buf_index()); }); join_handles.push(handle); } for (i, handle) in join_handles.into_iter().enumerate() { handle .await .unwrap_or_else(|e| panic!("worker {} terminated abnormally: {}", i, e)); } mem::drop(file); let mut content = String::new(); tempfile.read_to_string(&mut content).unwrap(); println!("{}", content); }) } fn tempfile() -> NamedTempFile { NamedTempFile::new().unwrap() } tokio-uring-0.5.0/tests/fs_directory.rs000064400000000000000000000012671046102023000163070ustar 00000000000000#[path = "../src/future.rs"] #[allow(warnings)] mod future; use tokio_test::assert_ok; use tokio_uring::fs; use tempfile::tempdir; #[test] fn basic_create_dir() { tokio_uring::start(async { let base_dir = tempdir().unwrap(); let new_dir = base_dir.path().join("foo"); let new_dir_2 = new_dir.clone(); assert_ok!(fs::create_dir(new_dir).await); assert!(new_dir_2.is_dir()); }); } #[test] fn basic_remove_dir() { tokio_uring::start(async { let 
temp_dir = tempfile::TempDir::new().unwrap(); tokio_uring::fs::remove_dir(temp_dir.path()).await.unwrap(); assert!(std::fs::metadata(temp_dir.path()).is_err()); }); } tokio-uring-0.5.0/tests/fs_file.rs000064400000000000000000000214651046102023000152240ustar 00000000000000use std::{ io::prelude::*, os::unix::io::{AsRawFd, FromRawFd, RawFd}, }; use tempfile::NamedTempFile; use tokio_uring::buf::fixed::FixedBufRegistry; use tokio_uring::buf::{BoundedBuf, BoundedBufMut}; use tokio_uring::fs::File; #[path = "../src/future.rs"] #[allow(warnings)] mod future; const HELLO: &[u8] = b"hello world..."; async fn read_hello(file: &File) { let buf = Vec::with_capacity(1024); let (res, buf) = file.read_at(buf, 0).await; let n = res.unwrap(); assert_eq!(n, HELLO.len()); assert_eq!(&buf[..n], HELLO); } #[test] fn basic_read() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); read_hello(&file).await; }); } #[test] fn basic_read_exact() { tokio_uring::start(async { let data = HELLO.repeat(1000); let buf = Vec::with_capacity(data.len()); let mut tempfile = tempfile(); tempfile.write_all(&data).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); let (res, buf) = file.read_exact_at(buf, 0).await; res.unwrap(); assert_eq!(buf, data); }); } #[test] fn basic_write() { tokio_uring::start(async { let tempfile = tempfile(); let file = File::create(tempfile.path()).await.unwrap(); file.write_at(HELLO, 0).submit().await.0.unwrap(); let file = std::fs::read(tempfile.path()).unwrap(); assert_eq!(file, HELLO); }); } #[test] fn vectored_read() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); let bufs = vec![Vec::::with_capacity(5), Vec::::with_capacity(9)]; let (res, bufs) = file.readv_at(bufs, 0).await; let n = res.unwrap(); assert_eq!(n, HELLO.len()); assert_eq!(bufs[1][0], b' '); }); 
} #[test] fn vectored_write() { tokio_uring::start(async { let tempfile = tempfile(); let file = File::create(tempfile.path()).await.unwrap(); let buf1 = "hello".to_owned().into_bytes(); let buf2 = " world...".to_owned().into_bytes(); let bufs = vec![buf1, buf2]; file.writev_at(bufs, 0).await.0.unwrap(); let file = std::fs::read(tempfile.path()).unwrap(); assert_eq!(file, HELLO); }); } #[test] fn basic_write_all() { tokio_uring::start(async { let data = HELLO.repeat(1000); let tempfile = tempfile(); let file = File::create(tempfile.path()).await.unwrap(); let (ret, data) = file.write_all_at(data, 0).await; ret.unwrap(); let file = std::fs::read(tempfile.path()).unwrap(); assert_eq!(file, data); }); } #[test] fn cancel_read() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let file = File::open(tempfile.path()).await.unwrap(); // Poll the future once, then cancel it poll_once(async { read_hello(&file).await }).await; read_hello(&file).await; }); } #[test] fn explicit_close() { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); tokio_uring::start(async { let file = File::open(tempfile.path()).await.unwrap(); let fd = file.as_raw_fd(); file.close().await.unwrap(); assert_invalid_fd(fd); }) } #[test] fn drop_open() { tokio_uring::start(async { let tempfile = tempfile(); let _ = File::create(tempfile.path()); // Do something else let file = File::create(tempfile.path()).await.unwrap(); file.write_at(HELLO, 0).submit().await.0.unwrap(); let file = std::fs::read(tempfile.path()).unwrap(); assert_eq!(file, HELLO); }); } #[test] fn drop_off_runtime() { let file = tokio_uring::start(async { let tempfile = tempfile(); File::open(tempfile.path()).await.unwrap() }); let fd = file.as_raw_fd(); drop(file); assert_invalid_fd(fd); } #[test] fn sync_doesnt_kill_anything() { let tempfile = tempfile(); tokio_uring::start(async { let file = File::create(tempfile.path()).await.unwrap(); file.sync_all().await.unwrap(); 
file.sync_data().await.unwrap(); file.write_at(&b"foo"[..], 0).submit().await.0.unwrap(); file.sync_all().await.unwrap(); file.sync_data().await.unwrap(); }); } #[test] fn rename() { use std::ffi::OsStr; tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let old_path = tempfile.path(); let old_file = File::open(old_path).await.unwrap(); read_hello(&old_file).await; old_file.close().await.unwrap(); let mut new_file_name = old_path .file_name() .unwrap_or_else(|| OsStr::new("")) .to_os_string(); new_file_name.push("_renamed"); let new_path = old_path.with_file_name(new_file_name); tokio_uring::fs::rename(&old_path, &new_path).await.unwrap(); let new_file = File::open(&new_path).await.unwrap(); read_hello(&new_file).await; let old_file = File::open(old_path).await; assert!(old_file.is_err()); // Since the file has been renamed, it won't be deleted // in the TempPath destructor. We have to manually delete it. std::fs::remove_file(&new_path).unwrap(); }) } #[test] fn read_fixed() { tokio_uring::start(async { let mut tempfile = tempfile(); tempfile.write_all(HELLO).unwrap(); let buffers = FixedBufRegistry::new([Vec::with_capacity(6), Vec::with_capacity(1024)]); buffers.register().unwrap(); let file = File::open(tempfile.path()).await.unwrap(); let fixed_buf = buffers.check_out(0).unwrap(); assert_eq!(fixed_buf.bytes_total(), 6); let (res, buf) = file.read_fixed_at(fixed_buf.slice(..), 0).await; let n = res.unwrap(); assert_eq!(n, 6); assert_eq!(&buf[..], &HELLO[..6]); let fixed_buf = buffers.check_out(1).unwrap(); assert_eq!(fixed_buf.bytes_total(), 1024); let (res, buf) = file.read_fixed_at(fixed_buf.slice(..), 6).await; let n = res.unwrap(); assert_eq!(n, HELLO.len() - 6); assert_eq!(&buf[..], &HELLO[6..]); }); } #[test] fn write_fixed() { tokio_uring::start(async { let tempfile = tempfile(); let file = File::create(tempfile.path()).await.unwrap(); let buffers = FixedBufRegistry::new([Vec::with_capacity(6), 
Vec::with_capacity(1024)]); buffers.register().unwrap(); let fixed_buf = buffers.check_out(0).unwrap(); let mut buf = fixed_buf; buf.put_slice(&HELLO[..6]); let (res, _) = file.write_fixed_at(buf, 0).await; let n = res.unwrap(); assert_eq!(n, 6); let fixed_buf = buffers.check_out(1).unwrap(); let mut buf = fixed_buf; buf.put_slice(&HELLO[6..]); let (res, _) = file.write_fixed_at(buf, 6).await; let n = res.unwrap(); assert_eq!(n, HELLO.len() - 6); let file = std::fs::read(tempfile.path()).unwrap(); assert_eq!(file, HELLO); }); } #[test] fn basic_fallocate() { tokio_uring::start(async { let tempfile = tempfile(); let file = File::create(tempfile.path()).await.unwrap(); file.fallocate(0, 1024, libc::FALLOC_FL_ZERO_RANGE) .await .unwrap(); file.sync_all().await.unwrap(); let statx = file.statx().await.unwrap(); let size = statx.stx_size; assert_eq!(size, 1024); // using the FALLOC_FL_KEEP_SIZE flag causes the file metadata to reflect the previous size file.fallocate( 0, 2048, libc::FALLOC_FL_ZERO_RANGE | libc::FALLOC_FL_KEEP_SIZE, ) .await .unwrap(); file.sync_all().await.unwrap(); let statx = file.statx().await.unwrap(); let size = statx.stx_size; assert_eq!(size, 1024); }); } fn tempfile() -> NamedTempFile { NamedTempFile::new().unwrap() } async fn poll_once(future: impl std::future::Future) { use std::future::poll_fn; // use std::future::Future; use std::task::Poll; use tokio::pin; pin!(future); poll_fn(|cx| { assert!(future.as_mut().poll(cx).is_pending()); Poll::Ready(()) }) .await; } fn assert_invalid_fd(fd: RawFd) { use std::fs::File; let mut f = unsafe { File::from_raw_fd(fd) }; let mut buf = vec![]; match f.read_to_end(&mut buf) { Err(ref e) if e.raw_os_error() == Some(libc::EBADF) => {} res => panic!("assert_invalid_fd finds for fd {:?}, res = {:?}", fd, res), } } tokio-uring-0.5.0/tests/runtime.rs000064400000000000000000000016061046102023000152730ustar 00000000000000use tokio::net::{TcpListener, TcpStream}; #[test] fn use_tokio_types_from_runtime() { 
tokio_uring::start(async { let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let addr = listener.local_addr().unwrap(); let task = tokio::spawn(async move { let _socket = TcpStream::connect(addr).await.unwrap(); }); // Accept a connection let (_socket, _) = listener.accept().await.unwrap(); // Wait for the task to complete task.await.unwrap(); }); } #[test] fn spawn_a_task() { use std::cell::RefCell; use std::rc::Rc; tokio_uring::start(async { let cell = Rc::new(RefCell::new(1)); let c = cell.clone(); let handle = tokio_uring::spawn(async move { *c.borrow_mut() = 2; }); handle.await.unwrap(); assert_eq!(2, *cell.borrow()); }); }