rustix/backend/libc/thread/
syscalls.rs

1//! libc syscalls supporting `rustix::thread`.
2
3use crate::backend::c;
4use crate::backend::conv::ret;
5use crate::io;
6#[cfg(not(any(
7    apple,
8    freebsdlike,
9    target_os = "emscripten",
10    target_os = "espidf",
11    target_os = "haiku",
12    target_os = "openbsd",
13    target_os = "redox",
14    target_os = "vita",
15    target_os = "wasi",
16)))]
17use crate::thread::ClockId;
18#[cfg(not(target_os = "redox"))]
19use crate::thread::{NanosleepRelativeResult, Timespec};
20#[cfg(all(target_env = "gnu", fix_y2038))]
21use crate::timespec::LibcTimespec;
22#[cfg(all(
23    linux_kernel,
24    target_pointer_width = "32",
25    not(any(target_arch = "aarch64", target_arch = "x86_64"))
26))]
27use crate::utils::option_as_ptr;
28use core::mem::MaybeUninit;
29#[cfg(linux_kernel)]
30use core::sync::atomic::AtomicU32;
31#[cfg(linux_kernel)]
32use {
33    crate::backend::conv::{borrowed_fd, ret_c_int, ret_usize},
34    crate::fd::BorrowedFd,
35    crate::pid::Pid,
36    crate::thread::futex,
37    crate::utils::as_mut_ptr,
38};
39
// Optional y2038-safe glibc entry point for `clock_nanosleep`. The
// `clock_nanosleep_*` functions in this file probe it with `.get()` and fall
// back to the legacy `clock_nanosleep` path when it is unavailable.
#[cfg(all(target_env = "gnu", fix_y2038))]
weak!(fn __clock_nanosleep_time64(c::clockid_t, c::c_int, *const LibcTimespec, *mut LibcTimespec) -> c::c_int);
// Optional y2038-safe glibc entry point for `nanosleep`, probed the same way
// by `nanosleep` in this file.
#[cfg(all(target_env = "gnu", fix_y2038))]
weak!(fn __nanosleep64(*const LibcTimespec, *mut LibcTimespec) -> c::c_int);
44
45#[cfg(not(any(
46    apple,
47    target_os = "dragonfly",
48    target_os = "emscripten",
49    target_os = "espidf",
50    target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11.
51    target_os = "haiku",
52    target_os = "openbsd",
53    target_os = "redox",
54    target_os = "vita",
55    target_os = "wasi",
56)))]
57#[inline]
58pub(crate) fn clock_nanosleep_relative(id: ClockId, request: &Timespec) -> NanosleepRelativeResult {
59    // Old 32-bit version: libc has `clock_nanosleep` but it is not y2038 safe
60    // by default. But there may be a `__clock_nanosleep_time64` we can use.
61    #[cfg(fix_y2038)]
62    {
63        #[cfg(target_env = "gnu")]
64        if let Some(libc_clock_nanosleep) = __clock_nanosleep_time64.get() {
65            let flags = 0;
66            let mut remain = MaybeUninit::<LibcTimespec>::uninit();
67
68            unsafe {
69                return match libc_clock_nanosleep(
70                    id as c::clockid_t,
71                    flags,
72                    &request.clone().into(),
73                    remain.as_mut_ptr(),
74                ) {
75                    0 => NanosleepRelativeResult::Ok,
76                    err if err == io::Errno::INTR.0 => {
77                        NanosleepRelativeResult::Interrupted(remain.assume_init().into())
78                    }
79                    err => NanosleepRelativeResult::Err(io::Errno(err)),
80                };
81            }
82        }
83
84        clock_nanosleep_relative_old(id, request)
85    }
86
87    // Main version: libc is y2038 safe and has `clock_nanosleep`.
88    #[cfg(not(fix_y2038))]
89    unsafe {
90        let flags = 0;
91        let mut remain = MaybeUninit::<Timespec>::uninit();
92
93        match c::clock_nanosleep(id as c::clockid_t, flags, request, remain.as_mut_ptr()) {
94            0 => NanosleepRelativeResult::Ok,
95            err if err == io::Errno::INTR.0 => {
96                NanosleepRelativeResult::Interrupted(remain.assume_init())
97            }
98            err => NanosleepRelativeResult::Err(io::Errno(err)),
99        }
100    }
101}
102
/// Fallback for `clock_nanosleep_relative` that goes through libc's
/// `clock_nanosleep` with the platform's own `c::timespec`.
///
/// The request is narrowed field by field: a `tv_sec` that does not fit
/// yields `Errno::OVERFLOW`, a `tv_nsec` that does not fit yields
/// `Errno::INVAL`. When the sleep is interrupted, the remaining time is
/// widened back into a `Timespec`.
#[cfg(all(
    fix_y2038,
    not(any(
        apple,
        target_os = "emscripten",
        target_os = "haiku",
        target_os = "vita"
    ))
))]
fn clock_nanosleep_relative_old(
    id: crate::clockid::ClockId,
    request: &Timespec,
) -> NanosleepRelativeResult {
    // A `tv_sec` failure takes priority over a `tv_nsec` failure.
    let old_request = match (request.tv_sec.try_into(), request.tv_nsec.try_into()) {
        (Err(_), _) => return NanosleepRelativeResult::Err(io::Errno::OVERFLOW),
        (Ok(_), Err(_)) => return NanosleepRelativeResult::Err(io::Errno::INVAL),
        (Ok(tv_sec), Ok(tv_nsec)) => c::timespec { tv_sec, tv_nsec },
    };
    let mut old_remain = MaybeUninit::<c::timespec>::uninit();

    let status = unsafe {
        c::clock_nanosleep(
            id as c::clockid_t,
            0,
            &old_request,
            old_remain.as_mut_ptr(),
        )
    };

    if status == 0 {
        return NanosleepRelativeResult::Ok;
    }
    if status != io::Errno::INTR.0 {
        return NanosleepRelativeResult::Err(io::Errno(status));
    }

    // Interrupted: the remaining time is read only on this path.
    let leftover = unsafe { old_remain.assume_init() };
    NanosleepRelativeResult::Interrupted(Timespec {
        tv_sec: leftover.tv_sec.into(),
        tv_nsec: leftover.tv_nsec.into(),
    })
}
148
149#[cfg(not(any(
150    apple,
151    target_os = "dragonfly",
152    target_os = "emscripten",
153    target_os = "espidf",
154    target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11.
155    target_os = "haiku",
156    target_os = "openbsd",
157    target_os = "redox",
158    target_os = "vita",
159    target_os = "wasi",
160)))]
161#[inline]
162pub(crate) fn clock_nanosleep_absolute(id: ClockId, request: &Timespec) -> io::Result<()> {
163    // Old 32-bit version: libc has `clock_nanosleep` but it is not y2038 safe
164    // by default. But there may be a `__clock_nanosleep_time64` we can use.
165    #[cfg(fix_y2038)]
166    {
167        #[cfg(target_env = "gnu")]
168        if let Some(libc_clock_nanosleep) = __clock_nanosleep_time64.get() {
169            let flags = c::TIMER_ABSTIME;
170            unsafe {
171                return match {
172                    libc_clock_nanosleep(
173                        id as c::clockid_t,
174                        flags,
175                        &request.clone().into(),
176                        core::ptr::null_mut(),
177                    )
178                } {
179                    0 => Ok(()),
180                    err => Err(io::Errno(err)),
181                };
182            }
183        }
184
185        clock_nanosleep_absolute_old(id, request)
186    }
187
188    // Main version: libc is y2038 safe and has `clock_nanosleep`.
189    #[cfg(not(fix_y2038))]
190    {
191        let flags = c::TIMER_ABSTIME;
192
193        match unsafe {
194            c::clock_nanosleep(
195                id as c::clockid_t,
196                flags as _,
197                request,
198                core::ptr::null_mut(),
199            )
200        } {
201            0 => Ok(()),
202            err => Err(io::Errno(err)),
203        }
204    }
205}
206
/// Fallback for `clock_nanosleep_absolute` that goes through libc's
/// `clock_nanosleep` with the platform's own `c::timespec`.
///
/// A `tv_sec` that does not fit yields `Errno::OVERFLOW`; a `tv_nsec` that
/// does not fit yields `Errno::INVAL`.
#[cfg(all(
    fix_y2038,
    not(any(
        apple,
        target_os = "emscripten",
        target_os = "haiku",
        target_os = "vita"
    ))
))]
fn clock_nanosleep_absolute_old(id: crate::clockid::ClockId, request: &Timespec) -> io::Result<()> {
    // Narrow the fields one at a time; seconds are checked first.
    let tv_sec = request.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?;
    let tv_nsec = request.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?;
    let old_request = c::timespec { tv_sec, tv_nsec };

    let status = unsafe {
        c::clock_nanosleep(
            id as c::clockid_t,
            c::TIMER_ABSTIME,
            &old_request,
            core::ptr::null_mut(),
        )
    };

    // The return value is the error code itself.
    if status == 0 {
        Ok(())
    } else {
        Err(io::Errno(status))
    }
}
235
236#[cfg(not(target_os = "redox"))]
237#[inline]
238pub(crate) fn nanosleep(request: &Timespec) -> NanosleepRelativeResult {
239    // Old 32-bit version: libc has `nanosleep` but it is not y2038 safe by
240    // default. But there may be a `__nanosleep64` we can use.
241    #[cfg(fix_y2038)]
242    {
243        #[cfg(target_env = "gnu")]
244        if let Some(libc_nanosleep) = __nanosleep64.get() {
245            let mut remain = MaybeUninit::<LibcTimespec>::uninit();
246            unsafe {
247                return match ret(libc_nanosleep(&request.clone().into(), remain.as_mut_ptr())) {
248                    Ok(()) => NanosleepRelativeResult::Ok,
249                    Err(io::Errno::INTR) => {
250                        NanosleepRelativeResult::Interrupted(remain.assume_init().into())
251                    }
252                    Err(err) => NanosleepRelativeResult::Err(err),
253                };
254            }
255        }
256
257        nanosleep_old(request)
258    }
259
260    // Main version: libc is y2038 safe and has `nanosleep`.
261    #[cfg(not(fix_y2038))]
262    unsafe {
263        let mut remain = MaybeUninit::<Timespec>::uninit();
264
265        match ret(c::nanosleep(request, remain.as_mut_ptr())) {
266            Ok(()) => NanosleepRelativeResult::Ok,
267            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(remain.assume_init()),
268            Err(err) => NanosleepRelativeResult::Err(err),
269        }
270    }
271}
272
/// Fallback for `nanosleep` that goes through libc's `nanosleep` with the
/// platform's own `c::timespec`.
///
/// A `tv_sec` that does not fit yields `Errno::OVERFLOW`; a `tv_nsec` that
/// does not fit yields `Errno::INVAL`. When the sleep is interrupted, the
/// remaining time is widened back into a `Timespec`.
#[cfg(fix_y2038)]
fn nanosleep_old(request: &Timespec) -> NanosleepRelativeResult {
    // A `tv_sec` failure takes priority over a `tv_nsec` failure.
    let old_request = match (request.tv_sec.try_into(), request.tv_nsec.try_into()) {
        (Err(_), _) => return NanosleepRelativeResult::Err(io::Errno::OVERFLOW),
        (Ok(_), Err(_)) => return NanosleepRelativeResult::Err(io::Errno::INVAL),
        (Ok(tv_sec), Ok(tv_nsec)) => c::timespec { tv_sec, tv_nsec },
    };
    let mut old_remain = MaybeUninit::<c::timespec>::uninit();

    let outcome = unsafe { ret(c::nanosleep(&old_request, old_remain.as_mut_ptr())) };

    match outcome {
        Ok(()) => NanosleepRelativeResult::Ok,
        Err(err) if err == io::Errno::INTR => {
            // Interrupted: the remaining time is read only on this path.
            let leftover = unsafe { old_remain.assume_init() };
            NanosleepRelativeResult::Interrupted(Timespec {
                tv_sec: leftover.tv_sec.into(),
                tv_nsec: leftover.tv_nsec.into(),
            })
        }
        Err(err) => NanosleepRelativeResult::Err(err),
    }
}
301
/// Return the calling thread's ID as a `Pid`.
#[cfg(linux_kernel)]
#[inline]
#[must_use]
pub(crate) fn gettid() -> Pid {
    // glibc only gained a `gettid` wrapper in 2.30 and musl in 1.2.2, so the
    // call is routed through `syscall` instead of relying on libc.
    // <https://sourceware.org/bugzilla/show_bug.cgi?id=6399#c62>
    weak_or_syscall! {
        fn gettid() via SYS_gettid -> c::pid_t
    }

    // The raw value is handed to `Pid` without validation.
    // NOTE(review): presumably sound because `gettid` cannot fail — confirm
    // against the contract of `Pid::from_raw_unchecked`.
    unsafe { Pid::from_raw_unchecked(gettid()) }
}
318
/// Invoke `setns` with the descriptor `fd` and the namespace type `nstype`,
/// returning the call's integer result on success.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setns(fd: BorrowedFd<'_>, nstype: c::c_int) -> io::Result<c::c_int> {
    // glibc only gained a `setns` wrapper in 2.14 and musl in 0.9.5, so the
    // call is routed through `syscall` instead of relying on libc.
    weak_or_syscall! {
        fn setns(fd: c::c_int, nstype: c::c_int) via SYS_setns -> c::c_int
    }

    unsafe {
        let status = setns(borrowed_fd(fd), nstype);
        ret_c_int(status)
    }
}
330
/// Call libc's `unshare` with the raw bits of `flags`.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn unshare(flags: crate::thread::UnshareFlags) -> io::Result<()> {
    let raw_flags = flags.bits() as i32;
    ret(unsafe { c::unshare(raw_flags) })
}
336
/// Issue the raw `capget` system call with `header` and the start of the
/// `data` slice.
///
/// `data` may be uninitialized on entry; only its base pointer is passed
/// on, so the slice length is not checked here.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn capget(
    header: &mut linux_raw_sys::general::__user_cap_header_struct,
    data: &mut [MaybeUninit<linux_raw_sys::general::__user_cap_data_struct>],
) -> io::Result<()> {
    syscall! {
        fn capget(
            hdrp: *mut linux_raw_sys::general::__user_cap_header_struct,
            data: *mut linux_raw_sys::general::__user_cap_data_struct
        ) via SYS_capget -> c::c_int
    }

    // Strip the `MaybeUninit` wrapper from the element type of the pointer.
    let data_ptr = data
        .as_mut_ptr()
        .cast::<linux_raw_sys::general::__user_cap_data_struct>();

    unsafe { ret(capget(as_mut_ptr(header), data_ptr)) }
}
358
/// Issue the raw `capset` system call with `header` and the start of the
/// `data` slice.
///
/// Only the base pointer of `data` is passed on, so the slice length is not
/// checked here.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn capset(
    header: &mut linux_raw_sys::general::__user_cap_header_struct,
    data: &[linux_raw_sys::general::__user_cap_data_struct],
) -> io::Result<()> {
    syscall! {
        fn capset(
            hdrp: *mut linux_raw_sys::general::__user_cap_header_struct,
            data: *const linux_raw_sys::general::__user_cap_data_struct
        ) via SYS_capset -> c::c_int
    }

    let data_ptr = data.as_ptr();

    unsafe { ret(capset(as_mut_ptr(header), data_ptr)) }
}
374
/// Issue the raw `setuid` system call with `uid`.
///
/// NOTE(review): the `_thread` suffix suggests this is meant to affect only
/// the calling thread — confirm against the public API docs.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setuid_thread(uid: crate::ugid::Uid) -> io::Result<()> {
    syscall! {
        fn setuid(uid: c::uid_t) via SYS_setuid -> c::c_int
    }

    let raw_uid = uid.as_raw();
    ret(unsafe { setuid(raw_uid) })
}
384
/// Issue the raw `setresuid` system call with the real, effective and saved
/// user IDs.
///
/// NOTE(review): the `_thread` suffix suggests this is meant to affect only
/// the calling thread — confirm against the public API docs.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setresuid_thread(
    ruid: crate::ugid::Uid,
    euid: crate::ugid::Uid,
    suid: crate::ugid::Uid,
) -> io::Result<()> {
    // x86, arm and sparc use the `SYS_setresuid32` number; every other
    // architecture uses `SYS_setresuid`.
    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
    const SYS: c::c_long = c::SYS_setresuid as c::c_long;
    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
    const SYS: c::c_long = c::SYS_setresuid32 as c::c_long;

    syscall! {
        fn setresuid(ruid: c::uid_t, euid: c::uid_t, suid: c::uid_t) via SYS -> c::c_int
    }

    let (real, effective, saved) = (ruid.as_raw(), euid.as_raw(), suid.as_raw());
    ret(unsafe { setresuid(real, effective, saved) })
}
403
/// Issue the raw `setgid` system call with `gid`.
///
/// NOTE(review): the `_thread` suffix suggests this is meant to affect only
/// the calling thread — confirm against the public API docs.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setgid_thread(gid: crate::ugid::Gid) -> io::Result<()> {
    syscall! {
        fn setgid(gid: c::gid_t) via SYS_setgid -> c::c_int
    }

    let raw_gid = gid.as_raw();
    ret(unsafe { setgid(raw_gid) })
}
413
/// Issue the raw `setresgid` system call with the real, effective and saved
/// group IDs.
///
/// NOTE(review): the `_thread` suffix suggests this is meant to affect only
/// the calling thread — confirm against the public API docs.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setresgid_thread(
    rgid: crate::ugid::Gid,
    egid: crate::ugid::Gid,
    sgid: crate::ugid::Gid,
) -> io::Result<()> {
    // x86, arm and sparc use the `SYS_setresgid32` number; every other
    // architecture uses `SYS_setresgid`.
    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
    const SYS: c::c_long = c::SYS_setresgid as c::c_long;
    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
    const SYS: c::c_long = c::SYS_setresgid32 as c::c_long;

    syscall! {
        fn setresgid(rgid: c::gid_t, egid: c::gid_t, sgid: c::gid_t) via SYS -> c::c_int
    }

    let (real, effective, saved) = (rgid.as_raw(), egid.as_raw(), sgid.as_raw());
    ret(unsafe { setresgid(real, effective, saved) })
}
432
/// Perform a futex operation that takes the integer `val2` in the argument
/// slot other operations use for a timeout.
///
/// `val2` is widened to pointer size and passed as the `timeout` argument;
/// it is never dereferenced by this function. On 32-bit targets
/// `futex_time64` is tried first, and if the kernel reports `ENOSYS` the call
/// is retried with the legacy `futex` system call. No timespec conversion is
/// needed for that retry because the argument is an integer, not a pointer
/// to a timespec.
///
/// Returns the system call's result converted by `ret_usize`.
///
/// # Safety
///
/// The raw pointers must point to valid aligned memory.
#[cfg(linux_kernel)]
pub(crate) unsafe fn futex_val2(
    uaddr: *const AtomicU32,
    op: super::futex::Operation,
    flags: futex::Flags,
    val: u32,
    val2: u32,
    uaddr2: *const AtomicU32,
    val3: u32,
) -> io::Result<usize> {
    // Pass `val2` in the least-significant bytes of the `timeout` argument.
    // [“the kernel casts the timeout value first to unsigned long, then to
    // uint32_t”], so we perform that exact conversion in reverse to create
    // the pointer.
    //
    // [“the kernel casts the timeout value first to unsigned long, then to uint32_t”]: https://man7.org/linux/man-pages/man2/futex.2.html
    let timeout = val2 as usize as *const Timespec;

    // The operation and its flags share a single argument word.
    let futex_op = op as i32 | flags.bits() as i32;

    #[cfg(all(
        target_pointer_width = "32",
        not(any(target_arch = "aarch64", target_arch = "x86_64"))
    ))]
    {
        // TODO: Upstream this to the libc crate.
        #[allow(non_upper_case_globals)]
        const SYS_futex_time64: i32 = linux_raw_sys::general::__NR_futex_time64 as i32;

        syscall! {
            fn futex_time64(
                uaddr: *const AtomicU32,
                futex_op: c::c_int,
                val: u32,
                timeout: *const Timespec,
                uaddr2: *const AtomicU32,
                val3: u32
            ) via SYS_futex_time64 -> c::ssize_t
        }

        // Legacy entry point, used only when `futex_time64` is unavailable.
        syscall! {
            fn futex(
                uaddr: *const AtomicU32,
                futex_op: c::c_int,
                val: u32,
                timeout: *const Timespec,
                uaddr2: *const AtomicU32,
                val3: u32
            ) via SYS_futex -> c::c_long
        }

        ret_usize(futex_time64(uaddr, futex_op, val, timeout, uaddr2, val3)).or_else(|err| {
            // Mirror the `ENOSYS` fallback in `futex_timeout`. Here `timeout`
            // is `val2` in disguise, so it is forwarded unchanged instead of
            // being converted to an old-style timespec.
            if err == io::Errno::NOSYS {
                ret_usize(futex(uaddr, futex_op, val, timeout, uaddr2, val3) as isize)
            } else {
                Err(err)
            }
        })
    }

    #[cfg(any(
        target_pointer_width = "64",
        target_arch = "aarch64",
        target_arch = "x86_64"
    ))]
    {
        syscall! {
            fn futex(
                uaddr: *const AtomicU32,
                futex_op: c::c_int,
                val: u32,
                timeout: *const linux_raw_sys::general::__kernel_timespec,
                uaddr2: *const AtomicU32,
                val3: u32
            ) via SYS_futex -> c::c_long
        }

        ret_usize(futex(uaddr, futex_op, val, timeout.cast(), uaddr2, val3) as isize)
    }
}
511
/// Perform a futex operation whose fourth argument is a `timeout` pointer.
///
/// On 32-bit targets `futex_time64` is tried first; if the kernel reports
/// `ENOSYS`, the call is retried through `futex_old_timespec`. On 64-bit
/// targets the plain `futex` system call is used.
///
/// Returns the system call's result converted by `ret_usize`.
///
/// # Safety
///
/// The raw pointers must point to valid aligned memory.
#[cfg(linux_kernel)]
pub(crate) unsafe fn futex_timeout(
    uaddr: *const AtomicU32,
    op: super::futex::Operation,
    flags: futex::Flags,
    val: u32,
    timeout: *const Timespec,
    uaddr2: *const AtomicU32,
    val3: u32,
) -> io::Result<usize> {
    // The operation and its flags share a single argument word.
    let futex_op = op as i32 | flags.bits() as i32;

    #[cfg(all(
        target_pointer_width = "32",
        not(any(target_arch = "aarch64", target_arch = "x86_64"))
    ))]
    {
        // TODO: Upstream this to the libc crate.
        #[allow(non_upper_case_globals)]
        const SYS_futex_time64: i32 = linux_raw_sys::general::__NR_futex_time64 as i32;

        syscall! {
            fn futex_time64(
                uaddr: *const AtomicU32,
                futex_op: c::c_int,
                val: u32,
                timeout: *const Timespec,
                uaddr2: *const AtomicU32,
                val3: u32
            ) via SYS_futex_time64 -> c::ssize_t
        }

        match ret_usize(futex_time64(uaddr, futex_op, val, timeout, uaddr2, val3)) {
            // See the comments in `rustix_clock_gettime_via_syscall` about
            // emulation.
            Err(io::Errno::NOSYS) => {
                futex_old_timespec(uaddr, op, flags, val, timeout, uaddr2, val3)
            }
            outcome => outcome,
        }
    }

    #[cfg(any(
        target_pointer_width = "64",
        target_arch = "aarch64",
        target_arch = "x86_64"
    ))]
    {
        syscall! {
            fn futex(
                uaddr: *const AtomicU32,
                futex_op: c::c_int,
                val: u32,
                timeout: *const linux_raw_sys::general::__kernel_timespec,
                uaddr2: *const AtomicU32,
                val3: u32
            ) via SYS_futex -> c::c_long
        }

        ret_usize(futex(uaddr, futex_op, val, timeout.cast(), uaddr2, val3) as isize)
    }
}
591
/// Perform a futex operation through the legacy `futex` system call,
/// converting `timeout` to a `__kernel_old_timespec` first.
///
/// A null `timeout` is forwarded as a null pointer. Otherwise both fields
/// are narrowed, and a field that does not fit yields `Errno::INVAL` without
/// making the system call.
///
/// # Safety
///
/// The raw pointers must point to valid aligned memory.
#[cfg(linux_kernel)]
#[cfg(all(
    target_pointer_width = "32",
    not(any(target_arch = "aarch64", target_arch = "x86_64"))
))]
unsafe fn futex_old_timespec(
    uaddr: *const AtomicU32,
    op: super::futex::Operation,
    flags: futex::Flags,
    val: u32,
    timeout: *const Timespec,
    uaddr2: *const AtomicU32,
    val3: u32,
) -> io::Result<usize> {
    syscall! {
        fn futex(
            uaddr: *const AtomicU32,
            futex_op: c::c_int,
            val: u32,
            timeout: *const linux_raw_sys::general::__kernel_old_timespec,
            uaddr2: *const AtomicU32,
            val3: u32
        ) via SYS_futex -> c::c_long
    }

    // Only a non-null `timeout` is read and narrowed.
    let old_timeout = if !timeout.is_null() {
        let tv_sec = (*timeout).tv_sec.try_into().map_err(|_| io::Errno::INVAL)?;
        let tv_nsec = (*timeout).tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?;
        Some(linux_raw_sys::general::__kernel_old_timespec { tv_sec, tv_nsec })
    } else {
        None
    };

    // The operation and its flags share a single argument word.
    let futex_op = op as i32 | flags.bits() as i32;
    let old_timeout_ptr = option_as_ptr(old_timeout.as_ref());

    ret_usize(futex(uaddr, futex_op, val, old_timeout_ptr, uaddr2, val3) as isize)
}
640
/// Issue the raw `setgroups` system call with the length and base pointer of
/// `groups`.
///
/// NOTE(review): the `_thread` suffix suggests this is meant to affect only
/// the calling thread — confirm against the public API docs.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn setgroups_thread(groups: &[crate::ugid::Gid]) -> io::Result<()> {
    syscall! {
        fn setgroups(size: c::size_t, list: *const c::gid_t) via SYS_setgroups -> c::c_int
    }

    let count = groups.len();
    // Reinterpret the slice's elements as raw `gid_t` values for the kernel.
    let list = groups.as_ptr().cast::<c::gid_t>();
    ret(unsafe { setgroups(count, list) })
}
646    }
647    ret(unsafe { setgroups(groups.len(), groups.as_ptr().cast()) })
648}