linux-kernelorg-stable/include/uapi/linux
Thomas Gleixner ec2d0c0462 posix-timers: Provide a mechanism to allocate a given timer ID
Checkpoint/Restore in Userspace (CRIU) needs to reconstruct posix timers
with the same timer ID on restore. It uses sys_timer_create() and relies on
the monotonically increasing timer ID provided by this syscall. It creates
and deletes timers until the desired ID is reached. This can loop for a long
time when the checkpointed process had a very sparse timer ID range.

It has been debated to implement a new syscall to allow the creation of
timers with a given timer ID, but that's tedious due to the 32/64bit compat
issues of sigevent_t and of dubious value.

The restore mechanism of CRIU creates the timers in a state where all
threads of the restored process are held on a barrier and cannot issue
syscalls. That means the restorer task has exclusive control.

This allows the issue to be addressed with a prctl() so that the restorer
thread can do:

   if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_ON))
      goto linear_mode;
   create_timers_with_explicit_ids();
   prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_OFF);
   
This is backwards compatible because the prctl() fails on older kernels and
CRIU can fall back to the linear timer ID mechanism. CRIU versions which do
not know about the prctl() just work as before.

Implement the prctl() and modify timer_create() so that it copies the
requested timer ID from userspace by utilizing the existing timer_t
pointer, which is used to copy out the allocated timer ID on success.

If the prctl() is disabled, which it is by default, timer_create() works as
before and does not try to read from the userspace pointer.

There is no problem when a broken or rogue user space application enables
the prctl(). If the user space pointer does not contain a valid ID, then
timer_create() fails. If the data is not initialized, but contains a
random valid ID, timer_create() will create that random timer ID or fail if
the ID is already given out. 
 
As CRIU must use the raw syscall to avoid manipulating the internal state
of the restored process, this has no library dependencies and can be
adopted by CRIU right away.

Recreating two timers with IDs 1000000 and 2000000 takes 1.5 seconds with
the create/delete method. With the prctl() it takes 3 microseconds.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Tested-by: Cyrill Gorcunov <gorcunov@gmail.com>
Link: https://lore.kernel.org/all/87jz8vz0en.ffs@tglx
2025-03-13 12:07:18 +01:00
..
android
byteorder
caif
can
cifs
dvb
genwqe
hdlc
hsi
iio
isdn
media/raspberrypi
misc
mmc
netfilter netfilter: conntrack: add conntrack event timestamp 2025-01-09 14:42:16 +01:00
netfilter_arp
netfilter_bridge
netfilter_ipv4
netfilter_ipv6
nfsd
raid md: reintroduce md-linear 2025-01-13 07:36:29 -08:00
sched
spi
sunrpc
surface_aggregator
tc_act
tc_ematch
usb usb: gadget: functionfs: fix spellos 2024-12-04 16:08:34 +01:00
a.out.h
acct.h
acrn.h
adb.h
adfs_fs.h
affs_hardblocks.h
agpgart.h
aio_abi.h
am437x-vpfe.h
amt.h
apm_bios.h
arcfb.h
arm_sdei.h
aspeed-lpc-ctrl.h
aspeed-p2a-ctrl.h
aspeed-video.h
atalk.h
atm.h
atm_eni.h
atm_he.h
atm_idt77105.h
atm_nicstar.h
atm_tcp.h
atm_zatm.h
atmapi.h
atmarp.h
atmbr2684.h
atmclip.h
atmdev.h
atmioc.h
atmlec.h
atmmpc.h
atmppp.h
atmsap.h
atmsvc.h
audit.h ima: instantiate the bprm_creds_for_exec() hook 2024-12-18 17:00:29 -08:00
auto_dev-ioctl.h
auto_fs.h
auto_fs4.h
auxvec.h
ax25.h
batadv_packet.h
batman_adv.h
baycom.h
bcm933xx_hcs.h
bfs_fs.h
binfmts.h
bits.h
blkdev.h
blkpg.h
blktrace_api.h
blkzoned.h
bpf.h bpf: Add fd_array_cnt attribute for prog_load 2024-12-13 14:48:36 -08:00
bpf_common.h
bpf_perf_event.h
bpqether.h
bsg.h
bt-bmc.h
btf.h
btrfs.h
btrfs_tree.h
cachefiles.h
can.h
capability.h
capi.h
cciss_defs.h
cciss_ioctl.h
ccs.h
cdrom.h
cec-funcs.h
cec.h
cfm_bridge.h
cgroupstats.h
chio.h
close_range.h
cn_proc.h
coda.h
coff.h
comedi.h
connector.h
const.h
coresight-stm.h
counter.h
cramfs_fs.h
cryptouser.h
cuda.h
cxl_mem.h
cyclades.h
cycx_cfm.h
dcbnl.h
dccp.h
devlink.h
dlm.h
dlm_device.h
dlm_plock.h
dlmconstants.h
dm-ioctl.h dm-table: atomic writes support 2025-01-17 22:23:47 +01:00
dm-log-userspace.h
dma-buf.h
dma-heap.h
dns_resolver.h
dpll.h
dqblk_xfs.h
dw100.h
edd.h
efs_fs_sb.h
elf-em.h
elf-fdpic.h
elf.h RISC-V Paches for the 6.13 Merge Window, Part 1 2024-11-27 11:19:09 -08:00
errno.h
errqueue.h
erspan.h
ethtool.h net: ethtool: add support for structured PHY statistics 2025-01-14 11:44:19 +01:00
ethtool_netlink.h net: ethtool: add support for structured PHY statistics 2025-01-14 11:44:19 +01:00
ethtool_netlink_generated.h net: ethtool: ts: add separate counter for unconfirmed one-step TX timestamps 2025-01-17 20:01:09 -08:00
eventfd.h
eventpoll.h
exfat.h
ext4.h
f2fs.h
fadvise.h
falloc.h
fanotify.h fanotify: allow to set errno in FAN_DENY permission response 2024-12-10 12:03:17 +01:00
fb.h
fcntl.h exec: Add a new AT_EXECVE_CHECK flag to execveat(2) 2024-12-18 17:00:29 -08:00
fd.h
fdreg.h
fib_rules.h net: fib_rules: Add flow label selector attributes 2024-12-19 16:02:21 +01:00
fiemap.h fiemap: use kernel-doc includes in fiemap docbook 2024-12-22 11:29:50 +01:00
filter.h
firewire-cdev.h
firewire-constants.h
fou.h
fpga-dfl.h
fs.h for-6.14/io_uring-20250119 2025-01-20 20:27:33 -08:00
fscrypt.h
fsi.h
fsl_hypervisor.h
fsl_mc.h
fsmap.h
fsverity.h
fuse.h fuse: {io-uring} Handle SQEs - register commands 2025-01-24 11:54:08 +01:00
futex.h
gameport.h
gen_stats.h
genetlink.h
gfs2_ondisk.h
gpio.h
gsmmux.h
gtp.h
handshake.h
hash_info.h
hdlc.h
hdlcdrv.h
hdreg.h
hid.h
hiddev.h
hidraw.h
hpet.h
hsr_netlink.h
hw_breakpoint.h
hyperv.h
i2c-dev.h
i2c.h
i2o-dev.h
i8k.h
icmp.h
icmpv6.h
idxd.h
if.h
if_addr.h
if_addrlabel.h
if_alg.h
if_arcnet.h
if_arp.h
if_bonding.h
if_bridge.h
if_cablemodem.h
if_eql.h
if_ether.h
if_fc.h
if_fddi.h
if_hippi.h
if_infiniband.h
if_link.h bpf-next-for-netdev 2025-01-07 15:39:09 -08:00
if_ltalk.h
if_macsec.h
if_packet.h
if_phonet.h
if_plip.h
if_ppp.h
if_pppol2tp.h
if_pppox.h
if_slip.h
if_team.h
if_tun.h
if_tunnel.h
if_vlan.h
if_x25.h
if_xdp.h
ife.h
igmp.h
ila.h
in.h include: uapi: protocol number and packet structs for AGGFRAG in ESP 2024-12-05 10:01:09 +01:00
in6.h
in_route.h
inet_diag.h
inotify.h
input-event-codes.h Input: allocate keycode for phone linking 2025-01-15 16:26:41 +02:00
input.h
io_uring.h io_uring: expose read/write attribute capability 2025-01-10 17:12:42 -07:00
ioam6.h
ioam6_genl.h
ioam6_iptunnel.h
ioctl.h
iommufd.h iommufd: Fix struct iommu_hwpt_pgfault init and padding 2025-01-21 13:55:49 -04:00
ioprio.h
ip.h include: uapi: protocol number and packet structs for AGGFRAG in ESP 2024-12-05 10:01:09 +01:00
ip6_tunnel.h
ip_vs.h
ipc.h
ipmi.h
ipmi_bmc.h
ipmi_msgdefs.h
ipmi_ssif_bmc.h
ipsec.h xfrm: add generic iptfs defines and functionality 2024-12-05 10:01:28 +01:00
ipv6.h
ipv6_route.h
irqnr.h
iso_fs.h
isst_if.h
ivtv.h
ivtvfb.h
jffs2.h
joystick.h
kcm.h
kcmp.h
kcov.h
kd.h
kdev_t.h
kernel-page-flags.h
kernel.h
kernelcapi.h
kexec.h
keyboard.h
keyctl.h
kfd_ioctl.h
kfd_sysfs.h
kvm.h KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS 2024-12-18 14:19:37 -08:00
kvm_para.h
l2tp.h
landlock.h
libc-compat.h
limits.h
lirc.h
llc.h
loadpin.h
loop.h
lp.h
lsm.h
lwtunnel.h
magic.h
major.h
map_to_7segment.h
map_to_14segment.h
matroxfb.h
max2175.h
mctp.h
mdio.h net: mdio: add definition for clock stop capable bit 2025-01-16 17:22:59 -08:00
media-bus-format.h
media.h
mei.h
mei_uuid.h
membarrier.h
memfd.h
mempolicy.h
mii.h
minix_fs.h
mman.h
mmtimer.h
module.h
mount.h
mpls.h
mpls_iptunnel.h
mptcp.h
mptcp_pm.h netlink: specs: mptcp: clearly mention attributes 2024-12-27 11:16:21 -08:00
mqueue.h
mroute.h
mroute6.h
mrp_bridge.h
msdos_fs.h
msg.h
mtio.h
nbd-netlink.h
nbd.h
ncsi.h
ndctl.h
neighbour.h
net.h
net_dropmon.h
net_namespace.h
net_shaper.h
net_tstamp.h net: Add the possibility to support a selected hwtstamp in netdevice 2024-12-16 12:51:40 +00:00
netconf.h
netdev.h
netdevice.h
netfilter.h
netfilter_arp.h
netfilter_bridge.h
netfilter_ipv4.h
netfilter_ipv6.h
netlink.h
netlink_diag.h
netrom.h
nexthop.h
nfc.h
nfs.h
nfs2.h
nfs3.h
nfs4.h nfsd: rework NFS4_SHARE_WANT_* flag handling 2025-01-21 15:30:01 -05:00
nfs4_mount.h
nfs_fs.h
nfs_idmap.h
nfs_mount.h
nfsacl.h
nfsd_netlink.h
nilfs2_api.h
nilfs2_ondisk.h
nitro_enclaves.h
nl80211-vnd-intel.h
nl80211.h wifi: cfg80211: Add support for controlling EPCS 2025-01-13 15:34:09 +01:00
npcm-video.h
nsfs.h
nsm.h
ntsync.h ntsync: Introduce alertable waits. 2025-01-08 13:18:11 +01:00
nubus.h
nvme_ioctl.h
nvram.h
omap3isp.h
omapfb.h
oom.h
openat2.h
openvswitch.h
packet_diag.h
papr_pdsm.h
param.h
parport.h
patchkey.h
pci.h
pci_regs.h Merge branch 'pci/misc' 2025-01-23 13:05:06 -06:00
pcitest.h misc: pci_endpoint_test: Add consecutive BAR test 2025-01-21 09:44:14 -06:00
perf_event.h
personality.h
pfkeyv2.h
pfrut.h
pg.h
phantom.h
phonet.h
pidfd.h
pkt_cls.h
pkt_sched.h
pktcdvd.h
pmu.h
poll.h
posix_acl.h
posix_acl_xattr.h
posix_types.h
ppdev.h
ppp-comp.h
ppp-ioctl.h
ppp_defs.h
pps.h
pps_gen.h drivers pps: add PPS generators support 2025-01-08 13:18:09 +01:00
pr.h
prctl.h posix-timers: Provide a mechanism to allocate a given timer ID 2025-03-13 12:07:18 +01:00
psample.h
psci.h
psp-dbc.h
psp-sev.h
ptp_clock.h
ptrace.h
qemu_fw_cfg.h
qnx4_fs.h
qnxtypes.h
qrtr.h
quota.h
radeonfb.h
random.h
rds.h
reboot.h
remoteproc_cdev.h
resource.h
rfkill.h
rio_cm_cdev.h
rio_mport_cdev.h
rkisp1-config.h
romfs_fs.h
rose.h
route.h
rpl.h
rpl_iptunnel.h
rpmsg.h
rpmsg_types.h
rseq.h
rtc.h
rtnetlink.h netlink: add IPv6 anycast join/leave notifications 2025-01-09 12:54:45 +01:00
rxrpc.h
scc.h
sched.h
scif_ioctl.h
screen_info.h
sctp.h
seccomp.h
securebits.h security: Add EXEC_RESTRICT_FILE and EXEC_DENY_INTERACTIVE securebits 2024-12-18 17:00:29 -08:00
sed-opal.h
seg6.h
seg6_genl.h
seg6_hmac.h
seg6_iptunnel.h
seg6_local.h
selinux_netlink.h
sem.h
serial.h
serial_core.h
serial_reg.h
serio.h
sev-guest.h
shm.h
signal.h
signalfd.h
smc.h
smc_diag.h
smiapp.h
snmp.h tcp: add LINUX_MIB_PAWS_OLD_ACK SNMP counter 2025-01-14 13:28:13 -08:00
sock_diag.h
socket.h
sockios.h
sonet.h
sonypi.h
sound.h
soundcard.h
stat.h fs: add STATX_DIO_READ_ALIGN 2025-01-09 16:23:17 +01:00
stddef.h stddef: make __struct_group() UAPI C++-friendly 2024-12-20 09:05:53 -08:00
stm.h
string.h
suspend_ioctls.h
swab.h
switchtec_ioctl.h
sync_file.h
synclink.h
sysctl.h
sysinfo.h
target_core_user.h
taskstats.h delayacct: add delay min to record delay peak 2025-01-12 20:21:16 -08:00
tcp.h
tcp_metrics.h
tdx-guest.h
tee.h
termios.h
thermal.h thermal/thresholds: Fix uapi header macros leading to a compilation error 2024-12-16 21:30:20 +01:00
thp7312.h
time.h
time_types.h
timerfd.h
times.h
timex.h
tiocl.h
tipc.h
tipc_config.h
tipc_netlink.h
tipc_sockets_diag.h
tls.h
toshiba.h
tps6594_pfsm.h
trace_mmap.h
tty.h
tty_flags.h
types.h
ublk_cmd.h
udf_fs_i.h
udmabuf.h
udp.h
uhid.h
uinput.h
uio.h
uleds.h
ultrasound.h
um_timetravel.h
un.h
unistd.h
unix_diag.h
usbdevice_fs.h
usbip.h
user_events.h
userfaultfd.h
userio.h
utime.h
utsname.h
uuid.h
uvcvideo.h
v4l2-common.h
v4l2-controls.h
v4l2-dv-timings.h
v4l2-mediabus.h
v4l2-subdev.h
vbox_err.h
vbox_vmmdev_types.h
vboxguest.h
vdpa.h
vduse.h vduse: relicense under GPL-2.0 OR BSD-3-Clause 2025-01-08 06:37:13 -05:00
vesa.h
veth.h
vfio.h
vfio_ccw.h
vfio_zdev.h
vhost.h
vhost_types.h
videodev2.h
virtio_9p.h
virtio_balloon.h
virtio_blk.h
virtio_bt.h
virtio_config.h
virtio_console.h
virtio_crypto.h
virtio_fs.h
virtio_gpio.h
virtio_gpu.h
virtio_i2c.h
virtio_ids.h
virtio_input.h
virtio_iommu.h
virtio_mem.h
virtio_mmio.h
virtio_net.h
virtio_pci.h virtio-pci: define type and header for PCI vendor data 2025-01-27 09:39:25 -05:00
virtio_pcidev.h
virtio_pmem.h
virtio_ring.h
virtio_rng.h
virtio_scmi.h
virtio_scsi.h
virtio_snd.h
virtio_types.h
virtio_vsock.h
vm_sockets.h
vm_sockets_diag.h
vmclock-abi.h
vmcore.h
vsockmon.h
vt.h
vtpm_proxy.h
wait.h
watch_queue.h
watchdog.h
wireguard.h
wireless.h
wmi.h
wwan.h
x25.h
xattr.h
xdp_diag.h
xfrm.h xfrm: netlink: add config (netlink) options 2024-12-05 10:01:15 +01:00
xilinx-v4l2-controls.h
zorro.h
zorro_ids.h