1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
/*
* x86_64 clone syscall trampoline
*
* Goal: provide a reliable way to run a C function in the child after the
* clone(2) syscall, without depending on the parent's stack frame (which is
* invalid in the child because the child starts with a new stack pointer).
*
* API (internal):
*
* long __clone(long (*fn)(void *), void *arg,
* unsigned long flags, void *child_stack,
* int *ptid, void *newtls, int *ctid);
*
* This uses the raw Linux x86_64 clone syscall:
* long clone(unsigned long flags, void *child_stack,
* int *ptid, int *ctid, unsigned long newtls);
*
* Behavior:
* - In the parent, returns child TID (> 0) or -errno (< 0).
* - In the child, calls fn(arg) and then exits the thread/process via
* SYS_exit (does not return).
*
* Notes:
* - We explicitly place (fn,arg) onto the child's new stack before issuing
* the syscall. On success in the child, we recover them from %rsp and call.
* - This avoids relying on any locals from the parent frame.
* - The stack is aligned to 16 bytes before the call into C.
*/
.text
.globl __clone
.type __clone, @function

/*
 * Numeric constants used by __clone (Linux x86_64 values). The names carry
 * a .L prefix and a __clone tag so they stay assembler-local and cannot
 * collide with syscall-number macros or symbols defined elsewhere.
 */
.equ .L__clone_nr_clone, 56		/* SYS_clone */
.equ .L__clone_nr_exit, 60		/* SYS_exit */
.equ .L__clone_neg_einval, -22		/* -EINVAL */

/*
 * long __clone(long (*fn)(void *), void *arg, unsigned long flags,
 *              void *child_stack, int *ptid, void *newtls, int *ctid);
 *
 * ABI:    SysV AMD64, AT&T operand order (src, dst).
 * In:     %rdi = fn          %rsi = arg      %rdx = flags
 *         %rcx = child_stack %r8  = ptid     %r9  = newtls
 *         8(%rsp) = ctid (7th argument, just above the return address)
 * Out:    parent: %rax = child TID (> 0) or -errno (< 0).
 *                 -EINVAL is returned without entering the kernel when
 *                 fn is NULL or child_stack rounds down to address 0.
 *         child:  never returns; runs fn(arg) on the new stack and then
 *                 issues SYS_exit with fn's return value as the status.
 * Clobb:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r10, %r11, flags
 *         (all caller-saved; %rcx/%r11 are also destroyed by `syscall`).
 * Stack:  writes 16 bytes (fn, arg) just below child_stack after rounding
 *         it down to a 16-byte boundary; nothing is pushed on the
 *         caller's stack.
 */
__clone:
	/*
	 * Validate before touching memory or the kernel: a NULL fn would
	 * only be discovered in the child (after the clone succeeded), and
	 * a NULL stack would make the stores below hit address -16.
	 */
	testq	%rdi, %rdi
	jz	.L__clone_einval

	/*
	 * r11 = working copy of the child's stack pointer, rounded down to
	 * 16 bytes. andq sets ZF, so the same instruction also rejects a
	 * stack pointer that is NULL (or so small it aligns to 0).
	 */
	movq	%rcx, %r11
	andq	$-16, %r11
	jz	.L__clone_einval

	/*
	 * Child stack layout handed to the kernel (r11 stays 16-aligned, so
	 * %rsp is 16-aligned at the `call` in the child, as the ABI needs):
	 *     0(%r11)  fn
	 *     8(%r11)  arg
	 */
	subq	$16, %r11
	movq	%rdi, 0(%r11)
	movq	%rsi, 8(%r11)

	/*
	 * Shuffle into the raw syscall convention:
	 *     rax = SYS_clone, rdi = flags, rsi = child sp,
	 *     rdx = ptid, r10 = ctid, r8 = newtls
	 * Order matters: rdi/rsi/rdx are overwritten only after their old
	 * contents (fn/arg/flags) have been consumed.
	 */
	movq	8(%rsp), %r10			/* ctid from caller's stack */
	movq	%rdx, %rdi			/* flags */
	movq	%r11, %rsi			/* child stack pointer */
	movq	%r8, %rdx			/* ptid */
	movq	%r9, %r8			/* newtls */
	movl	$.L__clone_nr_clone, %eax	/* 32-bit write zero-extends */
	syscall

	/* Non-zero rax: we are the parent (TID or -errno). */
	testq	%rax, %rax
	jnz	.L__clone_parent

	/*
	 * Child (rax == 0). %rsp is the stack prepared above; nothing from
	 * the parent's frame is used from here on.
	 */
	xorl	%ebp, %ebp			/* mark outermost frame for unwinders */
	movq	8(%rsp), %rdi			/* rdi = arg */
	call	*0(%rsp)			/* fn(arg) */

	/* fn returned: terminate with its return value as exit status. */
	movq	%rax, %rdi
	movl	$.L__clone_nr_exit, %eax
	syscall
	hlt					/* trap if SYS_exit ever returns */

.L__clone_parent:
	ret

.L__clone_einval:
	movq	$.L__clone_neg_einval, %rax	/* rax = -EINVAL */
	ret
.size __clone, .-__clone
|