Thus, the C language I/O request
write (FileDescriptor, BufferAddress, BufferSize) system service is implemented with the
int $0x80 instruction (the trap instruction with vector 80 hex specified).
C Library implementation of write
/* write (FileDescriptor, BufferAddress, BufferSize) issued through the
   int $0x80 system call gate.  The three arguments are placed in %ebx,
   %ecx and %edx, the system call number in %eax, and the trap instruction
   transfers control to the kernel.
   Comments use C-style delimiters: in GNU as for i386 a `;' separates
   statements and does not start a comment.
   NOTE(review): the symbol operands carry no `$' prefix, so each movl is
   a load from the memory location the symbol names; this assumes the
   symbols are variables holding the values -- confirm.  */
movl BufferSize, %edx /* third argument - byte count */
movl BufferAddress, %ecx /* second argument - pointer to user space buffer */
movl FileDescriptor, %ebx /* first argument - usually 1 (stdout) */
movl $4, %eax /* system call number - offset in syscall table to sys_write */
int $0x80 /* trap into kernel */
C library implementation of exit (terminate) user program
/* exit (0) issued through the int $0x80 system call gate: the exit code
   goes in %ebx and the system call number in %eax.
   Comments use C-style delimiters: in GNU as for i386 a `;' separates
   statements and does not start a comment.  */
movl $0, %ebx /* only one argument - exit code */
movl $1, %eax /* offset in syscall table to sys_exit */
int $0x80 /* trap into kernel */
From the GNU C Library glibc-2.3.2/sysdeps/unix/sysv/linux/i386/sysdep.h file:
/* ENTER_KERNEL is the instruction that transfers control to the kernel.
   The original calling convention for system calls on Linux/i386 is
   int $0x80.  When I386_USE_SYSENTER is defined the entry is instead an
   indirect call: through %gs:SYSINFO_OFFSET in SHARED builds, through
   _dl_sysinfo otherwise.  */
#if defined I386_USE_SYSENTER && defined SHARED
# define ENTER_KERNEL call *%gs:SYSINFO_OFFSET
#elif defined I386_USE_SYSENTER
# define ENTER_KERNEL call *_dl_sysinfo
#else
# define ENTER_KERNEL int $0x80
#endif
/* Linux takes system call arguments in registers:
syscall number %eax call-clobbered
arg 1 %ebx call-saved
arg 2 %ecx call-clobbered
arg 3 %edx call-clobbered
arg 4 %esi call-saved
arg 5 %edi call-saved
The stack layout upon entering the function is:
20(%esp) Arg# 5
16(%esp) Arg# 4
12(%esp) Arg# 3
8(%esp) Arg# 2
4(%esp) Arg# 1
(%esp) Return address
(Of course a function with say 3 arguments does not have entries for
arguments 4 and 5.)
The following code tries hard to be optimal. A general assumption
(which is true according to the data books I have) is that
2 * xchg is more expensive than pushl + movl + popl
Besides this, a neat trick is used. The calling conventions for Linux
say that, among the registers used for parameters, %ecx and %edx need
not be saved. Moreover, we may clobber these registers even when
they are not used for parameter passing.
As a result one can see below that we save the content of the %ebx
register in the %edx register when we have less than 3 arguments
(2 * movl is less expensive than pushl + popl).
Second, unlike for the other registers, we don't save the content of
%ecx and %edx when we have more than 1 and 2 arguments, respectively.
The code below might look a bit long, but we have to take care of
the pipelined processors (i586). Here the `pushl' and `popl'
instructions are marked as NP (not pairable), the exception being
two consecutive such instructions. This gives no penalty on
other processors though. */
/* DO_CALL (syscall_name, args): emit the instruction sequence for one
   system call taking `args' arguments.  The expansion, in order:
     PUSHARGS_<args>   preserve the registers about to be overwritten
     DOARGS_<args>     load the arguments from the stack into registers
     movl ..., %eax    load SYS_ify (syscall_name) as an immediate
                       (SYS_ify is defined elsewhere; presumably it
                       yields the system call number -- confirm)
     ENTER_KERNEL      enter the kernel
     POPARGS_<args>    restore the preserved registers
   `args' is token-pasted onto the helper names, so it must be spelled
   as a literal count for which the three helpers exist (the sets
   visible in this file cover 0 through 5).  */
#undef DO_CALL
#define DO_CALL(syscall_name, args) \
PUSHARGS_##args \
DOARGS_##args \
movl $SYS_ify (syscall_name), %eax; \
ENTER_KERNEL \
POPARGS_##args
/* Note: the macro name prefixes "PUSHARGS_", "DOARGS_" and "POPARGS_" above
are combined with the actual number of arguments specified in the DO_CALL
macro invocation to yield one of the following macro sets: */
/* Zero arguments: nothing to save, load or restore, so every macro is
   empty.  The underscore-prefixed forms are also where the chained
   _PUSHARGS_n / _DOARGS_n / _POPARGS_n definitions for larger n end.  */
#define PUSHARGS_0 /* No arguments to push. */
#define DOARGS_0 /* No arguments to frob. */
#define POPARGS_0 /* No arguments to pop. */
#define _PUSHARGS_0 /* No arguments to push. */
#define _DOARGS_0(n) /* No arguments to frob. */
#define _POPARGS_0 /* No arguments to pop. */
/* One argument.  PUSHARGS_1/POPARGS_1 keep the old %ebx in %edx instead
   of on the stack, so the stack pointer does not move and DOARGS_1 reads
   the argument at 4(%esp), directly above the return address.
   The underscore variants save %ebx with pushl/popl instead; _DOARGS_1
   takes the argument's offset from %esp as its parameter n.  */
#define PUSHARGS_1 movl %ebx, %edx; PUSHARGS_0
#define DOARGS_1 _DOARGS_1 (4)
#define POPARGS_1 POPARGS_0; movl %edx, %ebx
#define _PUSHARGS_1 pushl %ebx; _PUSHARGS_0
#define _DOARGS_1(n) movl n(%esp), %ebx; _DOARGS_0(n-4)
#define _POPARGS_1 _POPARGS_0; popl %ebx
/* Two arguments.  Saving and restoring are the same as for one argument
   (%ebx is kept in %edx, nothing is pushed), so argument 2 is at
   8(%esp).  _DOARGS_2 loads it into %ecx, which is not saved, and then
   passes n-4 on to _DOARGS_1 for argument 1.  */
#define PUSHARGS_2 PUSHARGS_1
#define DOARGS_2 _DOARGS_2 (8)
#define POPARGS_2 POPARGS_1
#define _PUSHARGS_2 _PUSHARGS_1
#define _DOARGS_2(n) movl n(%esp), %ecx; _DOARGS_1 (n-4)
#define _POPARGS_2 _POPARGS_1
/* Three arguments.  %edx now receives argument 3, so it can no longer
   hold the old %ebx; PUSHARGS_3 switches to the underscore chain, which
   ends in `pushl %ebx'.  That single push moves every argument 4 bytes
   further from %esp, hence argument 3 is at 12 + 4 = 16(%esp).  */
#define PUSHARGS_3 _PUSHARGS_2
#define DOARGS_3 _DOARGS_3 (16)
#define POPARGS_3 _POPARGS_3
#define _PUSHARGS_3 _PUSHARGS_2
#define _DOARGS_3(n) movl n(%esp), %edx; _DOARGS_2 (n-4)
#define _POPARGS_3 _POPARGS_2
/* Four arguments.  %esi is pushed first, then %ebx through the chained
   _PUSHARGS_3.  With two pushes (8 bytes) on the stack, argument 4 is at
   16 + 8 = 24(%esp).  _POPARGS_4 restores in the reverse order: %ebx
   first, %esi last.  */
#define PUSHARGS_4 _PUSHARGS_4
#define DOARGS_4 _DOARGS_4 (24)
#define POPARGS_4 _POPARGS_4
#define _PUSHARGS_4 pushl %esi; _PUSHARGS_3
#define _DOARGS_4(n) movl n(%esp), %esi; _DOARGS_3 (n-4)
#define _POPARGS_4 _POPARGS_3; popl %esi
/* Five arguments.  %edi is pushed first, followed by %esi and %ebx
   through the chained _PUSHARGS_4.  With three pushes (12 bytes) on the
   stack, argument 5 is at 20 + 12 = 32(%esp).  _POPARGS_5 restores in
   the reverse order, popping %edi last.  */
#define PUSHARGS_5 _PUSHARGS_5
#define DOARGS_5 _DOARGS_5 (32)
#define POPARGS_5 _POPARGS_5
#define _PUSHARGS_5 pushl %edi; _PUSHARGS_4
#define _DOARGS_5(n) movl n(%esp), %edi; _DOARGS_4 (n-4)
#define _POPARGS_5 _POPARGS_4; popl %edi
#else /* !__ASSEMBLER__ */
/* syscall: generic system call entry.  The code reads the system call
   number from the first stack argument and loads the five stack slots
   that follow it as arguments.
   PUSHARGS_5 pushes %edi, %esi and %ebx (12 bytes), so afterwards:
     12(%esp)              return address
     16(%esp)              system call number
     20(%esp) .. 36(%esp)  arguments 1 .. 5
   _DOARGS_5 (36) starts at 36(%esp) and steps down by 4, loading %edi,
   %esi, %edx, %ecx and %ebx in that order.
   The kernel's result is left in %eax; values in -4095 .. -1 are routed
   to SYSCALL_ERROR_LABEL (defined elsewhere).  */
.text
ENTRY (syscall)
PUSHARGS_5 /* Save register contents. */
_DOARGS_5(36) /* Load arguments. */
movl 16(%esp), %eax /* Load syscall number into %eax. */
ENTER_KERNEL /* Do the system call. */
POPARGS_5 /* Restore register contents. */
/* jae is an unsigned comparison: only results in -4095 .. -1 are at or
   above (unsigned) -4095, so large valid results are not taken as errors. */
cmpl $-4095, %eax /* Check %eax for error. */
jae SYSCALL_ERROR_LABEL /* Jump to error handler if error. */
L(pseudo_end):
ret /* Return to caller. */
/* Linux uses a negative return value to indicate syscall errors,
unlike most Unices, which use the condition codes' carry flag.
Since version 2.1 the return value of a system call might be
negative even if the call succeeded. E.g., the `lseek' system call
might return a large offset. Therefore we must not anymore test
for < 0, but test for a real error by making sure the value in %eax
is a real error number. Linus said he will make sure that no syscall
returns a value in -1 .. -4095 as a valid result so we can safely
test with -4095. */