Commit Graph

310 Commits

Author SHA1 Message Date
Michael Kerrisk 475f1bca2c Arg for %p is a pointer to _a pointer to_ void.
As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=263109
2005-06-17 13:06:58 +00:00
Michael Kerrisk 019934ed21 BUGS: In kernels < 2.6.9, EPOLL_CTL_DEL required a non-NULL
'event', even though this argument is ignored.
As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=306517.
2005-06-17 11:33:07 +00:00
Michael Kerrisk 6a4c2e3618 freopen(3) can change file descriptor associations of stdin/stdout/stderr 2005-06-16 16:10:05 +00:00
Michael Kerrisk b99cf1e0e8 strerror_r(3) requires #define _XOPEN_SOURCE 600 2005-06-16 16:09:25 +00:00
Michael Kerrisk 2a01941630 Rewrote description of return value.
As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=296183
2005-06-16 15:07:57 +00:00
Michael Kerrisk 4f90a5f34e Removed erroneous description of makecontext() return value.
As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=311800
2005-06-16 14:37:57 +00:00
Michael Kerrisk adb036712f removed fixed FIXMEs 2005-06-16 10:35:03 +00:00
Michael Kerrisk 4a3f7c5fbc formatting 2005-06-16 10:32:30 +00:00
Michael Kerrisk 95d29ab2c8 More SIOCATMARK changes. 2005-06-16 10:23:59 +00:00
Michael Kerrisk 8729177b44 global edit s/ -1/ \\-1/g 2005-06-15 14:10:23 +00:00
Michael Kerrisk f59a3f1941 Global edit: s/nonzero/non-zero/ 2005-06-15 13:32:34 +00:00
Michael Kerrisk 7ecc26f664 Various wording and formatting fixes 2005-06-15 13:26:36 +00:00
Michael Kerrisk 5c45d5f543 Various wording and formatting fixes.
Incorporated some new /proc/sys/net/ipv4/tcp_* file descriptions
from the 2.6.12 source file Documentation/networking/ip-sysctl.txt.
2005-06-15 12:56:21 +00:00
Michael Kerrisk 81c6dd6c54 Added pointer to ip(7) and proc(7) for /proc/sys/net 2005-06-15 12:07:30 +00:00
Michael Kerrisk 2e0eee5451 RLIMIT_RSS only has effect "in 2.4.x", not "in 2.4 and later". 2005-06-15 11:35:49 +00:00
Michael Kerrisk 096a9c0199 Added FIXME for mqueue files 2005-06-15 08:30:11 +00:00
Michael Kerrisk fd1835be9c Fixes in discussion of SIOCATMARK + general wording and formatting
clean-ups.
2005-06-14 15:24:55 +00:00
Michael Kerrisk 54221c6a41 Various minor changes 2005-06-14 11:25:12 +00:00
Michael Kerrisk 027df6e02d Wording improvements 2005-06-14 11:22:23 +00:00
Michael Kerrisk 5af3e8eeee Wording improvements 2005-06-14 11:22:06 +00:00
Michael Kerrisk 7b57506d6d Various minor changes 2005-06-14 11:20:57 +00:00
Michael Kerrisk fd064f40a4 Small wording fix. 2005-06-13 09:51:27 +00:00
Michael Kerrisk c13fcab060 Salut Olivier (and Nishanth),
Regarding man page documentation of the problem of short sleeps 
for setitimer(2)...

> > -- pointers to those threads
> 
> http://bugzilla.kernel.org/show_bug.cgi?id=4569
> http://lkml.org/lkml/2005/4/29/163
> 
> > -- indications of which kernel versions show this bahaviour
> 
> AFAIK, all versions as far as x86 is concerned.
> Dunno if it is hardware specific.
> 
> > -- a (short) test program to demonstrate it, if you have one.
> 
> See the bugzilla bug's attachments

Sorry for the long delay in following this up, but I've got to 
it now.  I tweaked your suggestions slightly:

{{
Timers will never expire before the requested time,
-instead expiring some short, constant time afterwards, dependent
-on the system timer resolution (currently 10ms).  
+but may expire some (short) time afterwards, which depends
+on the system timer resolution and on the system load.
+Upon expiration, a signal will be generated and the timer reset.
+If the timer expires while the process is active (always true for

+On certain systems (including x86), the Linux kernel has a bug which will
+produce premature timer expirations of up to one jiffy under some
+circumstances.
}}

Thanks for this bug report,

Nishanth: if and when your changes are accepted, and the problem 
is thus fixed, could you please send me a notification of that
fact, and I can then further amend the manual pages.

Cheers,

Michael



/* itimer_short_interval_bug.c 

   June 2005

   In current Linux kernels, an interval timer set using setitimer() 
   can sometimes sleep *less* than the specified interval.
   This program demonstrates the behaviour by looping through all
   itimer values from 1 microsecond upwards, in one microsecond steps.
*/
/* Adapted from a program by Olivier Croquette, June 2005 */

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>


typedef unsigned long long int u_time_t; /* in microsecs */

/* Number of times the SIGALRM handler has run since main() last reset it.
   It is written from a signal handler and read from normal code, so it is
   declared 'volatile sig_atomic_t': the only object type that ISO C
   guarantees can be safely accessed in both contexts. */
static volatile sig_atomic_t handler_flag;

/* Return the current time of day, as reported by gettimeofday(),
   expressed as a single count of microseconds.
   If gettimeofday() fails, a diagnostic is printed with perror()
   and 0 is returned. */

static u_time_t
gettime(void)
{
    struct timeval now;
    long long usecs;
    int rc;

    rc = gettimeofday(&now, NULL);
    if (rc == -1) {
        perror("gettimeofday()");
        return 0;
    }

    /* Scale the seconds in 64-bit arithmetic, then add the sub-second part */
    usecs = now.tv_sec * 1000000LL;
    usecs += now.tv_usec;
    return usecs;
}


/* Signal handler (installed for SIGALRM by isleep()): count one more
   delivery in handler_flag.  None of the three arguments is used. */
static void
handler(int sig, siginfo_t *siginfo, void *context)
{
    (void) sig;
    (void) siginfo;
    (void) context;

    handler_flag += 1;
}


/* Sleep for 'time' microsecs, using a one-shot ITIMER_REAL timer.

   SIGALRM is blocked before the timer is armed, so the signal cannot be
   delivered before we are waiting for it; sigsuspend() then atomically
   installs an empty signal mask and waits for a signal to be caught.
   Note that sigsuspend() returns after *any* caught signal, not only
   SIGALRM; the caller can detect that by inspecting handler_flag.
   SIGALRM is still blocked when this function returns.

   Returns 0 on success (or immediately, if 'time' is 0), and 1 if one of
   the set-up calls fails, after printing a diagnostic with perror(). */
static int
isleep(u_time_t time)
{
    struct itimerval  newtv;
    sigset_t sigset;
    struct sigaction  sigact;

    if (time == 0)
        return 0;

    /* block SIGALRM */
    sigemptyset(&sigset);
    sigaddset(&sigset, SIGALRM);
    if (sigprocmask(SIG_BLOCK, &sigset, NULL) == -1) {
        perror("sigprocmask(SIG_BLOCK)");
        return 1;
    }

    /* set up our handler */
    sigact.sa_sigaction = handler;
    sigemptyset(&sigact.sa_mask);
    sigact.sa_flags = SA_SIGINFO;
    if (sigaction(SIGALRM, &sigact, NULL) == -1) {
        perror("sigaction(SIGALRM)");
        return 1;
    }

    /* one-shot timer: no reload interval */
    newtv.it_interval.tv_sec  = 0;
    newtv.it_interval.tv_usec = 0;
    newtv.it_value.tv_sec     = time / 1000000;
    newtv.it_value.tv_usec    = time % 1000000;
    if (setitimer(ITIMER_REAL, &newtv, NULL) == -1) {
        perror("setitimer(set)");
        return 1;
    }

    /* Wait with no signals blocked.  sigsuspend() always returns -1
       (with errno set to EINTR) once a handler has run, so its return
       value carries no information and is deliberately not tested. */
    sigemptyset(&sigset);
    sigsuspend(&sigset);
    return 0;
}


/* For each interval 'wait' = 1, 2, 3, ... microseconds, sleep for that
   interval 'num-loops' times (argv[1]) and measure the elapsed time of
   each sleep.  A sleep that lasted *less* than requested counts as a
   failure.  For every interval with at least one failure, one line is
   printed:

       wait: numFail fail (minDiff maxDiff; avg=...)

   where the differences are (actual - requested) in microseconds, and so
   are negative: minDiff is the smallest shortfall seen (closest to zero),
   maxDiff the largest.  Every 10000th interval value is also printed on
   its own as a progress indicator.

   The outer loop has no exit condition; the program runs until it is
   interrupted, or until a sleep cannot be set up. */
int
main(int argc, char *argv[]) {
    u_time_t wait;
    long loop, numLoops;
    u_time_t t1, t2;
    u_time_t actual;
    long long minDiff, maxDiff, totDiff, diff;
    int numFail;
    char *endptr;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s num-loops\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* Reject empty, non-numeric, and non-positive counts: with zero
       iterations the program would spin forever without measuring. */
    numLoops = strtol(argv[1], &endptr, 10);
    if (endptr == argv[1] || *endptr != '\0' || numLoops <= 0) {
        fprintf(stderr, "num-loops must be a positive integer: %s\n",
                argv[1]);
        exit(EXIT_FAILURE);
    }

    setbuf(stdout, NULL);

    for (wait = 1; ; wait++) {
        maxDiff = 0;
        numFail = 0;
        totDiff = 0;
        /* Start at the most negative possible shortfall, so that the
           first failure always replaces it. */
        minDiff = -(long long) wait;

        if (wait % 10000 == 0)
            printf("%llu\n", wait);

        for (loop = 0; loop < numLoops; loop++) {
            handler_flag = 0;

            t1 = gettime();
            if (isleep(wait) != 0)
                exit(EXIT_FAILURE);     /* isleep() already reported why */
            t2 = gettime();             /* sample before doing any output */

            if (handler_flag != 1)
                printf("Problem with the handler flag (%d)!\n",
                        (int) handler_flag);

            actual = t2 - t1;
            if (actual < wait) {
                /* Signed arithmetic: the result is negative here */
                diff = (long long) actual - (long long) wait;
                if (diff < maxDiff)
                    maxDiff = diff;
                if (diff > minDiff)
                    minDiff = diff;
                totDiff += diff;
                numFail++;
            }
        }

        if (numFail > 0)
            printf("%llu: %3d fail (%4lld %4lld; avg=%6.1f)\n",
                    wait, numFail, minDiff, maxDiff,
                    (double) totDiff / numFail);
    }

    return 0;   /* not reached */
} /* main */
2005-06-13 09:01:49 +00:00
Michael Kerrisk 561aa928db Remove duplicated CLONE_VFORK text 2005-06-13 06:09:43 +00:00
Michael Kerrisk 6e2d1c54dd Formatting fix 2005-06-09 07:12:30 +00:00
Michael Kerrisk deea4228eb Updates 2005-06-08 13:27:32 +00:00
Michael Kerrisk 23a6e651f3 Fix discussion of alternate signals stack 2005-06-08 13:27:21 +00:00
Michael Kerrisk 3616b7c0cf New pthreads.7 page 2005-06-07 12:35:32 +00:00
Michael Kerrisk 1b88e9c222 Minor fix 2005-06-07 06:51:12 +00:00
Michael Kerrisk ad4fa95926 Formatting fix 2005-06-03 11:14:19 +00:00
Michael Kerrisk 9a7957be33 Initial set-up for 2.04 release 2005-06-02 13:28:50 +00:00
Michael Kerrisk 552b1e5504 2.03 release 2005-06-02 13:11:04 +00:00
Michael Kerrisk 2ed34e110b 2.03 release 2005-06-02 13:10:44 +00:00
Michael Kerrisk de7639e575 Updated copyright date 2005-06-02 12:52:15 +00:00
Michael Kerrisk f7110f6014 Noted that any thread in a thread group can wait for a child
that one of them creates using fork().
2005-06-02 10:22:34 +00:00
Michael Kerrisk 9cbf71d215 Fix typo 2005-05-31 16:08:45 +00:00
Michael Kerrisk 6aacaf94e5 Hi Andries,
> The question came up whether execve of a suid binary while being ptraced
> would fail or ignore the suid part. The answer today seems to be the
> latter:
> 
> E.g. (in 2.6.11) security/dummy.c:
> 
> static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int 
> unsafe)
> {
>         if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) {
>                 if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) &&
> !capable(CAP_SETUID)) {
>                         bprm->e_uid = current->uid;
>                         bprm->e_gid = current->gid;
>                 }
>         }
> }
> 
> and fs/exec.c:
> 
> void compute_creds(struct linux_binprm *bprm) {
>         int unsafe;
> 
>         unsafe = unsafe_exec(current);
>         security_bprm_apply_creds(bprm, unsafe);
> }
> 
> static inline int unsafe_exec(struct task_struct *p) {
>         int unsafe = 0;
>         if (p->ptrace & PT_PTRACED) {
>                 if (p->ptrace & PT_PTRACE_CAP)
>                         unsafe |= LSM_UNSAFE_PTRACE_CAP;
>                 else
>                         unsafe |= LSM_UNSAFE_PTRACE;
>         }
>         return unsafe;
> }
> 
> That is: if the process that calls execve() is being traced,
> the LSM_UNSAFE_PTRACE bit is et in unsafe and security_bprm_apply_creds()
> will make sure the suid/sgid bits are ignored.
> 
> ---
> 
> In my man page I do not read anything like that. It says
> 
>  EPERM  The process is being traced, the user is not the  superuser and
>         the file has an SUID or SGID bit set.
> and
> 
>  If  the current program is being ptraced, a SIGTRAP is sent to it after
>  a successful execve().
> 
>  If the set-uid bit is set on the program file pointed  to  by filename
>  the  effective user ID of the calling process is changed to that of the
>  owner of the program file.
> 
> So, maybe this sentence should be amended to read
> 
>  If the set-uid bit is set on the program file pointed  to  by filename
>  and the current process is not being ptraced, the  effective user ID
>  of the calling process is changed to ...

I changed your "current" to "calling" (to be consistent with the 
rest of the page), but otherwise applied as you suggest.

The revision will appear in man-pages-2.03, which I can release
any time now.  Are you available to do an upload tomorrow?
2005-05-31 16:07:24 +00:00
Michael Kerrisk 5e4e13a313 SEE ALSO s/threads/pthreads 2005-05-30 16:47:35 +00:00
Michael Kerrisk fb829c7406 nswap - not maintained. 2005-05-30 16:44:20 +00:00
Michael Kerrisk 3418ef2fdc FIXME: CAP_AUDIT_CONTROL and CAP_AUDIT_WRITE are not yet documented 2005-05-30 16:43:37 +00:00
Michael Kerrisk 2202881e83 Noted changes in permissions required for SHM_LOCK/SHM_UNLOCK. 2005-05-30 11:58:06 +00:00
Michael Kerrisk 5bdccabd8e 1,$s/inherited/inheritable/g 2005-05-30 09:56:32 +00:00
Michael Kerrisk 9d2a7b1f62 Noted F_SETOWN bug for socket file descriptor in Linux 2.4 and earlier.
Added text on permissions required to send signal to owner.

====

Hello Johannes,

> Betreff: Inaccuracy of fcntl man page
> Datum: Mon, 2 May 2005 20:07:12 +0200

Thanks for your note.

Sorry for the delay in getting back to you.  I needed to find time 
to set aside to look at the details.  Now I've finally got there.

> I have attached a simple program 

Thanks -- a little program is always helpful.

> that uses the fcntl system call in order
> to kill an arbitrary process of the same user.
> According to the fcntl man page, fcntl(fd,F_SETOWN,pid) returns zero if 
> it has success.

Yes.

> If you strace the program while killing for exampe man running in another 
> terminal, you will see that man is killed, but fcntl(fd,F_SETOWN,pid)
> will return EPERM, 

I confirm that I see this problem in 2.4, with both Unix domain 
and Internet domain sockets.

> where you can only find a very confusing explanation 
> in the fcntl man page.

I'm not sure what explanation you mean here.  As far as I can 
tell, the manual page just doesn't cover this point.

> I have looked into the kernel source of 2.4.30 and found out, that 
> net/core/socket::sock_no_fcntl is the culprit if you use fcntl on Unix 
> sockets.

Yes, looks that way to me, as well.  And the 2.2 code looks 
similar.

> If pid is not your own pid or not your own process group, 
> the system call will return EPERM but will also set the pid 
> as you wanted to.

Yes.

> In the 2.6 kernel line, fcntl will react according the specification in
> the manual page.

Yes.

> If you also think, that one should clarify the return specification of 
> fcntl(fd,F_SETOWN,pid) or 2.4.x kernels, please tell me and I will 
> provide you with a patch for the manual page.

In fact I've written some new text under BUGS, which describes
the problem:

  In Linux 2.4 and earlier, there is a bug that can occur when an
  unprivileged  process  uses  F_SETOWN to specify the owner of a
  socket file descriptor as a  process  (group)  other  than  the
  caller.   In this case, fcntl() can return -1 with errno set to
  EPERM, even when the owner process  (group)  is  one  that  the
  caller  has  permission to send signals to.  Despite this error
  return, the file descriptor owner is set, and signals  will  be
  sent to the owner.

Does that seem okay to you?

> Furthermore, it would be interseting to write there, what permissions 
> one need in order to send signals to processes via fcntl 

Good idea.  I added the following new text:

  Sending a signal to  the  owner  process  (group)  specified  by
  F_SETOWN  is  subject  to  the  same  permissions  checks as are
  described for kill(2), where the sending process is the one that
  employs F_SETOWN (but see BUGS below).

====


#define _GNU_SOURCE		/* needed to get the defines */
#include <fcntl.h>		/* in glibc 2.2 this has the needed
				   values defined */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>		/* strtol() */
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>


/**
 * Funnykill kills a program with fcntl
 *
 * Usage: funnykill <pid>
 *
 * One end of a Unix domain socket pair is put into O_ASYNC mode, the
 * process (or, if <pid> is negative, process group) given on the command
 * line is made its owner with F_SETOWN, and SIGKILL is selected as the
 * notification signal with F_SETSIG.  Writing to the other end of the
 * pair then makes the kernel send that signal to the owner.
 *
 * A failing fcntl() call is reported but is deliberately NOT fatal: the
 * point of the program is to show that the signal can still be delivered
 * even when fcntl(F_SETOWN) reports an error.
 *
 * Returns 0 after the write has been attempted, 1 on a usage error or if
 * the socket pair cannot be created.
**/
int
main (int argc, char **argv)
{
  static const char msg[] = "good bye";
  int sockets[2];
  char *endptr;
  long pid;

  if (argc != 2)
    {
      fprintf (stderr, "Usage: funnykill <pid>\n");
      return 1;
    }

  /* Reject empty or non-numeric input and values that do not fit a pid_t */
  pid = strtol (argv[1], &endptr, 10);
  if (endptr == argv[1] || *endptr != '\0' || (long) (pid_t) pid != pid)
    {
      fprintf (stderr, "funnykill: invalid pid: %s\n", argv[1]);
      return 1;
    }

  if (socketpair (AF_UNIX, SOCK_STREAM, 0, sockets) == -1)
    {
      perror ("socketpair");
      return 1;
    }

  if (fcntl (sockets[0], F_SETFL, O_ASYNC | O_NONBLOCK) == -1)
    perror ("fcntl-F_SETFL");
  if (fcntl (sockets[0], F_SETOWN, (pid_t) pid) == -1)
    perror ("fcntl-F_SETOWN");
  if (fcntl (sockets[0], F_SETSIG, SIGKILL) == -1)
    perror ("fcntl-F_SETSIG");

  /* Make sockets[0] readable, which triggers the signal to the owner.
     sizeof msg includes the terminating null byte (9 bytes in total). */
  if (write (sockets[1], msg, sizeof msg) == -1)
    perror ("write");

  return 0;
}
2005-05-20 12:11:25 +00:00
Michael Kerrisk 80ca8aba1d Added EAGAIN error description for umount2(). 2005-05-18 14:42:07 +00:00
Michael Kerrisk 8df37e4d66 Added MNT_EXPIRE, plus a few other tidy-ups. 2005-05-18 14:34:43 +00:00
Michael Kerrisk 6e5a730913 Added comment:
.\" For Unix domain sockets and regular files, EPERM is only returned in
.\" Linux 2.2 and earlier; in Linux 2.4 and later, unprivileged can
.\" use mknod() to make these files.
2005-05-18 09:39:52 +00:00
Michael Kerrisk 99408a60ee Minor fixes to CLONE_THREAD material. 2005-05-18 08:29:38 +00:00
Michael Kerrisk e2fbf61d5a Added text on CLONE_THREAD and signals. 2005-05-17 16:21:20 +00:00
Michael Kerrisk fd8a5be48e Substantially enhanced discussion of CLONE_THREAD. 2005-05-17 15:06:30 +00:00
Michael Kerrisk c3c6c1f61f Update date in header 2005-05-12 09:08:33 +00:00