mmap.2: Add note about partial page in BUGS section

This adds a note about Linux behavior with a partial page at the end
of the object. The problem here is that a page that contains only
part of a file (because the file size is not a multiple of PAGE_SIZE)
stays in the page cache even after the mapping is unmapped and the
file is closed. So if some process dirties such a page, other mappings
will see the changes rather than zeroes.

I've also attached a reproducer, which is a stripped-down version of
the LTP test. The child creates a file of size PAGE_SIZE/2, maps it,
and changes the content past PAGE_SIZE/2. The parent waits for the
child to exit, maps the same file, and checks the content past
PAGE_SIZE/2.  Uncommenting the msync() makes the test
succeed.

==========

/*
 * Reproducer for stale data in the partial page at the end of a mapped file.
 *
 * The child creates /tmp/test with a size of half a page, maps it MAP_SHARED,
 * checks that a byte past the end of the file reads as zero, stores 'b' there,
 * then unmaps and closes the file.  The parent waits for the child, maps the
 * same file again and inspects the same byte.
 *
 * Returns 0 and prints "Test PASSED" when the parent does not see the 'b';
 * returns 1 when it does, or when any setup step fails.
 */
int main(void)
{
	char tmpfname[256];
	long page_size;

	void *pa;
	size_t len;
	int fd;

	pid_t child;
	char *ch;
	int exit_val;

	page_size = sysconf(_SC_PAGE_SIZE);
	if (page_size == -1) {
		printf("Error at sysconf(): %s\n", strerror(errno));
		return 1;
	}

	/* Half a page, so the file ends in the middle of its only page */
	len = page_size / 2;

	snprintf(tmpfname, sizeof(tmpfname), "/tmp/test");
	child = fork();
	switch (child) {
	case 0:
		/* Create shared object */
		unlink(tmpfname);
		fd = open(tmpfname, O_CREAT | O_RDWR | O_EXCL,
			  S_IRUSR | S_IWUSR);
		if (fd == -1) {
			printf("Error at open(): %s\n", strerror(errno));
			return 1;
		}
		if (ftruncate(fd, len) == -1) {
			printf("Error at ftruncate(): %s\n", strerror(errno));
			close(fd);
			return 1;
		}

		pa = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (pa == MAP_FAILED) {
			printf("Error at mmap(): %s\n", strerror(errno));
			close(fd);
			return 1;
		}

		/* Check the partial page is ZERO filled */
		/* Cast first: arithmetic on void * is not ISO C */
		ch = (char *)pa + len + 1;
		if (*ch != 0) {
			printf("Test FAILED: "
			       "The partial page at the end of an object "
			       "is not zero-filled\n");
			munmap(pa, len);
			close(fd);
			return 1;
		}

		/* Write the partial page */
		*ch = 'b';
		/* Intentionally disabled; enable to compare the outcome */
		//msync(pa, len, MS_SYNC);
		munmap(pa, len);
		close(fd);
		return 0;
	case -1:
		printf("Error at fork(): %s\n", strerror(errno));
		return 1;
	default:
		break;
	}

	/* A failed wait() would leave exit_val uninitialised */
	if (wait(&exit_val) == -1) {
		printf("Error at wait(): %s\n", strerror(errno));
		unlink(tmpfname);
		return 1;
	}
	if (!(WIFEXITED(exit_val) && (WEXITSTATUS(exit_val) == 0))) {
		unlink(tmpfname);
		printf("Child exited abnormally\n");
		return 1;
	}

	fd = open(tmpfname, O_RDWR, 0);
	if (fd == -1) {
		/* Report before unlink() so errno still belongs to open() */
		printf("Error at 2nd open(): %s\n", strerror(errno));
		unlink(tmpfname);
		return 1;
	}
	/* The name is no longer needed once the file is open */
	unlink(tmpfname);

	pa = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (pa == MAP_FAILED) {
		printf("Error at 2nd mmap(): %s\n", strerror(errno));
		close(fd);
		return 1;
	}

	/* Same offset the child wrote to */
	ch = (char *)pa + len + 1;
	if (*ch == 'b') {
		printf("Test FAILED: Modification of the partial page "
		       "at the end of an object is written out\n");
		munmap(pa, len);
		close(fd);
		return 1;
	}
	close(fd);
	munmap(pa, len);

	printf("Test PASSED\n");
	return 0;
}

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
This commit is contained in:
Cyril Hrubis 2013-02-25 08:52:37 +01:00 committed by Michael Kerrisk
parent 42b437ca05
commit 2e43522f07
1 changed files with 9 additions and 0 deletions

View File

@@ -611,6 +611,15 @@ Since kernel 2.6.12,
fails with the error
.B EINVAL
for this case.
POSIX specifies that the system shall always zero fill any partial page at the
end of the object and that the system will never write any modification of the
object beyond its end. On Linux, when you write data to such a partial page
after the end of the object, the data stays in the page cache even after the
file is closed and unmapped, and even though the data is never written to the
file itself, subsequent mappings may see the modified content. In some cases,
this could be fixed by calling msync before the unmap takes place; however,
this doesn't work on tmpfs (for example, when using the shm IPC interface).
.SH EXAMPLE
.\" FIXME . Add an example here that uses an anonymous shared region for
.\" IPC between parent and child.