diff -urN oldtree/Documentation/accounting/getdelays.c newtree/Documentation/accounting/getdelays.c --- oldtree/Documentation/accounting/getdelays.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/Documentation/accounting/getdelays.c 2006-09-30 04:18:35.000000000 -0400 @@ -285,7 +285,7 @@ if (maskset) { rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, - &cpumask, sizeof(cpumask)); + &cpumask, strlen(cpumask) + 1); PRINTF("Sent register cpumask, retval %d\n", rc); if (rc < 0) { printf("error sending register cpumask\n"); @@ -315,7 +315,8 @@ } if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), rep_len)) { - printf("fatal reply error, errno %d\n", errno); + struct nlmsgerr *err = NLMSG_DATA(&msg); + printf("fatal reply error, errno %d\n", err->error); goto done; } @@ -383,7 +384,7 @@ if (maskset) { rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, - &cpumask, sizeof(cpumask)); + &cpumask, strlen(cpumask) + 1); printf("Sent deregister mask, retval %d\n", rc); if (rc < 0) err(rc, "error sending deregister cpumask\n"); diff -urN oldtree/Documentation/accounting/taskstats-struct.txt newtree/Documentation/accounting/taskstats-struct.txt --- oldtree/Documentation/accounting/taskstats-struct.txt 1969-12-31 19:00:00.000000000 -0500 +++ newtree/Documentation/accounting/taskstats-struct.txt 2006-09-30 04:20:31.000000000 -0400 @@ -0,0 +1,161 @@ +The struct taskstats +-------------------- + +This document contains an explanation of the struct taskstats fields. + +There are three different groups of fields in the struct taskstats: + +1) Common and basic accounting fields + If CONFIG_TASKSTATS is set, the taskstats interface is enabled and + the common fields and basic accounting fields are collected for + delivery at do_exit() of a task. 
+2) Delay accounting fields + These fields are placed between + /* Delay accounting fields start */ + and + /* Delay accounting fields end */ + Their values are collected if CONFIG_TASK_DELAY_ACCT is set. +3) Extended accounting fields + These fields are placed between + /* Extended accounting fields start */ + and + /* Extended accounting fields end */ + Their values are collected if CONFIG_TASK_XACCT is set. + +Future extensions should add fields to the end of the taskstats struct, and +should not change the relative position of each field within the struct. + + +struct taskstats { + +1) Common and basic accounting fields: + /* The version number of this struct. This field is always set to + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. + * Each time the struct is changed, the value should be incremented. + */ + __u16 version; + + /* The exit code of a task. */ + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in <linux/acct.h> + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. + */ + __u8 ac_flag; /* Record flags */ + + /* The value of task_nice() of a task. */ + __u8 ac_nice; /* task_nice */ + + /* The name of the command that started this task. */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + + /* The scheduling discipline as set in task->policy field. */ + __u8 ac_sched; /* Scheduling discipline */ + + __u8 ac_pad[3]; + __u32 ac_uid; /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + + /* The time when a task begins, in [secs] since 1970. */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + + /* The elapsed time of a task, in [usec]. */ + __u64 ac_etime; /* Elapsed time [usec] */ + + /* The user CPU time of a task, in [usec]. */ + __u64 ac_utime; /* User CPU time [usec] */ + + /* The system CPU time of a task, in [usec]. */ + __u64 ac_stime; /* System CPU time [usec] */ + + /* The minor page fault count of a task, as set in task->min_flt. 
*/ + __u64 ac_minflt; /* Minor Page Fault Count */ + + /* The major page fault count of a task, as set in task->maj_flt. */ + __u64 ac_majflt; /* Major Page Fault Count */ + + +2) Delay accounting fields: + /* Delay accounting fields start + * + * All values, until the comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + +3) Extended accounting fields + /* Extended accounting fields start */ + + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. 
+ * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + + /* Accumulated virtual memory usage in duration of a task. + * Same as coremem above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS usage in duration of a task, in KBytes. */ + __u64 hiwater_rss; /* High-watermark of RSS usage */ + + /* High watermark of VM usage in duration of a task, in KBytes. */ + __u64 hiwater_vm; /* High-water virtual memory usage */ + + /* The following four fields are I/O statistics of a task. */ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + + /* Extended accounting fields end */ + +} diff -urN oldtree/Documentation/ecryptfs.txt newtree/Documentation/ecryptfs.txt --- oldtree/Documentation/ecryptfs.txt 1969-12-31 19:00:00.000000000 -0500 +++ newtree/Documentation/ecryptfs.txt 2006-09-30 04:24:48.000000000 -0400 @@ -0,0 +1,77 @@ +eCryptfs: A stacked cryptographic filesystem for Linux + +eCryptfs is free software. Please see the file COPYING for details. +For documentation, please see the files in the doc/ subdirectory. For +building and installation instructions please see the INSTALL file. + +Maintainer: Phillip Hellewell +Lead developer: Michael A. Halcrow +Developers: Michael C. Thompson + Kent Yoder +Web Site: http://ecryptfs.sf.net + +This software is currently undergoing development. Make sure to +maintain a backup copy of any data you write into eCryptfs. 
+ +eCryptfs requires the userspace tools downloadable from the +SourceForge site: + +http://sourceforge.net/projects/ecryptfs/ + +Userspace requirements include: + - David Howells' userspace keyring headers and libraries (version + 1.0 or higher), obtainable from + http://people.redhat.com/~dhowells/keyutils/ + - Libgcrypt + + +NOTES + +In the beta/experimental releases of eCryptfs, when you upgrade +eCryptfs, you should copy the files to an unencrypted location and +then copy the files back into the new eCryptfs mount to migrate the +files. + + +MOUNT-WIDE PASSPHRASE + +Create a new directory into which eCryptfs will write its encrypted +files (e.g., /root/crypt). Then, create the mount point directory +(e.g., /mnt/crypt). Now it's time to mount eCryptfs: + +mount -t ecryptfs /root/crypt /mnt/crypt + +You should be prompted for a passphrase and a salt (the salt may be +blank). + +Try writing a new file: + +echo "Hello, World" > /mnt/crypt/hello.txt + +The operation will complete. Notice that there is a new file in +/root/crypt that is at least 12288 bytes in size (depending on your +host page size). This is the encrypted underlying file for what you +just wrote. To test reading, from start to finish, you need to clear +the user session keyring: + +keyctl clear @u + +Then umount /mnt/crypt and mount again per the instructions given +above. + +cat /mnt/crypt/hello.txt + + +NOTES + +eCryptfs version 0.1 should only be mounted on (1) empty directories +or (2) directories containing files only created by eCryptfs. If you +mount a directory that has pre-existing files not created by eCryptfs, +then behavior is undefined. Do not run eCryptfs in higher verbosity +levels unless you are doing so for the sole purpose of debugging or +development, since secret values will be written out to the system log +in that case. 
+ + +Mike Halcrow +mhalcrow@us.ibm.com diff -urN oldtree/Documentation/filesystems/Locking newtree/Documentation/filesystems/Locking --- oldtree/Documentation/filesystems/Locking 2006-09-29 13:50:42.000000000 -0400 +++ newtree/Documentation/filesystems/Locking 2006-09-30 04:16:37.000000000 -0400 @@ -356,10 +356,9 @@ prototypes: loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, - loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, diff -urN oldtree/Documentation/filesystems/vfs.txt newtree/Documentation/filesystems/vfs.txt --- oldtree/Documentation/filesystems/vfs.txt 2006-09-29 15:29:53.000000000 -0400 +++ newtree/Documentation/filesystems/vfs.txt 2006-09-30 04:16:37.000000000 -0400 @@ -712,9 +712,9 @@ struct file_operations { loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, 
unsigned long); diff -urN oldtree/MAINTAINERS newtree/MAINTAINERS --- oldtree/MAINTAINERS 2006-09-29 16:09:28.000000000 -0400 +++ newtree/MAINTAINERS 2006-09-30 04:31:59.000000000 -0400 @@ -989,6 +989,13 @@ W: http://ebtables.sourceforge.net/ S: Maintained +ECRYPT FILE SYSTEM +P: Mike Halcrow, Phillip Hellewell +M: mhalcrow@us.ibm.com, phillip@hellewell.homeip.net +L: ecryptfs-devel@lists.sourceforge.net +W: http://ecryptfs.sourceforge.net/ +S: Supported + EDAC-CORE P: Doug Thompson M: norsk5@xmission.com diff -urN oldtree/arch/i386/Kconfig.debug newtree/arch/i386/Kconfig.debug --- oldtree/arch/i386/Kconfig.debug 2006-09-29 13:50:42.000000000 -0400 +++ newtree/arch/i386/Kconfig.debug 2006-09-30 04:36:14.000000000 -0400 @@ -57,14 +57,16 @@ If in doubt, say "N". config 4KSTACKS - bool "Use 4Kb for kernel stacks instead of 8Kb" - depends on DEBUG_KERNEL + bool "Use 4Kb for kernel stacks instead of 8Kb" if DEBUG_KERNEL + depends on n + default y help If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates running more threads on a system and also reduces the pressure on the VM subsystem for higher order allocations. This option - will also use IRQ stacks to compensate for the reduced stackspace. + will also use separate 4Kb IRQ stacks to compensate for the + reduced stackspace. 
config X86_FIND_SMP_CONFIG bool diff -urN oldtree/arch/s390/hypfs/inode.c newtree/arch/s390/hypfs/inode.c --- oldtree/arch/s390/hypfs/inode.c 2006-09-29 14:03:19.000000000 -0400 +++ newtree/arch/s390/hypfs/inode.c 2006-09-30 04:16:45.000000000 -0400 @@ -134,12 +134,20 @@ return 0; } -static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf, - size_t count, loff_t offset) +static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { char *data; size_t len; struct file *filp = iocb->ki_filp; + /* XXX: temporary */ + char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; + + if (nr_segs != 1) { + count = -EINVAL; + goto out; + } data = filp->private_data; len = strlen(data); @@ -158,12 +166,13 @@ out: return count; } -static ssize_t hypfs_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { int rc; struct super_block *sb; struct hypfs_sb_info *fs_info; + size_t count = iov_length(iov, nr_segs); sb = iocb->ki_filp->f_dentry->d_inode->i_sb; fs_info = sb->s_fs_info; diff -urN oldtree/drivers/char/raw.c newtree/drivers/char/raw.c --- oldtree/drivers/char/raw.c 2006-09-29 14:03:20.000000000 -0400 +++ newtree/drivers/char/raw.c 2006-09-30 04:17:37.000000000 -0400 @@ -238,39 +238,14 @@ return err; } -static ssize_t raw_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { - .iov_base = (char __user *)buf, - .iov_len = count - }; - - return generic_file_write_nolock(file, &local_iov, 1, ppos); -} - -static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { - .iov_base = (char __user *)buf, - .iov_len = count - }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - - static const 
struct file_operations raw_fops = { - .read = generic_file_read, + .read = do_sync_read, .aio_read = generic_file_aio_read, - .write = raw_file_write, - .aio_write = raw_file_aio_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write_nolock, .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, - .readv = generic_file_readv, - .writev = generic_file_writev, .owner = THIS_MODULE, }; diff -urN oldtree/drivers/ide/Kconfig newtree/drivers/ide/Kconfig --- oldtree/drivers/ide/Kconfig 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/ide/Kconfig 2006-09-30 04:34:40.000000000 -0400 @@ -592,6 +592,12 @@ ide-probe at boot. It is reported to support DVD II drives, by the manufacturer. +config BLK_DEV_JMICRON + tristate "JMicron JMB36x support" + help + Basic support for the JMicron ATA controllers. For full support + use the libata drivers. + config BLK_DEV_SC1200 tristate "National SCx200 chipset support" help diff -urN oldtree/drivers/ide/pci/Makefile newtree/drivers/ide/pci/Makefile --- oldtree/drivers/ide/pci/Makefile 2006-09-29 13:50:42.000000000 -0400 +++ newtree/drivers/ide/pci/Makefile 2006-09-30 04:34:40.000000000 -0400 @@ -14,6 +14,7 @@ #obj-$(CONFIG_BLK_DEV_HPT37X) += hpt37x.o obj-$(CONFIG_BLK_DEV_IT8172) += it8172.o obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o +obj-$(CONFIG_BLK_DEV_JMICRON) += jmicron.o obj-$(CONFIG_BLK_DEV_NS87415) += ns87415.o obj-$(CONFIG_BLK_DEV_OPTI621) += opti621.o obj-$(CONFIG_BLK_DEV_PDC202XX_OLD) += pdc202xx_old.o diff -urN oldtree/drivers/ide/pci/jmicron.c newtree/drivers/ide/pci/jmicron.c --- oldtree/drivers/ide/pci/jmicron.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/drivers/ide/pci/jmicron.c 2006-09-30 04:35:54.000000000 -0400 @@ -0,0 +1,270 @@ + +/* + * Copyright (C) 2006 Red Hat + * + * May be copied or modified under the terms of the GNU General Public License + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +typedef enum { + PORT_PATA0 = 0, + 
PORT_PATA1 = 1, + PORT_SATA = 2, +} port_type; + +/** + * jmicron_ratemask - Compute available modes + * @drive: IDE drive + * + * Compute the available speeds for the devices on the interface. This + * is all modes to ATA133 clipped by drive cable setup. + */ + +static u8 jmicron_ratemask(ide_drive_t *drive) +{ + u8 mode = 4; + if (!eighty_ninty_three(drive)) + mode = min(mode, (u8)1); + return mode; +} + +/** + * ata66_jmicron - Cable check + * @hwif: IDE port + * + * Return 1 if the cable is 80pin + */ + +static int __devinit ata66_jmicron(ide_hwif_t *hwif) +{ + struct pci_dev *pdev = hwif->pci_dev; + + u32 control; + u32 control5; + + int port = hwif->channel; + port_type port_map[2]; + + pci_read_config_dword(pdev, 0x40, &control); + + /* There are two basic mappings. One has the two SATA ports merged + as master/slave and the secondary as PATA, the other has only the + SATA port mapped */ + if (control & (1 << 23)) { + port_map[0] = PORT_SATA; + port_map[1] = PORT_PATA0; + } else { + port_map[0] = PORT_SATA; + port_map[1] = PORT_SATA; + } + + /* The 365/366 may have this bit set to map the second PATA port + as the internal primary channel */ + pci_read_config_dword(pdev, 0x80, &control5); + if (control5 & (1<<24)) + port_map[0] = PORT_PATA1; + + /* The two ports may then be logically swapped by the firmware */ + if (control & (1 << 22)) + port = port ^ 1; + + /* + * Now we know which physical port we are talking about we can + * actually do our cable checking etc. Thankfully we don't need + * to do the plumbing for other cases. 
+ */ + switch (port_map[port]) + { + case PORT_PATA0: + if (control & (1 << 3)) /* 40/80 pin primary */ + return 1; + return 0; + case PORT_PATA1: + if (control5 & (1 << 19)) /* 40/80 pin secondary */ + return 0; + return 1; + case PORT_SATA: + break; + } + return 1; /* Avoid bogus "control reaches end of non-void function" */ +} + +static void jmicron_tuneproc (ide_drive_t *drive, byte mode_wanted) +{ + return; +} + +/** + * config_jmicron_chipset_for_pio - set drive timings + * @drive: drive to tune + * @speed we want + * + */ + +static void config_jmicron_chipset_for_pio (ide_drive_t *drive, byte set_speed) +{ + u8 speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL); + if (set_speed) + (void) ide_config_drive_speed(drive, speed); +} + +/** + * jmicron_tune_chipset - set controller timings + * @drive: Drive to set up + * @xferspeed: speed we want to achieve + * + * As the JMicron snoops for timings all we actually need to do is + * make sure we don't set an invalid mode. We do need to honour + * the cable detect here. + */ + +static int jmicron_tune_chipset (ide_drive_t *drive, byte xferspeed) +{ + + u8 speed = ide_rate_filter(jmicron_ratemask(drive), xferspeed); + + return ide_config_drive_speed(drive, speed); +} + +/** + * config_chipset_for_dma - configure for DMA + * @drive: drive to configure + * + * As the JMicron snoops for timings all we actually need to do is + * make sure we don't set an invalid mode. + */ + +static int config_chipset_for_dma (ide_drive_t *drive) +{ + u8 speed = ide_dma_speed(drive, jmicron_ratemask(drive)); + + config_jmicron_chipset_for_pio(drive, !speed); + jmicron_tune_chipset(drive, speed); + return ide_dma_enable(drive); +} + +/** + * jmicron_configure_drive_for_dma - set up for DMA transfers + * @drive: drive we are going to set up + * + * As the JMicron snoops for timings all we actually need to do is + * make sure we don't set an invalid mode. 
+ */ + +static int jmicron_config_drive_for_dma (ide_drive_t *drive) +{ + ide_hwif_t *hwif = drive->hwif; + + if (ide_use_dma(drive)) { + if (config_chipset_for_dma(drive)) + return hwif->ide_dma_on(drive); + } + config_jmicron_chipset_for_pio(drive, 1); + return hwif->ide_dma_off_quietly(drive); +} + +/** + * init_hwif_jmicron - set up hwif structs + * @hwif: interface to set up + * + * Minimal set up is required for the Jmicron hardware. + */ + +static void __devinit init_hwif_jmicron(ide_hwif_t *hwif) +{ + hwif->speedproc = &jmicron_tune_chipset; + hwif->tuneproc = &jmicron_tuneproc; + + hwif->drives[0].autotune = 1; + hwif->drives[1].autotune = 1; + + if (!hwif->dma_base) + goto fallback; + + hwif->atapi_dma = 1; + hwif->ultra_mask = 0x7f; + hwif->mwdma_mask = 0x07; + + hwif->ide_dma_check = &jmicron_config_drive_for_dma; + if (!(hwif->udma_four)) + hwif->udma_four = ata66_jmicron(hwif); + + hwif->autodma = 1; + hwif->drives[0].autodma = hwif->autodma; + hwif->drives[1].autodma = hwif->autodma; + return; +fallback: + hwif->autodma = 0; + return; +} + +#define DECLARE_JMB_DEV(name_str) \ + { \ + .name = name_str, \ + .init_hwif = init_hwif_jmicron, \ + .channels = 2, \ + .autodma = AUTODMA, \ + .bootable = ON_BOARD, \ + .enablebits = { {0x40, 1, 1}, {0x40, 0x10, 0x10} }, \ + } + +static ide_pci_device_t jmicron_chipsets[] __devinitdata = { + /* 0 */ DECLARE_JMB_DEV("JMB361"), + /* 1 */ DECLARE_JMB_DEV("JMB363"), + /* 2 */ DECLARE_JMB_DEV("JMB365"), + /* 3 */ DECLARE_JMB_DEV("JMB366"), + /* 4 */ DECLARE_JMB_DEV("JMB368"), +}; + +/** + * jmicron_init_one - pci layer discovery entry + * @dev: PCI device + * @id: ident table entry + * + * Called by the PCI code when it finds a Jmicron controller. + * We then use the IDE PCI generic helper to do most of the work. 
+ */ + +static int __devinit jmicron_init_one(struct pci_dev *dev, const struct pci_device_id *id) +{ + ide_setup_pci_device(dev, &jmicron_chipsets[id->driver_data]); + return 0; +} + +static struct pci_device_id jmicron_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB361), 0}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB363), 1}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB365), 2}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB366), 3}, + { PCI_DEVICE(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB368), 4}, + { 0, }, +}; + +MODULE_DEVICE_TABLE(pci, jmicron_pci_tbl); + +static struct pci_driver driver = { + .name = "JMicron IDE", + .id_table = jmicron_pci_tbl, + .probe = jmicron_init_one, +}; + +static int __init jmicron_ide_init(void) +{ + return ide_pci_register_driver(&driver); +} + +module_init(jmicron_ide_init); + +MODULE_AUTHOR("Alan Cox"); +MODULE_DESCRIPTION("PCI driver module for the JMicron in legacy modes"); +MODULE_LICENSE("GPL"); diff -urN oldtree/drivers/net/tun.c newtree/drivers/net/tun.c --- oldtree/drivers/net/tun.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/drivers/net/tun.c 2006-09-30 04:17:01.000000000 -0400 @@ -288,11 +288,10 @@ return len; } -/* Writev */ -static ssize_t tun_chr_writev(struct file * file, const struct iovec *iv, - unsigned long count, loff_t *pos) +static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = iocb->ki_filp->private_data; if (!tun) return -EBADFD; @@ -302,14 +301,6 @@ return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count)); } -/* Write */ -static ssize_t tun_chr_write(struct file * file, const char __user * buf, - size_t count, loff_t *pos) -{ - struct iovec iv = { (void __user *) buf, count }; - return tun_chr_writev(file, &iv, 1, pos); -} - /* Put packet to the user space buffer */ 
static __inline__ ssize_t tun_put_user(struct tun_struct *tun, struct sk_buff *skb, @@ -343,10 +334,10 @@ return total; } -/* Readv */ -static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv, - unsigned long count, loff_t *pos) +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) { + struct file *file = iocb->ki_filp; struct tun_struct *tun = file->private_data; DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; @@ -426,14 +417,6 @@ return ret; } -/* Read */ -static ssize_t tun_chr_read(struct file * file, char __user * buf, - size_t count, loff_t *pos) -{ - struct iovec iv = { buf, count }; - return tun_chr_readv(file, &iv, 1, pos); -} - static void tun_setup(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -764,10 +747,10 @@ static struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, - .read = tun_chr_read, - .readv = tun_chr_readv, - .write = tun_chr_write, - .writev = tun_chr_writev, + .read = do_sync_read, + .aio_read = tun_chr_aio_read, + .write = do_sync_write, + .aio_write = tun_chr_aio_write, .poll = tun_chr_poll, .ioctl = tun_chr_ioctl, .open = tun_chr_open, diff -urN oldtree/drivers/usb/gadget/inode.c newtree/drivers/usb/gadget/inode.c --- oldtree/drivers/usb/gadget/inode.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/drivers/usb/gadget/inode.c 2006-09-30 04:16:37.000000000 -0400 @@ -533,7 +533,8 @@ struct usb_request *req; struct ep_data *epdata; void *buf; - char __user *ubuf; /* NULL for writes */ + const struct iovec *iv; + unsigned long nr_segs; unsigned actual; }; @@ -561,17 +562,32 @@ static ssize_t ep_aio_read_retry(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; - ssize_t status = priv->actual; + ssize_t len, total; + int i; - /* we "retry" to get the right mm context for this: */ - status = copy_to_user(priv->ubuf, priv->buf, priv->actual); - if (unlikely(0 != status)) - status = -EFAULT; - else - 
status = priv->actual; - kfree(priv->buf); - kfree(priv); - return status; + /* we "retry" to get the right mm context for this: */ + + /* copy stuff into user buffers */ + total = priv->actual; + len = 0; + for (i=0; i < priv->nr_segs; i++) { + ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total); + + if (copy_to_user(priv->iv[i].iov_base, priv->buf, this)) { + if (len == 0) + len = -EFAULT; + break; + } + + total -= this; + len += this; + if (total == 0) + break; + } + kfree(priv->buf); + kfree(priv); + aio_put_req(iocb); + return len; } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) @@ -584,7 +600,7 @@ spin_lock(&epdata->dev->lock); priv->req = NULL; priv->epdata = NULL; - if (priv->ubuf == NULL + if (priv->iv == NULL || unlikely(req->actual == 0) || unlikely(kiocbIsCancelled(iocb))) { kfree(req->buf); @@ -619,7 +635,8 @@ char *buf, size_t len, struct ep_data *epdata, - char __user *ubuf + const struct iovec *iv, + unsigned long nr_segs ) { struct kiocb_priv *priv; @@ -634,7 +651,8 @@ return value; } iocb->private = priv; - priv->ubuf = ubuf; + priv->iv = iv; + priv->nr_segs = nr_segs; value = get_ready_ep(iocb->ki_filp->f_flags, epdata); if (unlikely(value < 0)) { @@ -674,41 +692,53 @@ kfree(priv); put_ep(epdata); } else - value = (ubuf ? -EIOCBRETRY : -EIOCBQUEUED); + value = (iv ? 
-EIOCBRETRY : -EIOCBQUEUED); return value; } static ssize_t -ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o) +ep_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN)) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; + iocb->ki_retry = ep_aio_read_retry; - return ep_aio_rwtail(iocb, buf, len, epdata, ubuf); + return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); } static ssize_t -ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o) +ep_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; + size_t len = 0; + int i = 0; if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN))) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; - if (unlikely(copy_from_user(buf, ubuf, len) != 0)) { - kfree(buf); - return -EFAULT; + + for (i=0; i < nr_segs; i++) { + if (unlikely(copy_from_user(&buf[len], iov[i].iov_base, + iov[i].iov_len) != 0)) { + kfree(buf); + return -EFAULT; + } + len += iov[i].iov_len; } - return ep_aio_rwtail(iocb, buf, len, epdata, NULL); + return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0); } /*----------------------------------------------------------------------*/ diff -urN oldtree/fs/Kconfig newtree/fs/Kconfig --- oldtree/fs/Kconfig 2006-09-29 15:59:29.000000000 -0400 +++ newtree/fs/Kconfig 2006-09-30 04:24:40.000000000 -0400 @@ -990,6 +990,18 @@ To compile this file system support as a module, choose M here: the module will be called affs. If unsure, say N. 
+config ECRYPT_FS + tristate "eCrypt filesystem layer support (EXPERIMENTAL)" + depends on EXPERIMENTAL && KEYS && CRYPTO + help + Encrypted filesystem that operates on the VFS layer. See + <file:Documentation/ecryptfs.txt> to learn more about + eCryptfs. Userspace components are required and can be + obtained from <http://ecryptfs.sf.net>. + + To compile this file system support as a module, choose M here: the + module will be called ecryptfs. + config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" depends on EXPERIMENTAL @@ -1930,8 +1942,7 @@ For most cases you probably want to say N. config AFS_FS -# for fs/nls/Config.in - tristate "Andrew File System support (AFS) (Experimental)" + tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL select RXRPC help diff -urN oldtree/fs/Makefile newtree/fs/Makefile --- oldtree/fs/Makefile 2006-09-29 15:59:29.000000000 -0400 +++ newtree/fs/Makefile 2006-09-30 04:24:37.000000000 -0400 @@ -71,6 +71,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ +obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/ diff -urN oldtree/fs/adfs/file.c newtree/fs/adfs/file.c --- oldtree/fs/adfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/adfs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -27,10 +27,12 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, .fsync = file_fsync, - .write = generic_file_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .sendfile = generic_file_sendfile, }; diff -urN oldtree/fs/affs/file.c newtree/fs/affs/file.c --- oldtree/fs/affs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/affs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -27,8 +27,10 @@ const 
struct file_operations affs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, diff -urN oldtree/fs/aio.c newtree/fs/aio.c --- oldtree/fs/aio.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/aio.c 2006-09-30 04:18:01.000000000 -0400 @@ -15,6 +15,7 @@ #include #include #include +#include #define DEBUG 0 @@ -414,6 +415,7 @@ req->ki_retry = NULL; req->ki_dtor = NULL; req->private = NULL; + req->ki_iovec = NULL; INIT_LIST_HEAD(&req->ki_run_list); /* Check if the completion queue has enough free space to @@ -459,6 +461,8 @@ if (req->ki_dtor) req->ki_dtor(req); + if (req->ki_iovec != &req->ki_inline_vec) + kfree(req->ki_iovec); kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; @@ -1300,63 +1304,63 @@ return -EINVAL; } -/* - * aio_p{read,write} are the default ki_retry methods for - * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially - * multiple calls to f_op->aio_read(). They loop around partial progress - * instead of returning -EIOCBRETRY because they don't have the means to call - * kick_iocb(). - */ -static ssize_t aio_pread(struct kiocb *iocb) +static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret = 0; - - do { - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. 
- */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - } + struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg]; - /* - * For pipes and sockets we return once we have some data; for - * regular files we retry till we complete the entire read or - * find that we can't read any more data (e.g short reads). - */ - } while (ret > 0 && iocb->ki_left > 0 && - !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); + BUG_ON(ret <= 0); - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ - if ((ret == 0) || (iocb->ki_left == 0)) - ret = iocb->ki_nbytes - iocb->ki_left; + while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) { + ssize_t this = min((ssize_t)iov->iov_len, ret); + iov->iov_base += this; + iov->iov_len -= this; + iocb->ki_left -= this; + ret -= this; + if (iov->iov_len == 0) { + iocb->ki_cur_seg++; + iov++; + } + } - return ret; + /* the caller should not have done more io than what fit in + * the remaining iovecs */ + BUG_ON(ret > 0 && iocb->ki_left == 0); } -/* see aio_pread() */ -static ssize_t aio_pwrite(struct kiocb *iocb) +static ssize_t aio_rw_vect_retry(struct kiocb *iocb) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t (*rw_op)(struct kiocb *, const struct iovec *, + unsigned long, loff_t); ssize_t ret = 0; + unsigned short opcode; + + if ((iocb->ki_opcode == IOCB_CMD_PREADV) || + (iocb->ki_opcode == IOCB_CMD_PREAD)) { + rw_op = file->f_op->aio_read; + opcode = IOCB_CMD_PREADV; + } else { + rw_op = file->f_op->aio_write; + opcode = IOCB_CMD_PWRITEV; + } do { - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - } - } while (ret > 0 && iocb->ki_left > 0); + ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], + iocb->ki_nr_segs - iocb->ki_cur_seg, + iocb->ki_pos); + if (ret > 0) + aio_advance_iovec(iocb, ret); + + 
/* retry all partial writes. retry partial reads as long as its a + * regular file. */ + } while (ret > 0 && iocb->ki_left > 0 && + (opcode == IOCB_CMD_PWRITEV || + (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); + /* This means we must have transferred all that we could */ + /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; @@ -1383,6 +1387,38 @@ return ret; } +static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb) +{ + ssize_t ret; + + ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf, + kiocb->ki_nbytes, 1, + &kiocb->ki_inline_vec, &kiocb->ki_iovec); + if (ret < 0) + goto out; + + kiocb->ki_nr_segs = kiocb->ki_nbytes; + kiocb->ki_cur_seg = 0; + /* ki_nbytes/left now reflect bytes instead of segs */ + kiocb->ki_nbytes = ret; + kiocb->ki_left = ret; + + ret = 0; +out: + return ret; +} + +static ssize_t aio_setup_single_vector(struct kiocb *kiocb) +{ + kiocb->ki_iovec = &kiocb->ki_inline_vec; + kiocb->ki_iovec->iov_base = kiocb->ki_buf; + kiocb->ki_iovec->iov_len = kiocb->ki_left; + kiocb->ki_nr_segs = 1; + kiocb->ki_cur_seg = 0; + kiocb->ki_nbytes = kiocb->ki_left; + return 0; +} + /* * aio_setup_iocb: * Performs the initial checks and aio retry method @@ -1405,9 +1441,12 @@ ret = security_file_permission(file, MAY_READ); if (unlikely(ret)) break; + ret = aio_setup_single_vector(kiocb); + if (ret) + break; ret = -EINVAL; if (file->f_op->aio_read) - kiocb->ki_retry = aio_pread; + kiocb->ki_retry = aio_rw_vect_retry; break; case IOCB_CMD_PWRITE: ret = -EBADF; @@ -1420,9 +1459,40 @@ ret = security_file_permission(file, MAY_WRITE); if (unlikely(ret)) break; + ret = aio_setup_single_vector(kiocb); + if (ret) + break; + ret = -EINVAL; + if (file->f_op->aio_write) + kiocb->ki_retry = aio_rw_vect_retry; + break; + case IOCB_CMD_PREADV: + ret = -EBADF; + if (unlikely(!(file->f_mode & FMODE_READ))) + break; + ret = security_file_permission(file, MAY_READ); + if 
(unlikely(ret)) + break; + ret = aio_setup_vectored_rw(READ, kiocb); + if (ret) + break; + ret = -EINVAL; + if (file->f_op->aio_read) + kiocb->ki_retry = aio_rw_vect_retry; + break; + case IOCB_CMD_PWRITEV: + ret = -EBADF; + if (unlikely(!(file->f_mode & FMODE_WRITE))) + break; + ret = security_file_permission(file, MAY_WRITE); + if (unlikely(ret)) + break; + ret = aio_setup_vectored_rw(WRITE, kiocb); + if (ret) + break; ret = -EINVAL; if (file->f_op->aio_write) - kiocb->ki_retry = aio_pwrite; + kiocb->ki_retry = aio_rw_vect_retry; break; case IOCB_CMD_FDSYNC: ret = -EINVAL; diff -urN oldtree/fs/bad_inode.c newtree/fs/bad_inode.c --- oldtree/fs/bad_inode.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/bad_inode.c 2006-09-30 04:17:01.000000000 -0400 @@ -40,8 +40,6 @@ .aio_fsync = EIO_ERROR, .fasync = EIO_ERROR, .lock = EIO_ERROR, - .readv = EIO_ERROR, - .writev = EIO_ERROR, .sendfile = EIO_ERROR, .sendpage = EIO_ERROR, .get_unmapped_area = EIO_ERROR, diff -urN oldtree/fs/bfs/file.c newtree/fs/bfs/file.c --- oldtree/fs/bfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/bfs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -19,8 +19,10 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, }; diff -urN oldtree/fs/block_dev.c newtree/fs/block_dev.c --- oldtree/fs/block_dev.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/block_dev.c 2006-09-30 04:17:37.000000000 -0400 @@ -1152,22 +1152,6 @@ return blkdev_put(bdev); } -static ssize_t blkdev_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_write_nolock(file, &local_iov, 1, ppos); -} - 
-static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); @@ -1187,18 +1171,16 @@ .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = generic_file_read, - .write = blkdev_file_write, + .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = blkdev_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .mmap = generic_file_mmap, .fsync = block_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .readv = generic_file_readv, - .writev = generic_file_write_nolock, .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff -urN oldtree/fs/cifs/cifsfs.c newtree/fs/cifs/cifsfs.c --- oldtree/fs/cifs/cifsfs.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/cifs/cifsfs.c 2006-09-30 04:17:01.000000000 -0400 @@ -480,25 +480,13 @@ return simple_set_mnt(mnt, sb); } -static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct inode *inode = file->f_dentry->d_inode; - ssize_t written; - - written = generic_file_writev(file, iov, nr_segs, ppos); - if (!CIFS_I(inode)->clientCanCacheAll) - filemap_fdatawrite(inode->i_mapping); - return written; -} - -static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_dentry->d_inode; ssize_t written; - written = 
generic_file_aio_write(iocb, buf, count, pos); + written = generic_file_aio_write(iocb, iov, nr_segs, pos); if (!CIFS_I(inode)->clientCanCacheAll) filemap_fdatawrite(inode->i_mapping); return written; @@ -577,8 +565,6 @@ const struct file_operations cifs_file_ops = { .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = cifs_file_writev, .aio_read = generic_file_aio_read, .aio_write = cifs_file_aio_write, .open = cifs_open, @@ -620,8 +606,6 @@ const struct file_operations cifs_file_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = cifs_file_writev, .aio_read = generic_file_aio_read, .aio_write = cifs_file_aio_write, .open = cifs_open, diff -urN oldtree/fs/compat.c newtree/fs/compat.c --- oldtree/fs/compat.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/compat.c 2006-09-30 04:19:34.000000000 -0400 @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include /* siocdevprivate_ioctl */ @@ -69,6 +69,8 @@ return ret; } +#include "read_write.h" + /* * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. 
@@ -1153,9 +1155,6 @@ const struct compat_iovec __user *uvector, unsigned long nr_segs, loff_t *pos) { - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); - typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov=iovstack, *vector; @@ -1238,39 +1237,18 @@ fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->readv; + fnv = file->f_op->aio_read; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - ret = fnv(file, iov, nr_segs, pos); - goto out; + fnv = file->f_op->aio_write; } - /* Do it by hand, with file-ops */ - ret = 0; - vector = iov; - while (nr_segs > 0) { - void __user * base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(file, base, len, pos); + if (fnv) + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, + pos, fnv); + else + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); - if (nr < 0) { - if (!ret) ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } out: if (iov != iovstack) kfree(iov); @@ -1298,7 +1276,7 @@ goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); @@ -1321,7 +1299,7 @@ goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); diff -urN oldtree/fs/ecryptfs/Makefile newtree/fs/ecryptfs/Makefile --- oldtree/fs/ecryptfs/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/Makefile 2006-09-30 04:24:53.000000000 -0400 @@ -0,0 +1,7 @@ +# +# Makefile for the Linux 2.6 eCryptfs +# + 
+obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o + +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o diff -urN oldtree/fs/ecryptfs/crypto.c newtree/fs/ecryptfs/crypto.c --- oldtree/fs/ecryptfs/crypto.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/crypto.c 2006-09-30 04:33:04.000000000 -0400 @@ -0,0 +1,1663 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * Michael C. Thompson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ecryptfs_kernel.h" + +static int +ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv); +static int +ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv); + +/** + * ecryptfs_to_hex + * @dst: Buffer to take hex character representation of contents of + * src; must hold (src_size * 2) + 1 bytes, as sprintf() appends a NUL + * @src: Buffer to be converted to a hex string representation + * @src_size: number of bytes to convert + */ +void ecryptfs_to_hex(char *dst, char *src, size_t src_size) +{ + int x; + + for (x = 0; x < src_size; x++) + sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]); +} + +/** + * ecryptfs_from_hex + * @dst: Buffer to take the bytes from src hex; must be at least of + * size dst_size + * @src: Hex string to convert to raw bytes; (dst_size * 2) characters are read + * @dst_size: number of bytes to produce, i.e. hex character pairs to convert + */ +void ecryptfs_from_hex(char *dst, char *src, int dst_size) +{ + int x; + char tmp[3] = { 0, }; + + for (x = 0; x < dst_size; x++) { + tmp[0] = src[x * 2]; + tmp[1] = src[x * 2 + 1]; + dst[x] = (unsigned char)simple_strtol(tmp, NULL, 16); + } +} + +/** + * ecryptfs_calculate_md5 - calculates the md5 of @src + * @dst: Pointer to 16 bytes of allocated memory + * @crypt_stat: Pointer to crypt_stat struct for the current inode + * @src: Data to be md5'd + * @len: Length of @src + * + * Uses the allocated crypto context that crypt_stat references to + * generate the MD5 sum of the contents of src.
+ */ +static int ecryptfs_calculate_md5(char *dst, + struct ecryptfs_crypt_stat *crypt_stat, + char *src, int len) +{ + int rc = 0; + struct scatterlist sg; + + mutex_lock(&crypt_stat->cs_md5_tfm_mutex); + sg_init_one(&sg, (u8 *)src, len); + if (!crypt_stat->md5_tfm) { + crypt_stat->md5_tfm = + crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); + if (!crypt_stat->md5_tfm) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error attempting to " + "allocate crypto context\n"); + goto out; + } + } + crypto_digest_init(crypt_stat->md5_tfm); + crypto_digest_update(crypt_stat->md5_tfm, &sg, 1); + crypto_digest_final(crypt_stat->md5_tfm, dst); + mutex_unlock(&crypt_stat->cs_md5_tfm_mutex); +out: + return rc; +} + +/** + * ecryptfs_derive_iv + * @iv: destination for the derived iv vale + * @crypt_stat: Pointer to crypt_stat struct for the current inode + * @offset: Offset of the page whose's iv we are to derive + * + * Generate the initialization vector from the given root IV and page + * offset. + * + * Returns zero on success; non-zero on error. + */ +static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, + pgoff_t offset) +{ + int rc = 0; + char dst[MD5_DIGEST_SIZE]; + char src[ECRYPTFS_MAX_IV_BYTES + 16]; + + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "root iv:\n"); + ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes); + } + /* TODO: It is probably secure to just cast the least + * significant bits of the root IV into an unsigned long and + * add the offset to that rather than go through all this + * hashing business. 
-Halcrow */ + memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); + memset((src + crypt_stat->iv_bytes), 0, 16); + snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "source:\n"); + ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); + } + rc = ecryptfs_calculate_md5(dst, crypt_stat, src, + (crypt_stat->iv_bytes + 16)); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error attempting to compute " + "MD5 while generating IV for a page\n"); + goto out; + } + memcpy(iv, dst, crypt_stat->iv_bytes); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "derived iv:\n"); + ecryptfs_dump_hex(iv, crypt_stat->iv_bytes); + } +out: + return rc; +} + +/** + * ecryptfs_init_crypt_stat + * @crypt_stat: Pointer to the crypt_stat struct to initialize. + * + * Zeroes the struct, initializes its three mutexes and flags it initialized. + */ +void +ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) +{ + memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); + mutex_init(&crypt_stat->cs_mutex); + mutex_init(&crypt_stat->cs_tfm_mutex); + mutex_init(&crypt_stat->cs_md5_tfm_mutex); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED); +} + +/** + * ecryptfs_destruct_crypt_stat + * @crypt_stat: Pointer to the crypt_stat struct to tear down. + * + * Frees the cipher and MD5 tfms, if allocated, then zeroes the struct.
+ */ +void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->tfm) + crypto_free_tfm(crypt_stat->tfm); + if (crypt_stat->md5_tfm) + crypto_free_tfm(crypt_stat->md5_tfm); + memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); +} + +void ecryptfs_destruct_mount_crypt_stat( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + if (mount_crypt_stat->global_auth_tok_key) + key_put(mount_crypt_stat->global_auth_tok_key); + if (mount_crypt_stat->global_key_tfm) + crypto_free_tfm(mount_crypt_stat->global_key_tfm); + memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); +} + +/** + * virt_to_scatterlist + * @addr: Virtual address + * @size: Size of data; should be an even multiple of the block size + * @sg: Pointer to scatterlist array; set to NULL to obtain only + * the number of scatterlist structs required in array + * @sg_size: Max array size; also caps the count when @sg is NULL + * + * Fills in a scatterlist array with page references for a passed + * virtual address. + * + * Returns the number of scatterlist structs used; -ENOMEM if @sg_size is too few + */ +int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, + int sg_size) +{ + int i = 0; + struct page *pg; + int offset; + int remainder_of_page; + + while (size > 0 && i < sg_size) { + pg = virt_to_page(addr); + offset = offset_in_page(addr); + if (sg) { + sg[i].page = pg; + sg[i].offset = offset; + } + remainder_of_page = PAGE_CACHE_SIZE - offset; + if (size >= remainder_of_page) { + if (sg) + sg[i].length = remainder_of_page; + addr += remainder_of_page; + size -= remainder_of_page; + } else { + if (sg) + sg[i].length = size; + addr += size; + size = 0; + } + i++; + } + if (size > 0) + return -ENOMEM; + return i; +} + +/** + * encrypt_scatterlist + * @crypt_stat: Pointer to the crypt_stat struct holding the key and cipher tfm
+ * @dest_sg: Destination of encrypted data + * @src_sg: Data to be encrypted + * @size: Length of data to be encrypted + * @iv: iv to use during encryption + * + * Returns zero on success; -EINVAL if the cipher key cannot be set + */ +static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, + struct scatterlist *dest_sg, + struct scatterlist *src_sg, int size, + unsigned char *iv) +{ + int rc = 0; + + BUG_ON(!crypt_stat || !crypt_stat->tfm + || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_STRUCT_INITIALIZED)); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", + crypt_stat->key_size); + ecryptfs_dump_hex(crypt_stat->key, + crypt_stat->key_size); + } + /* Consider doing this once, when the file is opened */ + mutex_lock(&crypt_stat->cs_tfm_mutex); + rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", + rc); + mutex_unlock(&crypt_stat->cs_tfm_mutex); + rc = -EINVAL; + goto out; + } + ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); + crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv); + mutex_unlock(&crypt_stat->cs_tfm_mutex); +out: + return rc; +} + +static void +ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx, + int *byte_offset, + struct ecryptfs_crypt_stat *crypt_stat, + unsigned long extent_num) +{ + unsigned long lower_extent_num; + int extents_occupied_by_headers_at_front; + int bytes_occupied_by_headers_at_front; + int extent_offset; + int extents_per_page; + + bytes_occupied_by_headers_at_front = + ( crypt_stat->header_extent_size + * crypt_stat->num_header_extents_at_front ); + extents_occupied_by_headers_at_front = + ( bytes_occupied_by_headers_at_front + / crypt_stat->extent_size ); + lower_extent_num = extents_occupied_by_headers_at_front + extent_num; + extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; +
(*lower_page_idx) = lower_extent_num / extents_per_page; + extent_offset = lower_extent_num % extents_per_page; + (*byte_offset) = extent_offset * crypt_stat->extent_size; + ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = " + "[%d]\n", crypt_stat->header_extent_size); + ecryptfs_printk(KERN_DEBUG, " * crypt_stat->" + "num_header_extents_at_front = [%d]\n", + crypt_stat->num_header_extents_at_front); + ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_" + "front = [%d]\n", extents_occupied_by_headers_at_front); + ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n", + lower_extent_num); + ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n", + extents_per_page); + ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n", + (*lower_page_idx)); + ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n", + extent_offset); + ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n", + (*byte_offset)); +} + +static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, + struct page *lower_page, + struct inode *lower_inode, + int byte_offset_in_page, int bytes_to_write) +{ + int rc = 0; + + if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { + rc = ecryptfs_commit_lower_page(lower_page, lower_inode, + ctx->param.lower_file, + byte_offset_in_page, + bytes_to_write); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error calling lower " + "commit; rc = [%d]\n", rc); + goto out; + } + } else { + rc = ecryptfs_writepage_and_release_lower_page(lower_page, + lower_inode, + ctx->param.wbc); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error calling lower " + "writepage(); rc = [%d]\n", rc); + goto out; + } + } +out: + return rc; +} + +static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx, + struct page **lower_page, + struct inode *lower_inode, + unsigned long lower_page_idx, + int byte_offset_in_page) +{ + int rc = 0; + + if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { + /* TODO: Limit this to only the 
data extents that are + * needed */ + rc = ecryptfs_get_lower_page(lower_page, lower_inode, + ctx->param.lower_file, + lower_page_idx, + byte_offset_in_page, + (PAGE_CACHE_SIZE + - byte_offset_in_page)); + if (rc) { + ecryptfs_printk( + KERN_ERR, "Error attempting to grab, map, " + "and prepare_write lower page with index " + "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); + goto out; + } + } else { + rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, + lower_inode, + lower_page_idx); + if (rc) { + ecryptfs_printk( + KERN_ERR, "Error attempting to grab and map " + "lower page with index [0x%.16x]; rc = [%d]\n", + lower_page_idx, rc); + goto out; + } + } +out: + return rc; +} + +/** + * ecryptfs_encrypt_page + * @ctx: The context of the page + * + * Encrypt an eCryptfs page. This is done on a per-extent basis. Note + * that eCryptfs pages may straddle the lower pages -- for instance, + * if the file was created on a machine with an 8K page size + * (resulting in an 8K header), and then the file is copied onto a + * host with a 32K page size, then when reading page 0 of the eCryptfs + * file, 24K of page 0 of the lower file will be read and decrypted, + * and then 8K of page 1 of the lower file will be read and decrypted. + * + * The actual operations performed on each page depends on the + * contents of the ecryptfs_page_crypt_context struct. 
+ * + * Returns zero on success; negative on error + */ +int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx) +{ + char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + unsigned long base_extent; + unsigned long extent_offset = 0; + unsigned long lower_page_idx = 0; + unsigned long prior_lower_page_idx = 0; + struct page *lower_page; + struct inode *lower_inode; + struct ecryptfs_inode_info *inode_info; + struct ecryptfs_crypt_stat *crypt_stat; + int rc = 0; + int lower_byte_offset = 0; + int orig_byte_offset = 0; + int num_extents_per_page; +#define ECRYPTFS_PAGE_STATE_UNREAD 0 +#define ECRYPTFS_PAGE_STATE_READ 1 +#define ECRYPTFS_PAGE_STATE_MODIFIED 2 +#define ECRYPTFS_PAGE_STATE_WRITTEN 3 + int page_state; + + lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host); + inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host); + crypt_stat = &inode_info->crypt_stat; + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { + rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, + ctx->param.lower_file); + if (rc) + ecryptfs_printk(KERN_ERR, "Error attempting to copy " + "page at index [0x%.16x]\n", + ctx->page->index); + goto out; + } + num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; + base_extent = (ctx->page->index * num_extents_per_page); + page_state = ECRYPTFS_PAGE_STATE_UNREAD; + while (extent_offset < num_extents_per_page) { + ecryptfs_extent_to_lwr_pg_idx_and_offset( + &lower_page_idx, &lower_byte_offset, crypt_stat, + (base_extent + extent_offset)); + if (prior_lower_page_idx != lower_page_idx + && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { + rc = ecryptfs_write_out_page(ctx, lower_page, + lower_inode, + orig_byte_offset, + (PAGE_CACHE_SIZE + - orig_byte_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting " + "to write out page; rc = [%d]" + "\n", rc); + goto out; + } + page_state = ECRYPTFS_PAGE_STATE_WRITTEN; + } + if (page_state == ECRYPTFS_PAGE_STATE_UNREAD + || page_state == 
ECRYPTFS_PAGE_STATE_WRITTEN) { + rc = ecryptfs_read_in_page(ctx, &lower_page, + lower_inode, lower_page_idx, + lower_byte_offset); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting " + "to read in lower page with " + "index [0x%.16x]; rc = [%d]\n", + lower_page_idx, rc); + goto out; + } + orig_byte_offset = lower_byte_offset; + prior_lower_page_idx = lower_page_idx; + page_state = ECRYPTFS_PAGE_STATE_READ; + } + BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED + || page_state == ECRYPTFS_PAGE_STATE_READ)); + rc = ecryptfs_derive_iv(extent_iv, crypt_stat, + (base_extent + extent_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to " + "derive IV for extent [0x%.16x]; " + "rc = [%d]\n", + (base_extent + extent_offset), rc); + goto out; + } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Encrypting extent " + "with iv:\n"); + ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " + "encryption:\n"); + ecryptfs_dump_hex((char *) + (page_address(ctx->page) + + (extent_offset + * crypt_stat->extent_size)), 8); + } + rc = ecryptfs_encrypt_page_offset( + crypt_stat, lower_page, lower_byte_offset, ctx->page, + (extent_offset * crypt_stat->extent_size), + crypt_stat->extent_size, extent_iv); + ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " + "rc = [%d]\n", + (base_extent + extent_offset), rc); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " + "encryption:\n"); + ecryptfs_dump_hex((char *)(page_address(lower_page) + + lower_byte_offset), 8); + } + page_state = ECRYPTFS_PAGE_STATE_MODIFIED; + extent_offset++; + } + BUG_ON(orig_byte_offset != 0); + rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, + (lower_byte_offset + + crypt_stat->extent_size)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to write out " + "page; rc = [%d]\n", rc); + goto out; + } +out: + return rc; +} + +/** + * 
ecryptfs_decrypt_page
+ * @file: The ecryptfs file
+ * @page: The page in ecryptfs to decrypt
+ *
+ * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
+ * that eCryptfs pages may straddle the lower pages -- for instance,
+ * if the file was created on a machine with an 8K page size
+ * (resulting in an 8K header), and then the file is copied onto a
+ * host with a 32K page size, then when reading page 0 of the eCryptfs
+ * file, 24K of page 0 of the lower file will be read and decrypted,
+ * and then 8K of page 1 of the lower file will be read and decrypted.
+ *
+ * If the inode is not flagged ECRYPTFS_ENCRYPTED the page is simply
+ * read through ecryptfs_do_readpage() without any decryption.
+ *
+ * Returns zero on success; negative on error
+ */
+int ecryptfs_decrypt_page(struct file *file, struct page *page)
+{
+	char extent_iv[ECRYPTFS_MAX_IV_BYTES];
+	unsigned long base_extent;
+	unsigned long extent_offset = 0;
+	unsigned long lower_page_idx = 0;
+	unsigned long prior_lower_page_idx = 0;
+	struct page *lower_page;
+	char *lower_page_virt = NULL;
+	struct inode *lower_inode;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	int rc = 0;
+	int byte_offset;
+	int num_extents_per_page;
+	int page_state;
+
+	crypt_stat = &(ecryptfs_inode_to_private(
+			       page->mapping->host)->crypt_stat);
+	lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
+	if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
+		/* Pass-through: nothing to decrypt */
+		rc = ecryptfs_do_readpage(file, page, page->index);
+		if (rc)
+			ecryptfs_printk(KERN_ERR, "Error attempting to copy "
+					"page at index [0x%.16x]\n",
+					page->index);
+		goto out;
+	}
+	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
+	base_extent = (page->index * num_extents_per_page);
+	/* Scratch buffer that receives the encrypted lower page(s);
+	 * released at the out label. */
+	lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
+					   SLAB_KERNEL);
+	if (!lower_page_virt) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
+				"lower page(s)\n");
+		goto out;
+	}
+	lower_page = virt_to_page(lower_page_virt);
+	page_state = ECRYPTFS_PAGE_STATE_UNREAD;
+	while (extent_offset < num_extents_per_page) {
+		ecryptfs_extent_to_lwr_pg_idx_and_offset(
+			&lower_page_idx, &byte_offset, crypt_stat,
+			(base_extent + extent_offset));
+		/* Only (re)read the lower page when this extent lives
+		 * in a different lower page than the previous one. */
+		if (prior_lower_page_idx != lower_page_idx
+		    || page_state == ECRYPTFS_PAGE_STATE_UNREAD) {
+			rc = ecryptfs_do_readpage(file, lower_page,
+						  lower_page_idx);
+			if (rc) {
+				ecryptfs_printk(KERN_ERR, "Error reading "
+						"lower encrypted page; rc = "
+						"[%d]\n", rc);
+				goto out;
+			}
+			prior_lower_page_idx = lower_page_idx;
+			page_state = ECRYPTFS_PAGE_STATE_READ;
+		}
+		rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
+					(base_extent + extent_offset));
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error attempting to "
+					"derive IV for extent [0x%.16x]; rc = "
+					"[%d]\n",
+					(base_extent + extent_offset), rc);
+			goto out;
+		}
+		if (unlikely(ecryptfs_verbosity > 0)) {
+			ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
+					"with iv:\n");
+			ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
+			ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
+					"decryption:\n");
+			ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
+		}
+		rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
+						  (extent_offset
+						   * crypt_stat->extent_size),
+						  lower_page, byte_offset,
+						  crypt_stat->extent_size,
+						  extent_iv);
+		/* The helper returns the byte count on success and a
+		 * negative value on error; anything but a full extent
+		 * is a failure. */
+		if (rc != crypt_stat->extent_size) {
+			ecryptfs_printk(KERN_ERR, "Error attempting to "
+					"decrypt extent [0x%.16x]\n",
+					(base_extent + extent_offset));
+			goto out;
+		}
+		rc = 0;
+		if (unlikely(ecryptfs_verbosity > 0)) {
+			ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
+					"decryption:\n");
+			/* Dump from where the plaintext was written in
+			 * the upper page, not from the lower-page
+			 * offset. */
+			ecryptfs_dump_hex((char *)(page_address(page)
+						   + (extent_offset
+						      * crypt_stat->extent_size)),
+					  8);
+		}
+		extent_offset++;
+	}
+out:
+	if (lower_page_virt)
+		kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
+	return rc;
+}
+
+/**
+ * decrypt_scatterlist
+ * @crypt_stat: Supplies the cipher context, key and tfm mutex
+ * @dest_sg: Destination scatterlist for the plaintext
+ * @src_sg: Source scatterlist holding the ciphertext
+ * @size: Number of bytes to decrypt
+ * @iv: Initialization vector for this operation
+ *
+ * The key is set and the cipher run while holding cs_tfm_mutex.
+ *
+ * Returns the number of bytes decrypted; negative value on error
+ */
+static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
+			       struct scatterlist *dest_sg,
+			       struct scatterlist *src_sg, int size,
+			       unsigned char *iv)
+{
+	int rc = 0;
+
+	/* Consider doing this once, when the file is opened */
+	mutex_lock(&crypt_stat->cs_tfm_mutex);
+	rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
+				  crypt_stat->key_size);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
+				rc);
+		mutex_unlock(&crypt_stat->cs_tfm_mutex);
+		rc = -EINVAL;
+		goto out;
+	}
+	ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
+	rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
+				      iv);
+	mutex_unlock(&crypt_stat->cs_tfm_mutex);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
+				rc);
+		goto out;
+	}
+	rc = size;
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_encrypt_page_offset
+ *
+ * Wraps the source and destination page regions in single-entry
+ * scatterlists and hands them to encrypt_scatterlist().
+ *
+ * Returns the number of bytes encrypted
+ */
+static int
+ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+			     struct page *dst_page, int dst_offset,
+			     struct page *src_page, int src_offset, int size,
+			     unsigned char *iv)
+{
+	struct scatterlist src_sg, dst_sg;
+
+	src_sg.page = src_page;
+	src_sg.offset = src_offset;
+	src_sg.length = size;
+	dst_sg.page = dst_page;
+	dst_sg.offset = dst_offset;
+	dst_sg.length = size;
+	return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
+}
+
+/**
+ * ecryptfs_decrypt_page_offset
+ *
+ * Wraps the source and destination page regions in single-entry
+ * scatterlists and hands them to decrypt_scatterlist().
+ *
+ * Returns the number of bytes decrypted; negative value on error
+ */
+static int
+ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+			     struct page *dst_page, int dst_offset,
+			     struct page *src_page, int src_offset, int size,
+			     unsigned char *iv)
+{
+	struct scatterlist src_sg, dst_sg;
+
+	src_sg.page = src_page;
+	src_sg.offset = src_offset;
+	src_sg.length = size;
+	dst_sg.page = dst_page;
+	dst_sg.offset = dst_offset;
+	dst_sg.length = size;
+	return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
+}
+
+#define ECRYPTFS_MAX_SCATTERLIST_LEN 4
+
+/**
+ * ecryptfs_init_crypt_ctx
+ * @crypt_stat: Uninitialized crypt stats structure
+ *
+ * Initialize the crypto context.
+ *
+ * TODO: Performance: Keep a cache of initialized cipher contexts;
+ * only init if needed
+ */
+int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	/* Every early exit below is a failure unless rc is reset */
+	int rc = -EINVAL;
+
+	if (!crypt_stat->cipher) {
+		ecryptfs_printk(KERN_ERR, "No cipher specified\n");
+		goto out;
+	}
+	ecryptfs_printk(KERN_DEBUG,
+			"Initializing cipher [%s]; strlen = [%d]; "
+			"key_size_bits = [%d]\n",
+			crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
+			crypt_stat->key_size << 3);
+	/* A context that already exists is reused as-is.
+	 * NOTE(review): this check happens before cs_tfm_mutex is
+	 * taken; confirm callers cannot race here. */
+	if (crypt_stat->tfm) {
+		rc = 0;
+		goto out;
+	}
+	mutex_lock(&crypt_stat->cs_tfm_mutex);
+	crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
+					   ECRYPTFS_DEFAULT_CHAINING_MODE
+					   | CRYPTO_TFM_REQ_WEAK_KEY);
+	mutex_unlock(&crypt_stat->cs_tfm_mutex);
+	if (!crypt_stat->tfm) {
+		ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
+				"Error initializing cipher [%s]\n",
+				crypt_stat->cipher);
+		goto out;
+	}
+	rc = 0;
+out:
+	return rc;
+}
+
+/* Derive extent_mask and extent_shift from extent_size by counting
+ * its trailing zero bits: the shift is incremented and the mask
+ * shifted left once per zero bit. An extent_size of zero leaves the
+ * mask at 0xFFFFFFFF and the shift at 0. */
+static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int extent_size_tmp;
+
+	crypt_stat->extent_mask = 0xFFFFFFFF;
+	crypt_stat->extent_shift = 0;
+	/* Guard: the loop below would never terminate for zero */
+	if (crypt_stat->extent_size == 0)
+		return;
+	extent_size_tmp = crypt_stat->extent_size;
+	while ((extent_size_tmp & 0x01) == 0) {
+		extent_size_tmp >>= 1;
+		crypt_stat->extent_mask <<= 1;
+		crypt_stat->extent_shift++;
+	}
+}
+
+/* Load the default extent size, IV size and header geometry into
+ * crypt_stat. The header extent is the larger of PAGE_CACHE_SIZE and
+ * ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE, with one header extent at the
+ * front of the file. */
+void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	/* Default values; may be overwritten as we are parsing the
+	 * packets. */
+	crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
+	set_extent_mask_and_shift(crypt_stat);
+	crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
+	if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+		crypt_stat->header_extent_size =
+			ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
+	} else
+		crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
+	crypt_stat->num_header_extents_at_front = 1;
+}
+
+/**
+ * ecryptfs_compute_root_iv
+ * @crypt_stats
+ *
+ * On error, sets the root IV to all 0's.
+ */
+int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int rc = 0;
+	char dst[MD5_DIGEST_SIZE];
+
+	/* The IV is a prefix of the MD5 digest, so it must fit in it */
+	BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
+	BUG_ON(crypt_stat->iv_bytes <= 0);
+	if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING, "Session key not valid; "
+				"cannot generate root IV\n");
+		goto out;
+	}
+	/* Root IV = first iv_bytes of MD5(session key) */
+	rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
+				    crypt_stat->key_size);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
+				"MD5 while generating root IV\n");
+		goto out;
+	}
+	memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
+out:
+	if (rc) {
+		/* Failure: zero the root IV and flag the condition */
+		memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
+		ECRYPTFS_SET_FLAG(crypt_stat->flags,
+				  ECRYPTFS_SECURITY_WARNING);
+	}
+	return rc;
+}
+
+/* Fill crypt_stat->key with key_size random bytes, mark the key
+ * valid and derive the root IV from it. The return value of
+ * ecryptfs_compute_root_iv() is not checked here; on failure that
+ * function zeroes the root IV and sets ECRYPTFS_SECURITY_WARNING. */
+static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	get_random_bytes(crypt_stat->key, crypt_stat->key_size);
+	ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+	ecryptfs_compute_root_iv(crypt_stat);
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
+		ecryptfs_dump_hex(crypt_stat->key,
+				  crypt_stat->key_size);
+	}
+}
+
+/**
+ * ecryptfs_set_default_crypt_stat_vals
+ * @crypt_stat
+ *
+ * Default values in the event that policy does not override them.
+ */
+static void ecryptfs_set_default_crypt_stat_vals(
+	struct ecryptfs_crypt_stat *crypt_stat,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+	/* Extent/IV/header geometry first, then cipher and key size */
+	ecryptfs_set_default_sizes(crypt_stat);
+	strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
+	crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
+	/* No key has been generated or parsed yet */
+	ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+	crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
+	crypt_stat->mount_crypt_stat = mount_crypt_stat;
+}
+
+/**
+ * ecryptfs_new_file_context
+ * @ecryptfs_dentry
+ *
+ * If the crypto context for the file has not yet been established,
+ * this is where we do that. Establishing a new crypto context
+ * involves the following decisions:
+ * - What cipher to use?
+ * - What set of authentication tokens to use?
+ * Here we just worry about getting enough information into the
+ * authentication tokens so that we know that they are available.
+ * We associate the available authentication tokens with the new file
+ * via the set of signatures in the crypt_stat struct. Later, when
+ * the headers are actually written out, we may again defer to
+ * userspace to perform the encryption of the session key; for the
+ * foreseeable future, this will be the case with public key packets.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+/* Associate an authentication token(s) with the file */
+int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
+{
+	int rc = 0;
+	struct ecryptfs_crypt_stat *crypt_stat =
+	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+	    &ecryptfs_superblock_to_private(
+		    ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	int cipher_name_len;
+
+	ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
+	/* See if there are mount crypt options */
+	if (mount_crypt_stat->global_auth_tok) {
+		ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
+				"file using mount_crypt_stat\n");
+		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+		/* Record the mount-wide token signature for this file.
+		 * NOTE(review): num_keysigs is incremented without a
+		 * visible bound check - confirm keysigs[] capacity. */
+		memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
+		       mount_crypt_stat->global_auth_tok_sig,
+		       ECRYPTFS_SIG_SIZE_HEX);
+		/* Override the default cipher and key size with the
+		 * mount-wide defaults */
+		cipher_name_len =
+		    strlen(mount_crypt_stat->global_default_cipher_name);
+		memcpy(crypt_stat->cipher,
+		       mount_crypt_stat->global_default_cipher_name,
+		       cipher_name_len);
+		crypt_stat->cipher[cipher_name_len] = '\0';
+		crypt_stat->key_size =
+		    mount_crypt_stat->global_default_cipher_key_size;
+		ecryptfs_generate_new_key(crypt_stat);
+	} else
+		/* We should not encounter this scenario since we
+		 * should detect lack of global_auth_tok at mount time
+		 * TODO: Applies to 0.1 release only; remove in future
+		 * release */
+		BUG();
+	rc = ecryptfs_init_crypt_ctx(crypt_stat);
+	if (rc)
+		ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
+				"context for cipher [%s]: rc = [%d]\n",
+				crypt_stat->cipher, rc);
+	return rc;
+}
+
+/**
+ * contains_ecryptfs_marker - check for the ecryptfs marker
+ * @data: The data block in which to check
+ *
+ * The marker is two big-endian 32-bit words m_1 and m_2 that satisfy
+ * (m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2.
+ *
+ * Returns one if marker found; zero if not found
+ */
+int contains_ecryptfs_marker(char *data)
+{
+	u32 m_1, m_2;
+
+	/* memcpy rather than a pointer cast: data may be unaligned */
+	memcpy(&m_1, data, 4);
+	m_1 = be32_to_cpu(m_1);
+	memcpy(&m_2, (data + 4), 4);
+	m_2 = be32_to_cpu(m_2);
+	if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
+		return 1;
+	ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
+			"MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
+			MAGIC_ECRYPTFS_MARKER);
+	ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
+			"[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
+	return 0;
+}
+
+/* Maps one bit of the on-disk flag word to one in-memory
+ * crypt_stat flag. */
+struct ecryptfs_flag_map_elem {
+	u32 file_flag;
+	u32 local_flag;
+};
+
+/* Add support for additional flags by adding elements here. */
+static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
+	{0x00000001, ECRYPTFS_ENABLE_HMAC},
+	{0x00000002, ECRYPTFS_ENCRYPTED}
+};
+
+/**
+ * ecryptfs_process_flags
+ * @crypt_stat
+ * @page_virt: Source data to be parsed
+ * @bytes_read: Updated with the number of bytes read
+ *
+ * Every flag listed in ecryptfs_flag_map is either set or cleared in
+ * crypt_stat->flags according to the on-disk word; the file version
+ * is taken from the top byte of that word.
+ *
+ * Returns zero on success; non-zero if the flag set is invalid
+ */
+static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
+				  char *page_virt, int *bytes_read)
+{
+	int rc = 0;
+	int i;
+	u32 flags;
+
+	memcpy(&flags, page_virt, 4);
+	flags = be32_to_cpu(flags);
+	for (i = 0; i < ((sizeof(ecryptfs_flag_map)
+			  / sizeof(struct ecryptfs_flag_map_elem))); i++)
+		if (flags & ecryptfs_flag_map[i].file_flag) {
+			ECRYPTFS_SET_FLAG(crypt_stat->flags,
+					  ecryptfs_flag_map[i].local_flag);
+		} else
+			ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
+					    ecryptfs_flag_map[i].local_flag);
+	/* Version is in top 8 bits of the 32-bit flag vector */
+	crypt_stat->file_version = ((flags >> 24) & 0xFF);
+	(*bytes_read) = 4;
+	return rc;
+}
+
+/**
+ * write_ecryptfs_marker
+ * @page_virt: The pointer to in a page to begin writing the marker
+ * @written: Number of bytes written
+ *
+ * Writes a random 32-bit word followed by that word XORed with
+ * MAGIC_ECRYPTFS_MARKER, both big-endian.
+ *
+ * Marker = 0x3c81b7f5
+ */
+static void write_ecryptfs_marker(char *page_virt, size_t *written)
+{
+	u32 m_1, m_2;
+
+	get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	/* XOR in CPU byte order before either word is converted */
+	m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
+	m_1 = cpu_to_be32(m_1);
+	memcpy(page_virt, &m_1,
+	       (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	m_2 = cpu_to_be32(m_2);
+	memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
+	       (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+}
+
+/* Serialize crypt_stat->flags and the file version into the 4-byte
+ * big-endian on-disk flag word at page_virt; the inverse of
+ * ecryptfs_process_flags(). Sets *written to 4. */
+static void
+write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
+		     size_t *written)
+{
+	u32 flags = 0;
+	int i;
+
+	for (i = 0; i < ((sizeof(ecryptfs_flag_map)
+			  / sizeof(struct ecryptfs_flag_map_elem))); i++)
+		if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+					ecryptfs_flag_map[i].local_flag))
+			flags |= ecryptfs_flag_map[i].file_flag;
+	/* Version is in top 8 bits of the 32-bit flag vector */
+	flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
+	flags = cpu_to_be32(flags);
+	memcpy(page_virt, &flags, 4);
+	(*written) = 4;
+}
+
+/* Pairs a kernel crypto cipher name with its OpenPGP cipher code. */
+struct ecryptfs_cipher_code_str_map_elem {
+	char cipher_str[16];
+	u16 cipher_code;
+};
+
+/* Add support for additional ciphers by adding elements here. The
+ * cipher_code is whatever OpenPGP applications use to identify the
+ * ciphers. List in order of probability.
*/
+static struct ecryptfs_cipher_code_str_map_elem
+ecryptfs_cipher_code_str_map[] = {
+	{"aes",RFC2440_CIPHER_AES_128 },
+	{"blowfish", RFC2440_CIPHER_BLOWFISH},
+	{"des3_ede", RFC2440_CIPHER_DES3_EDE},
+	{"cast5", RFC2440_CIPHER_CAST_5},
+	{"twofish", RFC2440_CIPHER_TWOFISH},
+	{"cast6", RFC2440_CIPHER_CAST_6},
+	{"aes", RFC2440_CIPHER_AES_192},
+	{"aes", RFC2440_CIPHER_AES_256}
+};
+
+/**
+ * ecryptfs_code_for_cipher_string
+ * @crypt_stat: Supplies the cipher name and, for "aes", the key size
+ *
+ * "aes" maps to three different codes depending on key size (16, 24
+ * or 32 bytes), so it is handled separately from the table lookup.
+ *
+ * Returns zero on no match, or the cipher code on match
+ */
+u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int i;
+	u16 code = 0;
+	struct ecryptfs_cipher_code_str_map_elem *map =
+		ecryptfs_cipher_code_str_map;
+
+	if (strcmp(crypt_stat->cipher, "aes") == 0) {
+		/* Any other AES key size leaves code at 0 (no match) */
+		switch (crypt_stat->key_size) {
+		case 16:
+			code = RFC2440_CIPHER_AES_128;
+			break;
+		case 24:
+			code = RFC2440_CIPHER_AES_192;
+			break;
+		case 32:
+			code = RFC2440_CIPHER_AES_256;
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+			if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
+				code = map[i].cipher_code;
+				break;
+			}
+	}
+	return code;
+}
+
+/**
+ * ecryptfs_cipher_code_to_string
+ * @str: Destination to write out the cipher name
+ * @cipher_code: The code to convert to cipher name string
+ *
+ * @str is set to the empty string when the code is not in the table.
+ *
+ * Returns zero on success; -EINVAL if the code is not recognized
+ */
+int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
+{
+	int rc = 0;
+	int i;
+
+	str[0] = '\0';
+	for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+		if (cipher_code == ecryptfs_cipher_code_str_map[i].cipher_code)
+			strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str);
+	if (str[0] == '\0') {
+		ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: "
+				"[%d]\n", cipher_code);
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+/**
+ * ecryptfs_read_header_region
+ * @data: Kernel buffer that receives the header region; must hold
+ *        at least ECRYPTFS_DEFAULT_EXTENT_SIZE bytes
+ * @dentry: Dentry of the file to read from
+ * @mnt: vfsmount the dentry is opened on
+ *
+ * Opens the file read-only and reads ECRYPTFS_DEFAULT_EXTENT_SIZE
+ * bytes from offset zero into @data. A short read is still reported
+ * as success; a negative result from the read is passed back.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_read_header_region(char *data, struct dentry *dentry,
+				struct vfsmount *mnt)
+{
+	struct file *file;
+	mm_segment_t oldfs;
+	int rc;
+
+	mnt = mntget(mnt);
+	file = dentry_open(dentry, mnt, O_RDONLY);
+	if (IS_ERR(file)) {
+		ecryptfs_printk(KERN_DEBUG, "Error opening file to "
+				"read header region\n");
+		/* NOTE(review): confirm whether dentry_open() already
+		 * drops the mnt reference on failure; if it does this
+		 * is a double put. */
+		mntput(mnt);
+		rc = PTR_ERR(file);
+		goto out;
+	}
+	file->f_pos = 0;
+	/* data is a kernel buffer, so widen the address limit around
+	 * the read */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	/* For releases 0.1 and 0.2, all of the header information
+	 * fits in the first data extent-sized region. */
+	rc = file->f_op->read(file, (char __user *)data,
+			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
+	set_fs(oldfs);
+	fput(file);
+	/* Keep a negative read error instead of overwriting it; a
+	 * positive byte count collapses to success. */
+	if (rc > 0)
+		rc = 0;
+out:
+	return rc;
+}
+
+/* Write the 6-byte header metadata at virt: header extent size as a
+ * big-endian u32 followed by the number of header extents at the
+ * front of the file as a big-endian u16. Sets *written to 6. */
+static void
+write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
+		      size_t *written)
+{
+	u32 header_extent_size;
+	u16 num_header_extents_at_front;
+
+	header_extent_size = (u32)crypt_stat->header_extent_size;
+	num_header_extents_at_front =
+		(u16)crypt_stat->num_header_extents_at_front;
+	header_extent_size = cpu_to_be32(header_extent_size);
+	memcpy(virt, &header_extent_size, 4);
+	virt += 4;
+	num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
+	memcpy(virt, &num_header_extents_at_front, 2);
+	(*written) = 6;
+}
+
+struct kmem_cache *ecryptfs_header_cache_0;
+struct kmem_cache *ecryptfs_header_cache_1;
+struct kmem_cache *ecryptfs_header_cache_2;
+
+/**
+ * ecryptfs_write_headers_virt
+ * @page_virt
+ * @crypt_stat
+ * @ecryptfs_dentry
+ *
+ * Format version: 1
+ *
+ * Header Extent:
+ * Octets 0-7: Unencrypted file size (big-endian)
+ * Octets 8-15: eCryptfs special marker
+ * Octets 16-19: Flags
+ * Octet 16: File format version number (between 0 and 255)
+ * Octets 17-18: Reserved
+ * Octet 19: Bit 1 (lsb): Reserved
+ * Bit 2: Encrypted?
+ * Bits 3-8: Reserved
+ * Octets 20-23: Header extent size (big-endian)
+ * Octets 24-25: Number of header extents at front of file
+ * (big-endian)
+ * Octet 26: Begin RFC 2440 authentication token packet set
+ * Data Extent 0:
+ * Lower data (CBC encrypted)
+ * Data Extent 1:
+ * Lower data (CBC encrypted)
+ * ...
+ *
+ * Returns zero on success
+ */
+int ecryptfs_write_headers_virt(char *page_virt,
+				struct ecryptfs_crypt_stat *crypt_stat,
+				struct dentry *ecryptfs_dentry)
+{
+	int rc;
+	size_t written;
+	size_t offset;
+
+	/* Skip the leading file-size field; this function does not
+	 * write it */
+	offset = ECRYPTFS_FILE_SIZE_BYTES;
+	write_ecryptfs_marker((page_virt + offset), &written);
+	offset += written;
+	write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
+	offset += written;
+	write_header_metadata((page_virt + offset), crypt_stat, &written);
+	offset += written;
+	/* The packet set may use whatever is left of the first page */
+	rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
+					      ecryptfs_dentry, &written,
+					      PAGE_CACHE_SIZE - offset);
+	if (rc)
+		ecryptfs_printk(KERN_WARNING, "Error generating key packet "
+				"set; rc = [%d]\n", rc);
+	return rc;
+}
+
+/**
+ * ecryptfs_write_headers
+ * @lower_file: The lower file struct, which was returned from dentry_open
+ *
+ * Write the file headers out. This will likely involve a userspace
+ * callout, in which the session key is encrypted with one or more
+ * public keys and/or the passphrase necessary to do the encryption is
+ * retrieved via a prompt. Exactly what happens at this point should
+ * be policy-dependent.
+ *
+ * The first header page carries the marker, flags, metadata and key
+ * packet set; any remaining header pages are written as zeroes. A
+ * failed or short write to the lower file is reported to the caller.
+ *
+ * Returns zero on success; non-zero on error
+ */
+int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
+			   struct file *lower_file)
+{
+	mm_segment_t oldfs;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	char *page_virt;
+	int current_header_page;
+	int header_pages;
+	ssize_t written;
+	int rc = 0;
+
+	crypt_stat = &ecryptfs_inode_to_private(
+		ecryptfs_dentry->d_inode)->crypt_stat;
+	if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+				       ECRYPTFS_ENCRYPTED))) {
+		if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+					 ECRYPTFS_KEY_VALID)) {
+			ecryptfs_printk(KERN_DEBUG, "Key is "
+					"invalid; bailing out\n");
+			rc = -EINVAL;
+			goto out;
+		}
+	} else {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING,
+				"Called with crypt_stat->encrypted == 0\n");
+		goto out;
+	}
+	/* Released in this function */
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
+	if (!page_virt) {
+		ecryptfs_printk(KERN_ERR, "Out of memory\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	memset(page_virt, 0, PAGE_CACHE_SIZE);
+	rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
+					 ecryptfs_dentry);
+	if (unlikely(rc)) {
+		ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
+		/* Scrub the partially built header before freeing */
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		goto out_free;
+	}
+	ecryptfs_printk(KERN_DEBUG,
+			"Writing key packet set to underlying file\n");
+	lower_file->f_pos = 0;
+	/* page_virt is a kernel buffer, so widen the address limit
+	 * around the writes */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+			"write() w/ header page; lower_file->f_pos = "
+			"[0x%.16x]\n", lower_file->f_pos);
+	written = lower_file->f_op->write(lower_file,
+					  (char __user *)page_virt,
+					  PAGE_CACHE_SIZE, &lower_file->f_pos);
+	if (written != PAGE_CACHE_SIZE) {
+		/* Negative: error from the lower write; short: -EIO */
+		rc = (written < 0) ? written : -EIO;
+		ecryptfs_printk(KERN_ERR, "Error writing header page to "
+				"lower file; rc = [%d]\n", rc);
+		goto out_restore_fs;
+	}
+	header_pages = ((crypt_stat->header_extent_size
+			 * crypt_stat->num_header_extents_at_front)
+			/ PAGE_CACHE_SIZE);
+	/* Remaining header pages, if any, are written as zeroes */
+	memset(page_virt, 0, PAGE_CACHE_SIZE);
+	current_header_page = 1;
+	while (current_header_page < header_pages) {
+		ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+				"write() w/ zero'd page; lower_file->f_pos = "
+				"[0x%.16x]\n", lower_file->f_pos);
+		written = lower_file->f_op->write(lower_file,
+						  (char __user *)page_virt,
+						  PAGE_CACHE_SIZE,
+						  &lower_file->f_pos);
+		if (written != PAGE_CACHE_SIZE) {
+			rc = (written < 0) ? written : -EIO;
+			ecryptfs_printk(KERN_ERR, "Error writing zero'd "
+					"header page to lower file; "
+					"rc = [%d]\n", rc);
+			goto out_restore_fs;
+		}
+		current_header_page++;
+	}
+out_restore_fs:
+	set_fs(oldfs);
+	if (!rc)
+		ecryptfs_printk(KERN_DEBUG,
+				"Done writing key packet set to underlying "
+				"file.\n");
+out_free:
+	kmem_cache_free(ecryptfs_header_cache_0, page_virt);
+out:
+	return rc;
+}
+
+/* Parse the 6-byte header metadata at virt (big-endian u32 header
+ * extent size, big-endian u16 header extent count) into crypt_stat
+ * and set *bytes_read to 6. Returns -EINVAL when the total header
+ * size is below ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; the parsed
+ * values are stored in crypt_stat even in that case. */
+static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
+				 char *virt, int *bytes_read)
+{
+	int rc = 0;
+	u32 header_extent_size;
+	u16 num_header_extents_at_front;
+
+	memcpy(&header_extent_size, virt, 4);
+	header_extent_size = be32_to_cpu(header_extent_size);
+	virt += 4;
+	memcpy(&num_header_extents_at_front, virt, 2);
+	num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
+	crypt_stat->header_extent_size = (int)header_extent_size;
+	crypt_stat->num_header_extents_at_front =
+		(int)num_header_extents_at_front;
+	(*bytes_read) = 6;
+	if ((crypt_stat->header_extent_size
+	     * crypt_stat->num_header_extents_at_front)
+	    < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
+				"[%d]\n", crypt_stat->header_extent_size);
+	}
+	return rc;
+}
+
+/**
+ * set_default_header_data
+ *
+ * For version 0 file format; this function is only for backwards
+ * compatibility for files created with the prior versions of
+ * eCryptfs.
+ */
+static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	crypt_stat->header_extent_size = 4096;
+	crypt_stat->num_header_extents_at_front = 1;
+}
+
+/**
+ * ecryptfs_read_headers_virt
+ *
+ * Read/parse the header data. The header format is detailed in the
+ * comment block for the ecryptfs_write_headers_virt() function.
+ *
+ * Returns zero on success
+ */
+static int ecryptfs_read_headers_virt(char *page_virt,
+				      struct ecryptfs_crypt_stat *crypt_stat,
+				      struct dentry *ecryptfs_dentry)
+{
+	int rc = 0;
+	int offset;
+	int bytes_read;
+
+	ecryptfs_set_default_sizes(crypt_stat);
+	crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
+		ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	/* The marker follows the leading file-size field */
+	offset = ECRYPTFS_FILE_SIZE_BYTES;
+	rc = contains_ecryptfs_marker(page_virt + offset);
+	if (rc == 0) {
+		ecryptfs_printk(KERN_WARNING, "Valid eCryptfs marker not "
+				"found\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+	rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
+				    &bytes_read);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
+		goto out;
+	}
+	if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
+		ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
+				"file version [%d] is supported by this "
+				"version of eCryptfs\n",
+				crypt_stat->file_version,
+				ECRYPTFS_SUPPORTED_FILE_VERSION);
+		rc = -EINVAL;
+		goto out;
+	}
+	offset += bytes_read;
+	if (crypt_stat->file_version >= 1) {
+		rc = parse_header_metadata(crypt_stat, (page_virt + offset),
+					   &bytes_read);
+		if (rc) {
+			ecryptfs_printk(KERN_WARNING, "Error reading header "
+					"metadata; rc = [%d]\n", rc);
+			/* Stop here so the error is not overwritten by
+			 * the packet-set parse below. */
+			goto out;
+		}
+		offset += bytes_read;
+	} else
+		/* Version 0 files carry no header metadata */
+		set_default_header_data(crypt_stat);
+	rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset),
+				       ecryptfs_dentry);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_read_headers
+ * @ecryptfs_dentry: The eCryptfs dentry whose inode holds the crypt_stat
+ * @lower_file: The open lower file to read the header from
+ *
+ * Reads ECRYPTFS_DEFAULT_EXTENT_SIZE bytes from the start of the
+ * lower file and parses them with ecryptfs_read_headers_virt(). The
+ * buffer is zeroed before being released.
+ *
+ * Returns zero if valid headers found and parsed; non-zero otherwise
+ */
+int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
+			  struct file *lower_file)
+{
+	int rc = 0;
+	char *page_virt = NULL;
+	mm_segment_t oldfs;
+	ssize_t bytes_read;
+	struct ecryptfs_crypt_stat *crypt_stat =
+	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+
+	/* Read the first page from the underlying file */
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
+	if (!page_virt) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
+		goto out;
+	}
+	lower_file->f_pos = 0;
+	/* page_virt is a kernel buffer, so widen the address limit
+	 * around the read */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	bytes_read = lower_file->f_op->read(lower_file,
+					    (char __user *)page_virt,
+					    ECRYPTFS_DEFAULT_EXTENT_SIZE,
+					    &lower_file->f_pos);
+	set_fs(oldfs);
+	if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
+		/* bytes_read is ssize_t, hence %Zd rather than %d */
+		ecryptfs_printk(KERN_ERR, "Expected size of header not read."
+				"Instead [%Zd] bytes were read\n", bytes_read);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
+					ecryptfs_dentry);
+	if (rc) {
+		ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
+				"found\n");
+		rc = -EINVAL;
+	}
+out:
+	if (page_virt) {
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		kmem_cache_free(ecryptfs_header_cache_1, page_virt);
+	}
+	return rc;
+}
+
+/**
+ * ecryptfs_encode_filename - converts a plaintext file name to cipher text
+ * @crypt_stat: The crypt_stat struct associated with the file name to encode
+ * @name: The plaintext name
+ * @length: The length of the plaintext
+ * @encoded_name: The encrypted name
+ *
+ * Encrypts and encodes a filename into something that constitutes a
+ * valid filename for a filesystem, with printable characters.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename encryption and encoding here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * Returns the length of encoded filename; negative if error
+ */
+int
+ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+			 const char *name, int length, char **encoded_name)
+{
+	char *copy;
+
+	/* The caller owns the buffer handed back through
+	 * *encoded_name; on allocation failure it is set to NULL. */
+	copy = kmalloc(length + 2, GFP_KERNEL);
+	(*encoded_name) = copy;
+	if (!copy)
+		return -ENOMEM;
+	/* TODO: Filename encryption is a scheduled feature for a
+	 * future version of eCryptfs. Until then the name is copied
+	 * through unchanged; replace this memcpy() with a call that
+	 * encrypts and encodes the filename, then set the length
+	 * accordingly. */
+	memcpy((void *)copy, (void *)name, length);
+	copy[length] = '\0';
+	/* The reported length counts the terminating NUL */
+	return length + 1;
+}
+
+/**
+ * ecryptfs_decode_filename - converts the cipher text name to plaintext
+ * @crypt_stat: The crypt_stat struct associated with the file
+ * @name: The filename in cipher text
+ * @length: The length of the cipher text name
+ * @decrypted_name: The plaintext name
+ *
+ * Decodes and decrypts the filename.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename decoding and decryption here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * The caller owns the buffer returned through @decrypted_name. The
+ * returned length does not count the terminating NUL.
+ *
+ * Returns the length of decoded filename; negative if error
+ */
+int
+ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+			 const char *name, int length, char **decrypted_name)
+{
+	int error = 0;
+
+	(*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
+	if (!(*decrypted_name)) {
+		error = -ENOMEM;
+		goto out;
+	}
+	/* TODO: Filename encryption is a scheduled feature for a
+	 * future version of eCryptfs. This function is here only for
+	 * the purpose of providing a framework for other developers
+	 * to easily implement filename encryption. Hint: Replace this
+	 * memcpy() with a call to decode and decrypt the
+	 * filename, then set the length accordingly. */
+	memcpy((void *)(*decrypted_name), (void *)name, length);
+	/* Terminate directly after the copied bytes. Writing the NUL
+	 * at [length + 1] left byte [length] uninitialized, so the
+	 * string was not reliably terminated. Only for convenience in
+	 * printing out the string in debug messages. */
+	(*decrypted_name)[length] = '\0';
+	error = length;
+out:
+	return error;
+}
+
+/**
+ * ecryptfs_process_cipher - Perform cipher initialization.
+ * @tfm: Crypto context set by this function
+ * @key_tfm: Crypto context for key material, set by this function
+ * @cipher_name: Name of the cipher.
+ * @key_size: Size of the key in bytes.
+ *
+ * Returns zero on success. Any crypto_tfm structs allocated here
+ * should be released by other functions, such as on a superblock put
+ * event, regardless of whether this function succeeds or fails.
+ */
+int
+ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
+			char *cipher_name, size_t key_size)
+{
+	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
+	int rc;
+
+	*tfm = *key_tfm = NULL;
+	/* dummy_key below is ECRYPTFS_MAX_KEY_BYTES long */
+	if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
+		       "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
+		goto out;
+	}
+	*tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
+					      | CRYPTO_TFM_REQ_WEAK_KEY));
+	if (!(*tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Unable to allocate crypto cipher with name "
+		       "[%s]\n", cipher_name);
+		goto out;
+	}
+	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
+	if (!(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Unable to allocate crypto cipher with name "
+		       "[%s]\n", cipher_name);
+		goto out;
+	}
+	/* Range-check the key size against both contexts */
+	if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
+		goto out;
+	}
+	if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
+		goto out;
+	}
+	if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
+		rc = -EINVAL;
+		/* Report the maximum, which is the limit that was hit */
+		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_max_keysize(*tfm));
+		goto out;
+	}
+	if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_max_keysize(*key_tfm));
+		goto out;
+	}
+	/* Trial setkey with random bytes on both contexts to confirm
+	 * the cipher accepts a key of this size */
+	get_random_bytes(dummy_key, key_size);
+	rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
diff -urN oldtree/fs/ecryptfs/debug.c newtree/fs/ecryptfs/debug.c
--- oldtree/fs/ecryptfs/debug.c 1969-12-31 19:00:00.000000000 -0500
+++ newtree/fs/ecryptfs/debug.c 2006-09-30 04:33:46.000000000 -0400
@@ -0,0 +1,123 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Functions only useful for debugging.
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_dump_auth_tok - debug function to print auth toks
+ *
+ * This function will print the contents of an ecryptfs authentication
+ * token.
+ */ +void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok) +{ + char salt[ECRYPTFS_SALT_SIZE * 2 + 1]; + char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; + + ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n", + auth_tok); + if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) { + ecryptfs_printk(KERN_DEBUG, " * private key type\n"); + ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT " + "IN ECRYPTFS VERSION 0.1)\n"); + } else { + ecryptfs_printk(KERN_DEBUG, " * passphrase type\n"); + ecryptfs_to_hex(salt, auth_tok->token.password.salt, + ECRYPTFS_SALT_SIZE); + salt[ECRYPTFS_SALT_SIZE * 2] = '\0'; + ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt); + if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, + ECRYPTFS_PERSISTENT_PASSWORD)) { + ecryptfs_printk(KERN_DEBUG, " * persistent\n"); + } + memcpy(sig, auth_tok->token.password.signature, + ECRYPTFS_SIG_SIZE_HEX); + sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; + ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig); + } + ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n", + auth_tok->session_key.flags); + if (auth_tok->session_key.flags + & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT) + ecryptfs_printk(KERN_DEBUG, + " * Userspace decrypt request set\n"); + if (auth_tok->session_key.flags + & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT) + ecryptfs_printk(KERN_DEBUG, + " * Userspace encrypt request set\n"); + if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) { + ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n"); + ecryptfs_printk(KERN_DEBUG, + " * session_key.decrypted_key_size = [0x%x]\n", + auth_tok->session_key.decrypted_key_size); + ecryptfs_printk(KERN_DEBUG, " * Decrypted session key " + "dump:\n"); + if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex(auth_tok->session_key.decrypted_key, + ECRYPTFS_DEFAULT_KEY_BYTES); + } + if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) { + ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n"); + 
ecryptfs_printk(KERN_DEBUG, + " * session_key.encrypted_key_size = [0x%x]\n", + auth_tok->session_key.encrypted_key_size); + ecryptfs_printk(KERN_DEBUG, " * Encrypted session key " + "dump:\n"); + if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex(auth_tok->session_key.encrypted_key, + auth_tok->session_key. + encrypted_key_size); + } +} + +/** + * ecryptfs_dump_hex - debug hex printer + * @data: string of bytes to be printed + * @bytes: number of bytes to print + * + * Dump hexadecimal representation of char array + */ +void ecryptfs_dump_hex(char *data, int bytes) +{ + int i = 0; + int add_newline = 1; + + if (ecryptfs_verbosity < 1) + return; + if (bytes != 0) { + printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]); + i++; + } + while (i < bytes) { + printk("0x%.2x.", (unsigned char)data[i]); + i++; + if (i % 16 == 0) { + printk("\n"); + add_newline = 0; + } else + add_newline = 1; + } + if (add_newline) + printk("\n"); +} + diff -urN oldtree/fs/ecryptfs/dentry.c newtree/fs/ecryptfs/dentry.c --- oldtree/fs/ecryptfs/dentry.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/dentry.c 2006-09-30 04:33:04.000000000 -0400 @@ -0,0 +1,87 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include +#include +#include "ecryptfs_kernel.h" + +/** + * ecryptfs_d_revalidate - revalidate an ecryptfs dentry + * @dentry: The ecryptfs dentry + * @nd: The associated nameidata + * + * Called when the VFS needs to revalidate a dentry. This + * is called whenever a name lookup finds a dentry in the + * dcache. Most filesystems leave this as NULL, because all their + * dentries in the dcache are valid. + * + * Returns 1 if valid, 0 otherwise. + * + */ +static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct dentry *dentry_save; + struct vfsmount *vfsmount_save; + int rc = 1; + + if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) + goto out; + dentry_save = nd->dentry; + vfsmount_save = nd->mnt; + nd->dentry = lower_dentry; + nd->mnt = lower_mnt; + rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); + nd->dentry = dentry_save; + nd->mnt = vfsmount_save; +out: + return rc; +} + +struct kmem_cache *ecryptfs_dentry_info_cache; + +/** + * ecryptfs_d_release + * @dentry: The ecryptfs dentry + * + * Called when a dentry is really deallocated. 
+ */ +static void ecryptfs_d_release(struct dentry *dentry) +{ + struct dentry *lower_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (ecryptfs_dentry_to_private(dentry)) + kmem_cache_free(ecryptfs_dentry_info_cache, + ecryptfs_dentry_to_private(dentry)); + if (lower_dentry) + dput(lower_dentry); + return; +} + +struct dentry_operations ecryptfs_dops = { + .d_revalidate = ecryptfs_d_revalidate, + .d_release = ecryptfs_d_release, +}; diff -urN oldtree/fs/ecryptfs/ecryptfs_kernel.h newtree/fs/ecryptfs/ecryptfs_kernel.h --- oldtree/fs/ecryptfs/ecryptfs_kernel.h 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/ecryptfs_kernel.h 2006-09-30 04:33:46.000000000 -0400 @@ -0,0 +1,479 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * Kernel declarations. + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */
+
+#ifndef ECRYPTFS_KERNEL_H
+#define ECRYPTFS_KERNEL_H
+
+#include
+#include
+#include
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE)
+
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so they are better
+ * adapted to be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_DEFAULT_IV_BYTES 16
+#define ECRYPTFS_FILE_VERSION 0x01
+#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
+#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
+#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
+
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+/* Bit-flag helpers.  Both arguments are parenthesised so that any
+ * lvalue expression (e.g. *ptr) can be passed as the bit vector. */
+#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) ((flag_bit_vector) |= (flag))
+#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) ((flag_bit_vector) &= ~(flag))
+#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) ((flag_bit_vector) & (flag))
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable.  For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+	u32 flags;
+	u32 encrypted_key_size;
+	u32 decrypted_key_size;
+	u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+	u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+	u32 password_bytes;
+	s32 hash_algo;
+	u32 hash_iterations;
+	u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+	u32 flags;
+	/* Iterated-hash concatenation of salt and passphrase */
+	u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+	/* Always in expanded hex (ECRYPTFS_SIG_SIZE_HEX bytes + 1) */
+	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+	/* ECRYPTFS_SALT_SIZE bytes; ecryptfs_dump_auth_tok() hex-encodes
+	 * this before printing it */
+	u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+	u16 version; /* 8-bit major and 8-bit minor */
+	u16 token_type;
+	u32 flags;
+	struct ecryptfs_session_key session_key;
+	u8 reserved[32];
+	union {
+		struct ecryptfs_password password;
+		/* Private key is in future eCryptfs releases */
+	} token;
+} __attribute__ ((packed));
+
+void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
+extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
+
+struct 
ecryptfs_key_record {
+	unsigned char type;
+	size_t enc_key_size;
+	/* ECRYPTFS_SIG_SIZE bytes, i.e. not the expanded-hex size */
+	unsigned char sig[ECRYPTFS_SIG_SIZE];
+	unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+};
+
+struct ecryptfs_auth_tok_list {
+	struct ecryptfs_auth_tok *auth_tok;
+	struct list_head list;
+};
+
+struct ecryptfs_crypt_stat;
+struct ecryptfs_mount_crypt_stat;
+
+struct ecryptfs_page_crypt_context {
+	struct page *page;
+#define ECRYPTFS_PREPARE_COMMIT_MODE 0
+#define ECRYPTFS_WRITEPAGE_MODE 1
+	unsigned int mode;
+	/* NOTE(review): presumably mode selects which member is valid
+	 * (lower_file for prepare/commit, wbc for writepage) - confirm
+	 * against ecryptfs_encrypt_page() */
+	union {
+		struct file *lower_file;
+		struct writeback_control *wbc;
+	} param;
+};
+
+/* Returns the auth tok stored in the data area of the key's payload;
+ * key->payload.data is interpreted as a struct user_key_payload. */
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_key_payload_data(struct key *key)
+{
+	return (struct ecryptfs_auth_tok *)
+		(((struct user_key_payload*)key->payload.data)->data);
+}
+
+#define ECRYPTFS_SUPER_MAGIC 0xf15f
+#define ECRYPTFS_MAX_KEYSET_SIZE 1024
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
+#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
+#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
+#define ECRYPTFS_SALT_BYTES 2
+#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
+#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
+#define ECRYPTFS_FILE_SIZE_BYTES 8
+#define ECRYPTFS_DEFAULT_CIPHER "aes"
+#define ECRYPTFS_DEFAULT_KEY_BYTES 16
+#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
+#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
+#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
+#define MD5_DIGEST_SIZE 16
+
+/**
+ * This is the primary struct associated with each encrypted file.
+ *
+ * TODO: cache align/pack?
+ */
+struct ecryptfs_crypt_stat {
+#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
+#define ECRYPTFS_POLICY_APPLIED     0x00000002
+#define ECRYPTFS_NEW_FILE           0x00000004
+#define ECRYPTFS_ENCRYPTED          0x00000008
+#define ECRYPTFS_SECURITY_WARNING   0x00000010
+#define ECRYPTFS_ENABLE_HMAC        0x00000020
+#define ECRYPTFS_ENCRYPT_IV_PAGES   0x00000040
+#define ECRYPTFS_KEY_VALID          0x00000080
+	u32 flags;	/* bitwise OR of the ECRYPTFS_* values above */
+	unsigned int file_version;
+	size_t iv_bytes;
+	size_t num_keysigs;
+	size_t header_extent_size;
+	size_t num_header_extents_at_front;
+	size_t extent_size; /* Data extent size; default is 4096 */
+	size_t key_size;
+	size_t extent_shift;
+	unsigned int extent_mask;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+	struct crypto_tfm *tfm;
+	struct crypto_tfm *md5_tfm; /* Crypto context for generating
+				     * the initialization vectors */
+	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
+	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
+	/* NOTE(review): each row is exactly ECRYPTFS_SIG_SIZE_HEX bytes,
+	 * with no room for a trailing NUL (unlike global_auth_tok_sig) -
+	 * confirm no user treats a row as a C string */
+	unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
+	struct mutex cs_tfm_mutex;
+	struct mutex cs_md5_tfm_mutex;
+	struct mutex cs_mutex;
+};
+
+/* inode private data. */
+struct ecryptfs_inode_info {
+	/* Embedded VFS inode; ecryptfs_inode_to_private() recovers the
+	 * enclosing struct from it with container_of() */
+	struct inode vfs_inode;
+	struct inode *wii_inode;	/* lower inode */
+	struct ecryptfs_crypt_stat crypt_stat;
+};
+
+/* dentry private data. Each dentry must keep track of a lower
+ * vfsmount too. */
+struct ecryptfs_dentry_info {
+	struct dentry *wdi_dentry;	/* lower dentry */
+	struct vfsmount *lower_mnt;
+	struct ecryptfs_crypt_stat *crypt_stat;
+};
+
+/**
+ * This struct is to enable a mount-wide passphrase/salt combo. This
+ * is more or less a stopgap to provide similar functionality to other
+ * crypto filesystems like EncFS or CFS until full policy support is
+ * implemented in eCryptfs.
+ */
+struct ecryptfs_mount_crypt_stat {
+	/* Pointers to memory we do not own, do not free these */
+	struct ecryptfs_auth_tok *global_auth_tok;
+	struct key *global_auth_tok_key;
+	size_t global_default_cipher_key_size;
+	struct crypto_tfm *global_key_tfm;
+	struct mutex global_key_tfm_mutex;
+	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
+						 + 1];
+	unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+};
+
+/* superblock private data. */
+struct ecryptfs_sb_info {
+	struct super_block *wsi_sb;	/* lower superblock */
+	struct ecryptfs_mount_crypt_stat mount_crypt_stat;
+};
+
+/* file private data. */
+struct ecryptfs_file_info {
+	struct file *wfi_file;		/* lower file */
+	struct ecryptfs_crypt_stat *crypt_stat;
+};
+
+/* auth_tok <=> encrypted_session_key mappings */
+struct ecryptfs_auth_tok_list_item {
+	unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES];
+	struct list_head list;
+	struct ecryptfs_auth_tok auth_tok;
+};
+
+/* The accessors below translate between eCryptfs objects and their
+ * private data / lower-filesystem counterparts.  The *_to_lower and
+ * set_*_lower variants dereference the private data pointer without
+ * checking it, so the private data must already be attached. */
+
+/* Returns file->private_data as the eCryptfs file info (may be NULL). */
+static inline struct ecryptfs_file_info *
+ecryptfs_file_to_private(struct file *file)
+{
+	return (struct ecryptfs_file_info *)file->private_data;
+}
+
+/* Attaches file_info as the file's private data. */
+static inline void
+ecryptfs_set_file_private(struct file *file,
+			  struct ecryptfs_file_info *file_info)
+{
+	file->private_data = file_info;
+}
+
+/* Returns the lower file recorded in the file's private data. */
+static inline struct file *ecryptfs_file_to_lower(struct file *file)
+{
+	return ((struct ecryptfs_file_info *)file->private_data)->wfi_file;
+}
+
+/* Records lower_file in the file's private data. */
+static inline void
+ecryptfs_set_file_lower(struct file *file, struct file *lower_file)
+{
+	((struct ecryptfs_file_info *)file->private_data)->wfi_file =
+		lower_file;
+}
+
+/* Maps a VFS inode to the ecryptfs_inode_info that embeds it. */
+static inline struct ecryptfs_inode_info *
+ecryptfs_inode_to_private(struct inode *inode)
+{
+	return container_of(inode, struct ecryptfs_inode_info, vfs_inode);
+}
+
+/* Returns the lower inode recorded for this eCryptfs inode. */
+static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode)
+{
+	return ecryptfs_inode_to_private(inode)->wii_inode;
+}
+
+/* Records lower_inode for this eCryptfs inode. */
+static inline void
+ecryptfs_set_inode_lower(struct inode *inode, struct inode *lower_inode)
+{
+	
ecryptfs_inode_to_private(inode)->wii_inode = lower_inode; +} + +static inline struct ecryptfs_sb_info * +ecryptfs_superblock_to_private(struct super_block *sb) +{ + return (struct ecryptfs_sb_info *)sb->s_fs_info; +} + +static inline void +ecryptfs_set_superblock_private(struct super_block *sb, + struct ecryptfs_sb_info *sb_info) +{ + sb->s_fs_info = sb_info; +} + +static inline struct super_block * +ecryptfs_superblock_to_lower(struct super_block *sb) +{ + return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb; +} + +static inline void +ecryptfs_set_superblock_lower(struct super_block *sb, + struct super_block *lower_sb) +{ + ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb; +} + +static inline struct ecryptfs_dentry_info * +ecryptfs_dentry_to_private(struct dentry *dentry) +{ + return (struct ecryptfs_dentry_info *)dentry->d_fsdata; +} + +static inline void +ecryptfs_set_dentry_private(struct dentry *dentry, + struct ecryptfs_dentry_info *dentry_info) +{ + dentry->d_fsdata = dentry_info; +} + +static inline struct dentry * +ecryptfs_dentry_to_lower(struct dentry *dentry) +{ + return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry; +} + +static inline void +ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry = + lower_dentry; +} + +static inline struct vfsmount * +ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) +{ + return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt; +} + +static inline void +ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt = + lower_mnt; +} + +#define ecryptfs_printk(type, fmt, arg...) 
\ + __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); +void __ecryptfs_printk(const char *fmt, ...); + +extern const struct file_operations ecryptfs_main_fops; +extern const struct file_operations ecryptfs_dir_fops; +extern struct inode_operations ecryptfs_main_iops; +extern struct inode_operations ecryptfs_dir_iops; +extern struct inode_operations ecryptfs_symlink_iops; +extern struct super_operations ecryptfs_sops; +extern struct dentry_operations ecryptfs_dops; +extern struct address_space_operations ecryptfs_aops; +extern int ecryptfs_verbosity; + +extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache; +extern struct kmem_cache *ecryptfs_file_info_cache; +extern struct kmem_cache *ecryptfs_dentry_info_cache; +extern struct kmem_cache *ecryptfs_inode_info_cache; +extern struct kmem_cache *ecryptfs_sb_info_cache; +extern struct kmem_cache *ecryptfs_header_cache_0; +extern struct kmem_cache *ecryptfs_header_cache_1; +extern struct kmem_cache *ecryptfs_header_cache_2; +extern struct kmem_cache *ecryptfs_lower_page_cache; + +int ecryptfs_interpose(struct dentry *hidden_dentry, + struct dentry *this_dentry, struct super_block *sb, + int flag); +int ecryptfs_fill_zeros(struct file *file, loff_t new_length); +int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, + const char *name, int length, + char **decrypted_name); +int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, + const char *name, int length, + char **encoded_name); +struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); +void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src); +void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src); +void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src); +void ecryptfs_dump_hex(char *data, int bytes); +int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, + int sg_size); +int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); 
+void ecryptfs_rotate_iv(unsigned char *iv); +void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); +void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); +void ecryptfs_destruct_mount_crypt_stat( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat); +int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_write_inode_size_to_header(struct file *lower_file, + struct inode *lower_inode, + struct inode *inode); +int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, + struct file *lower_file, + unsigned long lower_page_index, int byte_offset, + int region_bytes); +int +ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, + struct file *lower_file, int byte_offset, + int region_size); +int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, + struct file *lower_file); +int ecryptfs_do_readpage(struct file *file, struct page *page, + pgoff_t lower_page_index); +int ecryptfs_grab_and_map_lower_page(struct page **lower_page, + char **lower_virt, + struct inode *lower_inode, + unsigned long lower_page_index); +int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, + struct inode *lower_inode, + struct writeback_control *wbc); +int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx); +int ecryptfs_decrypt_page(struct file *file, struct page *page); +int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, + struct file *lower_file); +int ecryptfs_write_headers_virt(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry); +int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, + struct file *lower_file); +int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); +int contains_ecryptfs_marker(char *data); +int ecryptfs_read_header_region(char *data, struct dentry *dentry, + struct vfsmount *mnt); +u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat 
*crypt_stat); +int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code); +void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_generate_key_packet_set(char *dest_base, + struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry, + size_t *len, size_t max); +int process_request_key_err(long err_code); +int +ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, + unsigned char *src, struct dentry *ecryptfs_dentry); +int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); +int +ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm, + char *cipher_name, size_t key_size); +int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); +int ecryptfs_inode_set(struct inode *inode, void *lower_inode); +void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); + +#endif /* #ifndef ECRYPTFS_KERNEL_H */ diff -urN oldtree/fs/ecryptfs/file.c newtree/fs/ecryptfs/file.c --- oldtree/fs/ecryptfs/file.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/file.c 2006-09-30 04:33:04.000000000 -0400 @@ -0,0 +1,422 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * Michael C. Thompson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ecryptfs_kernel.h" + +/** + * ecryptfs_llseek + * @file: File we are seeking in + * @offset: The offset to seek to + * @origin: 2 - offset from i_size; 1 - offset from f_pos + * + * Returns the position we have seeked to, or negative on error + */ +static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t rv; + loff_t new_end_pos; + int rc; + int expanding_file = 0; + struct inode *inode = file->f_mapping->host; + + /* If our offset is past the end of our file, we're going to + * need to grow it so we have a valid length of 0's */ + new_end_pos = offset; + switch (origin) { + case 2: + new_end_pos += i_size_read(inode); + expanding_file = 1; + break; + case 1: + new_end_pos += file->f_pos; + if (new_end_pos > i_size_read(inode)) { + ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " + "> i_size_read(inode)(=[0x%.16x])\n", + new_end_pos, i_size_read(inode)); + expanding_file = 1; + } + break; + default: + if (new_end_pos > i_size_read(inode)) { + ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " + "> i_size_read(inode)(=[0x%.16x])\n", + new_end_pos, i_size_read(inode)); + expanding_file = 1; + } + } + ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos); + if (expanding_file) { + rc = ecryptfs_truncate(file->f_dentry, new_end_pos); + if (rc) { + rv = rc; + ecryptfs_printk(KERN_ERR, "Error on attempt to " + "truncate to (higher) offset [0x%.16x];" + " rc = [%d]\n", new_end_pos, rc); + goto out; + } + } + rv = generic_file_llseek(file, offset, origin); +out: + return rv; +} + +/** + * ecryptfs_read_update_atime + * + * generic_file_read updates the atime of upper layer inode. 
But, it + * doesn't give us a chance to update the atime of the lower layer + * inode. This function is a wrapper to generic_file_read. It + * updates the atime of the lower level inode if generic_file_read + * returns without any errors. This is to be used only for file reads. + * The function to be used for directory reads is ecryptfs_read. + */ +static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + int rc; + struct dentry *lower_dentry; + struct vfsmount *lower_vfsmount; + struct file *file = iocb->ki_filp; + + rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + /* + * Even though this is a async interface, we need to wait + * for IO to finish to update atime + */ + if (-EIOCBQUEUED == rc) + rc = wait_on_sync_kiocb(iocb); + if (rc >= 0) { + lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry); + lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry); + touch_atime(lower_vfsmount, lower_dentry); + } + return rc; +} + +struct ecryptfs_getdents_callback { + void *dirent; + struct dentry *dentry; + filldir_t filldir; + int err; + int filldir_called; + int entries_written; +}; + +/* Inspired by generic filldir in fs/readir.c */ +static int +ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, + ino_t ino, unsigned int d_type) +{ + struct ecryptfs_crypt_stat *crypt_stat; + struct ecryptfs_getdents_callback *buf = + (struct ecryptfs_getdents_callback *)dirent; + int rc; + int decoded_length; + char *decoded_name; + + crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat; + buf->filldir_called++; + decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen, + &decoded_name); + if (decoded_length < 0) { + rc = decoded_length; + goto out; + } + rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset, + ino, d_type); + kfree(decoded_name); + if (rc >= 0) + buf->entries_written++; +out: + return rc; +} + +/** + * ecryptfs_readdir 
+ * @file: The ecryptfs file struct + * @dirent: Directory entry + * @filldir: The filldir callback function + */ +static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int rc; + struct file *lower_file; + struct inode *inode; + struct ecryptfs_getdents_callback buf; + + lower_file = ecryptfs_file_to_lower(file); + lower_file->f_pos = file->f_pos; + inode = file->f_dentry->d_inode; + memset(&buf, 0, sizeof(buf)); + buf.dirent = dirent; + buf.dentry = file->f_dentry; + buf.filldir = filldir; +retry: + buf.filldir_called = 0; + buf.entries_written = 0; + buf.err = 0; + rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); + if (buf.err) + rc = buf.err; + if (buf.filldir_called && !buf.entries_written) + goto retry; + file->f_pos = lower_file->f_pos; + if (rc >= 0) + ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode); + return rc; +} + +struct kmem_cache *ecryptfs_file_info_cache; + +/** + * ecryptfs_open + * @inode: inode speciying file to open + * @file: Structure to return filled in + * + * Opens the file specified by inode. 
+ * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_open(struct inode *inode, struct file *file) +{ + int rc = 0; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + struct dentry *ecryptfs_dentry = file->f_dentry; + /* Private value of ecryptfs_dentry allocated in + * ecryptfs_lookup() */ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + struct inode *lower_inode = NULL; + struct file *lower_file = NULL; + struct vfsmount *lower_mnt; + struct ecryptfs_file_info *file_info; + int lower_flags; + + /* Released in ecryptfs_release or end of function if failure */ + file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL); + ecryptfs_set_file_private(file, file_info); + if (!file_info) { + ecryptfs_printk(KERN_ERR, + "Error attempting to allocate memory\n"); + rc = -ENOMEM; + goto out; + } + memset(file_info, 0, sizeof(*file_info)); + lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + mutex_lock(&crypt_stat->cs_mutex); + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) { + ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); + /* Policy code enabled in future release */ + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); + } + mutex_unlock(&crypt_stat->cs_mutex); + /* This mntget & dget is undone via fput when the file is released */ + dget(lower_dentry); + lower_flags = file->f_flags; + if ((lower_flags & O_ACCMODE) == O_WRONLY) + lower_flags = (lower_flags & O_ACCMODE) | O_RDWR; + if (file->f_flags & O_APPEND) + lower_flags &= ~O_APPEND; + lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); + mntget(lower_mnt); + /* Corresponding fput() in ecryptfs_release() */ + lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags); + if (IS_ERR(lower_file)) { + rc = PTR_ERR(lower_file); + ecryptfs_printk(KERN_ERR, "Error opening lower 
file\n"); + goto out_puts; + } + ecryptfs_set_file_lower(file, lower_file); + /* Isn't this check the same as the one in lookup? */ + lower_inode = lower_dentry->d_inode; + if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); + rc = 0; + goto out; + } + mutex_lock(&crypt_stat->cs_mutex); + if (i_size_read(lower_inode) == 0) { + ecryptfs_printk(KERN_EMERG, "Zero-length lower file; " + "ecryptfs_create() had a problem?\n"); + rc = -ENOENT; + mutex_unlock(&crypt_stat->cs_mutex); + goto out_puts; + } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_POLICY_APPLIED) + || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_KEY_VALID)) { + rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file); + if (rc) { + ecryptfs_printk(KERN_DEBUG, + "Valid headers not found\n"); + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, + ECRYPTFS_ENCRYPTED); + /* At this point, we could just move on and + * have the encrypted data passed through + * as-is to userspace. For release 0.1, we are + * going to default to -EIO. 
*/ + rc = -EIO; + mutex_unlock(&crypt_stat->cs_mutex); + goto out_puts; + } + } + mutex_unlock(&crypt_stat->cs_mutex); + ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " + "size: [0x%.16x]\n", inode, inode->i_ino, + i_size_read(inode)); + ecryptfs_set_file_lower(file, lower_file); + goto out; +out_puts: + mntput(lower_mnt); + dput(lower_dentry); + kmem_cache_free(ecryptfs_file_info_cache, + ecryptfs_file_to_private(file)); +out: + return rc; +} + +static int ecryptfs_flush(struct file *file, fl_owner_t td) +{ + int rc = 0; + struct file *lower_file = NULL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->flush) + rc = lower_file->f_op->flush(lower_file, td); + return rc; +} + +static int ecryptfs_release(struct inode *inode, struct file *file) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); + struct inode *lower_inode = ecryptfs_inode_to_lower(inode); + + fput(lower_file); + inode->i_blocks = lower_inode->i_blocks; + kmem_cache_free(ecryptfs_file_info_cache, file_info); + return 0; +} + +static int +ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_inode = lower_dentry->d_inode; + int rc = -EINVAL; + + if (lower_inode->i_fop->fsync) { + mutex_lock(&lower_inode->i_mutex); + rc = lower_inode->i_fop->fsync(lower_file, lower_dentry, + datasync); + mutex_unlock(&lower_inode->i_mutex); + } + return rc; +} + +static int ecryptfs_fasync(int fd, struct file *file, int flag) +{ + int rc = 0; + struct file *lower_file = NULL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->fasync) + rc = lower_file->f_op->fasync(fd, lower_file, flag); + return rc; +} + +static ssize_t ecryptfs_sendfile(struct file *file, loff_t * 
ppos, + size_t count, read_actor_t actor, void *target) +{ + struct file *lower_file = NULL; + int rc = -EINVAL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->sendfile) + rc = lower_file->f_op->sendfile(lower_file, ppos, count, + actor, target); + + return rc; +} + +static int ecryptfs_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +const struct file_operations ecryptfs_dir_fops = { + .readdir = ecryptfs_readdir, + .ioctl = ecryptfs_ioctl, + .mmap = generic_file_mmap, + .open = ecryptfs_open, + .flush = ecryptfs_flush, + .release = ecryptfs_release, + .fsync = ecryptfs_fsync, + .fasync = ecryptfs_fasync, + .sendfile = ecryptfs_sendfile, +}; + +const struct file_operations ecryptfs_main_fops = { + .llseek = ecryptfs_llseek, + .read = do_sync_read, + .aio_read = ecryptfs_read_update_atime, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .readdir = ecryptfs_readdir, + .ioctl = ecryptfs_ioctl, + .mmap = generic_file_mmap, + .open = ecryptfs_open, + .flush = ecryptfs_flush, + .release = ecryptfs_release, + .fsync = ecryptfs_fsync, + .fasync = ecryptfs_fasync, + .sendfile = ecryptfs_sendfile, +}; + +static int +ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + struct file *lower_file = NULL; + + if (ecryptfs_file_to_private(file)) + lower_file = ecryptfs_file_to_lower(file); + if (lower_file && lower_file->f_op && lower_file->f_op->ioctl) + rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode), + lower_file, cmd, arg); + else + rc = -ENOTTY; + return rc; +} diff -urN oldtree/fs/ecryptfs/inode.c newtree/fs/ecryptfs/inode.c --- oldtree/fs/ecryptfs/inode.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/inode.c 2006-09-30 04:33:33.000000000 -0400 @@ -0,0 +1,1079 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 
Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow
+ *              Michael C. Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ecryptfs_kernel.h"
+/**
+ * lock_parent
+ * @dentry: dentry whose parent directory is to be locked
+ *
+ * Takes a reference on @dentry->d_parent and locks that directory
+ * inode's i_mutex.
+ *
+ * Returns the referenced, locked parent dentry
+ */
+static struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir;
+
+	dir = dget(dentry->d_parent);
+	mutex_lock(&(dir->d_inode->i_mutex));
+	return dir;
+}
+/**
+ * unlock_parent
+ * @dentry: dentry whose parent directory is to be unlocked
+ *
+ * Unlocks the i_mutex of @dentry->d_parent and drops one reference on
+ * it. The parent is re-read from @dentry here rather than passed in.
+ */
+static void unlock_parent(struct dentry *dentry)
+{
+	mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
+	dput(dentry->d_parent);
+}
+/**
+ * unlock_dir
+ * @dir: directory dentry, as returned by lock_parent()
+ *
+ * Unlocks the directory inode's i_mutex and drops one reference on @dir.
+ */
+static void unlock_dir(struct dentry *dir)
+{
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(dir);
+}
+/**
+ * ecryptfs_copy_inode_size
+ * @dst: inode to update
+ * @src: inode to read from
+ *
+ * Copies the file size and i_blocks from @src to @dst.
+ */
+void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src)
+{
+	/* the cast only drops the const qualifier for i_size_read() */
+	i_size_write(dst, i_size_read((struct inode *)src));
+	dst->i_blocks = src->i_blocks;
+}
+/**
+ * ecryptfs_copy_attr_atime
+ *
+ * Copies only the access time from @src to @dest.
+ */
+void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src)
+{
+	dest->i_atime = src->i_atime;
+}
+/**
+ * ecryptfs_copy_attr_times
+ *
+ * Copies the access, modification and change times from @src to @dest.
+ */
+static void ecryptfs_copy_attr_times(struct inode *dest,
+				     const struct inode *src)
+{
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+}
+/**
+ * ecryptfs_copy_attr_timesizes
+ *
+ * Copies the three timestamps and, via ecryptfs_copy_inode_size(), the
+ * size and block count from @src to @dest.
+ */
+static void ecryptfs_copy_attr_timesizes(struct inode *dest,
+					 const struct inode *src)
+{
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	ecryptfs_copy_inode_size(dest, src);
+}
+/**
+ * ecryptfs_copy_attr_all
+ *
+ * Copies mode, link count, owner, device, timestamps, block-size bits
+ * and flags from @src to @dest. The file size and i_blocks are not
+ * copied here.
+ */
+void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = src->i_mode;
+	dest->i_nlink = src->i_nlink;
+	dest->i_uid = src->i_uid;
+	dest->i_gid = src->i_gid;
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+}
+
+/**
+ * ecryptfs_create_underlying_file
+ * @lower_dir_inode: inode of the parent in the lower fs of the new file
+ * @dentry: New file's dentry in ecryptfs; the lower dentry and lower
+ *          vfsmount are looked up from it
+ * @mode: The mode of the new file
+ * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
+ *
+ * Creates the file in the lower file system. While vfs_create() runs,
+ * @nd->dentry and @nd->mnt are pointed at the lower dentry and lower
+ * vfsmount; both are restored before returning.
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
+				struct dentry *dentry, int mode,
+				struct nameidata *nd)
+{
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+	struct dentry *dentry_save;
+	struct vfsmount *vfsmount_save;
+	int rc;
+
+	dentry_save = nd->dentry;
+	vfsmount_save = nd->mnt;
+	nd->dentry = lower_dentry;
+	nd->mnt = lower_mnt;
+	rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
+	nd->dentry = dentry_save;
+	nd->mnt = vfsmount_save;
+	return rc;
+}
+
+/**
+ * ecryptfs_do_create
+ * @directory_inode: inode of the new file's dentry's parent in ecryptfs
+ * @ecryptfs_dentry: New file's dentry in ecryptfs
+ * @mode: The mode of the new file
+ * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
+ *
+ * Creates the underlying file and the eCryptfs inode which will link to
+ * it. It will also update the eCryptfs directory inode to mimic the
+ * stat of the lower directory inode.
+ *
+ * The lower parent directory stays locked (see lock_parent()) from
+ * before the lower create until the attributes have been copied.
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_do_create(struct inode *directory_inode,
+		   struct dentry *ecryptfs_dentry, int mode,
+		   struct nameidata *nd)
+{
+	int rc;
+	struct dentry *lower_dentry;
+	struct dentry *lower_dir_dentry;
+
+	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+	lower_dir_dentry = lock_parent(lower_dentry);
+	/* NOTE(review): lock_parent() as defined in this file returns the
+	 * dget()'d parent and never an ERR_PTR, so this branch looks
+	 * unreachable -- confirm before relying on it. */
+	if (unlikely(IS_ERR(lower_dir_dentry))) {
+		ecryptfs_printk(KERN_ERR, "Error locking directory of "
+				"dentry\n");
+		rc = PTR_ERR(lower_dir_dentry);
+		goto out;
+	}
+	rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
+					     ecryptfs_dentry, mode, nd);
+	if (unlikely(rc)) {
+		ecryptfs_printk(KERN_ERR,
+				"Failure to create underlying file\n");
+		goto out_lock;
+	}
+	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
+				directory_inode->i_sb, 0);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
+		goto out_lock;
+	}
+	/* make the eCryptfs directory show the lower directory's times,
+	 * size and block count */
+	ecryptfs_copy_attr_timesizes(directory_inode,
+				     lower_dir_dentry->d_inode);
+out_lock:
+	unlock_dir(lower_dir_dentry);
+out:
+	return rc;
+}
+
+/**
+ * grow_file
+ * @ecryptfs_dentry: the ecryptfs dentry
+ * @lower_file: The lower file
+ * @inode: The ecryptfs inode
+ * @lower_inode: The lower inode
+ *
+ * This is the code which will grow the file to its correct size.
+ *
+ * A temporary struct file and file-info on the stack stand in for an
+ * open eCryptfs file so that ecryptfs_fill_zeros() can be called. On
+ * failure ECRYPTFS_SECURITY_WARNING is set on the inode's crypt_stat;
+ * on success the eCryptfs size is set to zero, written to the header
+ * and ECRYPTFS_NEW_FILE is set.
+ *
+ * Returns zero on success; the ecryptfs_fill_zeros() error otherwise
+ */
+static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
+		     struct inode *inode, struct inode *lower_inode)
+{
+	int rc = 0;
+	struct file fake_file;
+	struct ecryptfs_file_info tmp_file_info;
+
+	memset(&fake_file, 0, sizeof(fake_file));
+	fake_file.f_dentry = ecryptfs_dentry;
+	memset(&tmp_file_info, 0, sizeof(tmp_file_info));
+	ecryptfs_set_file_private(&fake_file, &tmp_file_info);
+	ecryptfs_set_file_lower(&fake_file, lower_file);
+	rc = ecryptfs_fill_zeros(&fake_file, 1);
+	if (rc) {
+		ECRYPTFS_SET_FLAG(
+			ecryptfs_inode_to_private(inode)->crypt_stat.flags,
+			ECRYPTFS_SECURITY_WARNING);
+		ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
+				"in file; rc = [%d]\n", rc);
+		goto out;
+	}
+	i_size_write(inode, 0);
+	/* NOTE(review): the return value of this header write is not
+	 * checked here, unlike the grow path of ecryptfs_truncate(). */
+	ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
+	ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags,
+			  ECRYPTFS_NEW_FILE);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_initialize_file
+ *
+ * Cause the file to be changed from a basic empty file to an ecryptfs
+ * file with a header and first data page.
+ *
+ * Opens the lower file, creates a new crypto context for the eCryptfs
+ * dentry, writes the headers and calls grow_file(). Directories only
+ * get ECRYPTFS_ENCRYPTED cleared.
+ *
+ * Returns zero on success
+ */
+static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
+{
+	int rc = 0;
+	int lower_flags;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	struct dentry *lower_dentry;
+	struct dentry *tlower_dentry = NULL;
+	struct file *lower_file;
+	struct inode *inode, *lower_inode;
+	struct vfsmount *lower_mnt;
+
+	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+	ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
+			lower_dentry->d_name.name);
+	inode = ecryptfs_dentry->d_inode;
+	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+	tlower_dentry = dget(lower_dentry);
+	if (!tlower_dentry) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
+		goto out;
+	}
+	/* NOTE(review): masking with O_ACCMODE keeps only the access-mode
+	 * bits of the first operand, so O_CREAT and O_TRUNC are dropped
+	 * and the result is O_WRONLY | O_RDWR -- confirm this is intended. */
+	lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
+#if BITS_PER_LONG != 32
+	lower_flags |= O_LARGEFILE;
+#endif
+	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
+	mntget(lower_mnt);
+	/* Corresponding fput() at end of this function */
+	lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
+	if (IS_ERR(lower_file)) {
+		rc = PTR_ERR(lower_file);
+		ecryptfs_printk(KERN_ERR,
+				"Error opening dentry; rc = [%i]\n", rc);
+		goto out;
+	}
+	/* fput(lower_file) should handle the puts if we do this */
+	lower_file->f_dentry = tlower_dentry;
+	lower_file->f_vfsmnt = lower_mnt;
+	lower_inode = tlower_dentry->d_inode;
+	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
+		ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+		goto out_fput;
+	}
+	ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
+	ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
+	rc = ecryptfs_new_file_context(ecryptfs_dentry);
+	if (rc) {
+		ecryptfs_printk(KERN_DEBUG, "Error creating new file "
+				"context\n");
+		goto out_fput;
+	}
+	rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file);
+	if (rc) {
+		ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
+		goto out_fput;
+	}
+	rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
+out_fput:
+	fput(lower_file);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_create
+ * @directory_inode: The inode of the directory in which to create the file.
+ * @ecryptfs_dentry: The eCryptfs dentry
+ * @mode: The mode of the new file.
+ * @nd: nameidata
+ *
+ * Creates a new file: first in the lower filesystem through
+ * ecryptfs_do_create(), then prepared as an eCryptfs file through
+ * ecryptfs_initialize_file().
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
+		int mode, struct nameidata *nd)
+{
+	int rc;
+
+	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
+	if (unlikely(rc)) {
+		ecryptfs_printk(KERN_WARNING, "Failed to create file in"
+				"lower filesystem\n");
+		goto out;
+	}
+	/* At this point, a file exists on "disk"; we need to make sure
+	 * that this on disk file is prepared to be an ecryptfs file */
+	rc = ecryptfs_initialize_file(ecryptfs_dentry);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_lookup
+ * @dir: inode
+ * @dentry: The dentry
+ * @nd: nameidata, may be NULL
+ *
+ * Find a file on disk. If the file does not exist, then we'll add it to the
+ * dentry cache and continue on to read it from the disk.
+ */ +static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + int rc = 0; + struct dentry *lower_dir_dentry; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct dentry *tlower_dentry = NULL; + char *encoded_name; + unsigned int encoded_namelen; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + char *page_virt = NULL; + struct inode *lower_inode; + u64 file_size; + + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + dentry->d_op = &ecryptfs_dops; + if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) + || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, ".."))) + goto out_drop; + encoded_namelen = ecryptfs_encode_filename(crypt_stat, + dentry->d_name.name, + dentry->d_name.len, + &encoded_name); + if (encoded_namelen < 0) { + rc = encoded_namelen; + goto out_drop; + } + ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " + "= [%d]\n", encoded_name, encoded_namelen); + lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, + encoded_namelen - 1); + kfree(encoded_name); + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + if (IS_ERR(lower_dentry)) { + ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); + rc = PTR_ERR(lower_dentry); + goto out_drop; + } + ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->" + "d_name.name = [%s]\n", lower_dentry, + lower_dentry->d_name.name); + lower_inode = lower_dentry->d_inode; + ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode); + BUG_ON(!atomic_read(&lower_dentry->d_count)); + ecryptfs_set_dentry_private(dentry, + kmem_cache_alloc(ecryptfs_dentry_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_dentry_to_private(dentry)) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " + "to allocate ecryptfs_dentry_info struct\n"); + goto out_dput; + } + ecryptfs_set_dentry_lower(dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(dentry, 
lower_mnt); + if (!lower_dentry->d_inode) { + /* We want to add because we couldn't find in lower */ + d_add(dentry, NULL); + goto out; + } + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error interposing\n"); + goto out_dput; + } + if (S_ISDIR(lower_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); + goto out; + } + if (S_ISLNK(lower_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n"); + goto out; + } + if (!nd) { + ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" + "as we *think* we are about to unlink\n"); + goto out; + } + tlower_dentry = dget(lower_dentry); + if (!tlower_dentry || IS_ERR(tlower_dentry)) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n"); + goto out_dput; + } + /* Released in this function */ + page_virt = + (char *)kmem_cache_alloc(ecryptfs_header_cache_2, + SLAB_USER); + if (!page_virt) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, + "Cannot ecryptfs_kmalloc a page\n"); + goto out_dput; + } + memset(page_virt, 0, PAGE_CACHE_SIZE); + rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt); + crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) + ecryptfs_set_default_sizes(crypt_stat); + if (rc) { + rc = 0; + ecryptfs_printk(KERN_WARNING, "Error reading header region;" + " assuming unencrypted\n"); + } else { + if (!contains_ecryptfs_marker(page_virt + + ECRYPTFS_FILE_SIZE_BYTES)) { + ecryptfs_printk(KERN_WARNING, "Underlying file " + "lacks recognizable eCryptfs marker\n"); + } + memcpy(&file_size, page_virt, sizeof(file_size)); + file_size = be64_to_cpu(file_size); + i_size_write(dentry->d_inode, (loff_t)file_size); + } + kmem_cache_free(ecryptfs_header_cache_2, page_virt); + goto out; + +out_dput: + dput(lower_dentry); + if (tlower_dentry) + dput(tlower_dentry); +out_drop: + d_drop(dentry); +out: 
+ return ERR_PTR(rc); +} + +static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int rc; + + file_size_save = i_size_read(old_dentry->d_inode); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_dir_dentry = lock_parent(lower_new_dentry); + rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, + lower_new_dentry); + if (rc || !lower_new_dentry->d_inode) + goto out_lock; + rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); + if (rc) + goto out_lock; + ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode); + old_dentry->d_inode->i_nlink = + ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; + i_size_write(new_dentry->d_inode, file_size_save); +out_lock: + unlock_dir(lower_dir_dentry); + dput(lower_new_dentry); + dput(lower_old_dentry); + if (!new_dentry->d_inode) + d_drop(new_dentry); + return rc; +} + +static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int rc = 0; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + + lock_parent(lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n"); + goto out_unlock; + } + ecryptfs_copy_attr_times(dir, lower_dir_inode); + dentry->d_inode->i_nlink = + ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; + dentry->d_inode->i_ctime = dir->i_ctime; +out_unlock: + unlock_parent(lower_dentry); + return rc; +} + +static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + umode_t mode; + char *encoded_symname; + 
unsigned int encoded_symlen; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + mode = S_IALLUGO; + encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, + strlen(symname), + &encoded_symname); + if (encoded_symlen < 0) { + rc = encoded_symlen; + goto out_lock; + } + rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, + encoded_symname, mode); + kfree(encoded_symname); + if (rc || !lower_dentry->d_inode) + goto out_lock; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out_lock; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); +out_lock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; +out: + unlock_dir(lower_dir_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int rc = 0; + struct dentry *tdentry = NULL; + struct dentry *lower_dentry; + struct dentry *tlower_dentry = NULL; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!(tdentry = dget(dentry))) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n", + dentry); + goto out; + } + lower_dir_dentry = lock_parent(lower_dentry); + if (!(tlower_dentry 
= dget(lower_dentry))) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry " + "[%p]\n", lower_dentry); + goto out; + } + rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + if (!rc) { + d_delete(tlower_dentry); + tlower_dentry = NULL; + } + ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + unlock_dir(lower_dir_dentry); + if (!rc) + d_drop(dentry); +out: + if (tdentry) + dput(tdentry); + if (tlower_dentry) + dput(tlower_dentry); + return rc; +} + +static int +ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); +out: + unlock_dir(lower_dir_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int +ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int rc; + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_old_dir_dentry; + struct dentry *lower_new_dir_dentry; + + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (rc) + goto out_lock; + ecryptfs_copy_attr_all(new_dir, 
lower_new_dir_dentry->d_inode); + if (new_dir != old_dir) + ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); +out_lock: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_new_dentry); + dput(lower_old_dentry); + return rc; +} + +static int +ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) +{ + int rc; + struct dentry *lower_dentry; + char *decoded_name; + char *lower_buf; + mm_segment_t old_fs; + struct ecryptfs_crypt_stat *crypt_stat; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op || + !lower_dentry->d_inode->i_op->readlink) { + rc = -EINVAL; + goto out; + } + /* Released in this function */ + lower_buf = kmalloc(bufsiz, GFP_KERNEL); + if (lower_buf == NULL) { + ecryptfs_printk(KERN_ERR, "Out of memory\n"); + rc = -ENOMEM; + goto out; + } + old_fs = get_fs(); + set_fs(get_ds()); + ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " + "lower_dentry->d_name.name = [%s]\n", + lower_dentry->d_name.name); + rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, + (char __user *)lower_buf, + bufsiz); + set_fs(old_fs); + if (rc >= 0) { + crypt_stat = NULL; + rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc, + &decoded_name); + if (rc == -ENOMEM) + goto out_free_lower_buf; + if (rc > 0) { + ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes " + "to userspace: [%*s]\n", rc, + decoded_name); + if (copy_to_user(buf, decoded_name, rc)) + rc = -EFAULT; + } + kfree(decoded_name); + ecryptfs_copy_attr_atime(dentry->d_inode, + lower_dentry->d_inode); + } +out_free_lower_buf: + kfree(lower_buf); +out: + return rc; +} + +static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, rc; + mm_segment_t old_fs; + + /* Released in ecryptfs_put_link(); only release here on error */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + goto out; + } + old_fs = get_fs(); + set_fs(get_ds()); + 
ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " + "dentry->d_name.name = [%s]\n", dentry->d_name.name); + rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); + buf[rc] = '\0'; + set_fs(old_fs); + if (rc < 0) + goto out_free; + rc = 0; + nd_set_link(nd, buf); + goto out; +out_free: + kfree(buf); +out: + return ERR_PTR(rc); +} + +static void +ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) +{ + /* Free the char* */ + kfree(nd_get_link(nd)); +} + +/** + * upper_size_to_lower_size + * @crypt_stat: Crypt_stat associated with file + * @upper_size: Size of the upper file + * + * Calculate the requried size of the lower file based on the + * specified size of the upper file. This calculation is based on the + * number of headers in the underlying file and the extent size. + * + * Returns Calculated size of the lower file. + */ +static loff_t +upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, + loff_t upper_size) +{ + loff_t lower_size; + + lower_size = ( crypt_stat->header_extent_size + * crypt_stat->num_header_extents_at_front ); + if (upper_size != 0) { + loff_t num_extents; + + num_extents = upper_size >> crypt_stat->extent_shift; + if (upper_size & ~crypt_stat->extent_mask) + num_extents++; + lower_size += (num_extents * crypt_stat->extent_size); + } + return lower_size; +} + +/** + * ecryptfs_truncate + * @dentry: The ecryptfs layer dentry + * @new_length: The length to expand the file to + * + * Function to handle truncations modifying the size of the file. Note + * that the file sizes are interpolated. When expanding, we are simply + * writing strings of 0's out. When truncating, we need to modify the + * underlying file size according to the page index interpolations. 
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+{
+	int rc = 0;
+	struct inode *inode = dentry->d_inode;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct file fake_ecryptfs_file, *lower_file = NULL;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	loff_t i_size = i_size_read(inode);
+	loff_t lower_size_before_truncate;
+	loff_t lower_size_after_truncate;
+
+	/* nothing to do when the size does not change */
+	if (unlikely((new_length == i_size)))
+		goto out;
+	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+	/* Set up a fake ecryptfs file, this is used to interface with
+	 * the file in the underlying filesystem so that the
+	 * truncation has an effect there as well. */
+	memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
+	fake_ecryptfs_file.f_dentry = dentry;
+	/* Released at out_free: label */
+	ecryptfs_set_file_private(&fake_ecryptfs_file,
+				  kmem_cache_alloc(ecryptfs_file_info_cache,
+						   SLAB_KERNEL));
+	if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	/* This dget & mntget is released through fput at out_fput: */
+	dget(lower_dentry);
+	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+	mntget(lower_mnt);
+	lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
+	if (unlikely(IS_ERR(lower_file))) {
+		rc = PTR_ERR(lower_file);
+		goto out_free;
+	}
+	ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
+	/* Switch on growing or shrinking file */
+	if (new_length > i_size) {
+		rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR,
+					"Problem with fill_zeros\n");
+			goto out_fput;
+		}
+		i_size_write(inode, new_length);
+		rc = ecryptfs_write_inode_size_to_header(lower_file,
+							 lower_dentry->d_inode,
+							 inode);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR,
+					"Problem with ecryptfs_write"
+					"_inode_size\n");
+			goto out_fput;
+		}
+	} else { /* new_length < i_size_read(inode) */
+		/* NOTE(review): the results of vmtruncate() and of the
+		 * header write are not checked on this shrink path, unlike
+		 * the grow path above. */
+		vmtruncate(inode, new_length);
+		ecryptfs_write_inode_size_to_header(lower_file,
+						    lower_dentry->d_inode,
+						    inode);
+		/* We are reducing the size of the ecryptfs file, and need to
+		 * know if we need to reduce the size of the lower file. */
+		lower_size_before_truncate =
+		    upper_size_to_lower_size(crypt_stat, i_size);
+		lower_size_after_truncate =
+		    upper_size_to_lower_size(crypt_stat, new_length);
+		if (lower_size_after_truncate < lower_size_before_truncate)
+			vmtruncate(lower_dentry->d_inode,
+				   lower_size_after_truncate);
+	}
+	/* Update the modification and change times of the lower inode */
+	lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
+		= CURRENT_TIME;
+	mark_inode_dirty_sync(inode);
+out_fput:
+	fput(lower_file);
+out_free:
+	if (ecryptfs_file_to_private(&fake_ecryptfs_file))
+		kmem_cache_free(ecryptfs_file_info_cache,
+				ecryptfs_file_to_private(&fake_ecryptfs_file));
+out:
+	return rc;
+}
+/**
+ * ecryptfs_permission
+ * @inode: The eCryptfs inode
+ * @mask: Access mask handed to permission()
+ * @nd: nameidata; may be NULL
+ *
+ * Runs the permission check against the lower inode. When @nd is given,
+ * its mnt and dentry are pointed at the lower ones for the duration of
+ * the check and restored afterwards.
+ *
+ * Returns the result of permission() on the lower inode
+ */
+static int
+ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	int rc;
+
+	if (nd) {
+		struct vfsmount *vfsmnt_save = nd->mnt;
+		struct dentry *dentry_save = nd->dentry;
+
+		nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry);
+		nd->dentry = ecryptfs_dentry_to_lower(nd->dentry);
+		rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
+		nd->mnt = vfsmnt_save;
+		nd->dentry = dentry_save;
+	} else
+		rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
+	return rc;
+}
+
+/**
+ * ecryptfs_setattr
+ * @dentry: dentry handle to the inode to modify
+ * @ia: Structure with flags of what to change and values
+ *
+ * Updates the metadata of an inode. If the update is to the size
+ * i.e. truncation, then ecryptfs_truncate will handle the size modification
+ * of both the ecryptfs inode and the lower inode.
+ *
+ * All other metadata changes will be passed right to the lower filesystem,
+ * and we will just update our inode to look like the lower.
+ *
+ * Returns zero on success; non-zero otherwise. The eCryptfs inode is
+ * refreshed from the lower inode on every exit path.
+ */
+static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int rc = 0;
+	struct dentry *lower_dentry;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct ecryptfs_crypt_stat *crypt_stat;
+
+	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	inode = dentry->d_inode;
+	lower_inode = ecryptfs_inode_to_lower(inode);
+	if (ia->ia_valid & ATTR_SIZE) {
+		ecryptfs_printk(KERN_DEBUG,
+				"ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
+				ia->ia_valid, ATTR_SIZE);
+		rc = ecryptfs_truncate(dentry, ia->ia_size);
+		/* ecryptfs_truncate handles resizing of the lower file */
+		ia->ia_valid &= ~ATTR_SIZE;
+		ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
+				ia->ia_valid);
+		if (rc < 0)
+			goto out;
+	}
+	rc = notify_change(lower_dentry, ia);
+out:
+	ecryptfs_copy_attr_all(inode, lower_inode);
+	return rc;
+}
+/**
+ * ecryptfs_setxattr
+ *
+ * Passes the call to the lower inode's setxattr with the lower inode's
+ * i_mutex held.
+ *
+ * Returns the lower result; -ENOSYS when the lower inode has no setxattr
+ */
+static int
+ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		  size_t size, int flags)
+{
+	int rc = 0;
+	struct dentry *lower_dentry;
+
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (!lower_dentry->d_inode->i_op->setxattr) {
+		rc = -ENOSYS;
+		goto out;
+	}
+	mutex_lock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
+						   size, flags);
+	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+	return rc;
+}
+/**
+ * ecryptfs_getxattr
+ *
+ * Passes the call to the lower inode's getxattr with the lower inode's
+ * i_mutex held.
+ *
+ * Returns the lower result; -ENOSYS when the lower inode has no getxattr
+ */
+static ssize_t
+ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
+		  size_t size)
+{
+	int rc = 0;
+	struct dentry *lower_dentry;
+
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (!lower_dentry->d_inode->i_op->getxattr) {
+		rc = -ENOSYS;
+		goto out;
+	}
+	mutex_lock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
+						   size);
+	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+	return rc;
+}
+/**
+ * ecryptfs_listxattr
+ *
+ * Passes the call to the lower inode's listxattr with the lower inode's
+ * i_mutex held.
+ *
+ * Returns the lower result; -ENOSYS when the lower inode has no listxattr
+ */
+static ssize_t
+ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	int rc = 0;
+	struct dentry *lower_dentry;
+
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (!lower_dentry->d_inode->i_op->listxattr) {
+		rc = -ENOSYS;
+		goto out;
+	}
+	mutex_lock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
+	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+	return rc;
+}
+/**
+ * ecryptfs_removexattr
+ *
+ * Passes the call to the lower inode's removexattr with the lower
+ * inode's i_mutex held.
+ *
+ * Returns the lower result; -ENOSYS when the lower inode has no
+ * removexattr
+ */
+static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
+{
+	int rc = 0;
+	struct dentry *lower_dentry;
+
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (!lower_dentry->d_inode->i_op->removexattr) {
+		rc = -ENOSYS;
+		goto out;
+	}
+	mutex_lock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
+	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+	return rc;
+}
+/**
+ * ecryptfs_inode_test
+ * @inode: The eCryptfs inode to test
+ * @candidate_lower_inode: Lower inode pointer to compare against
+ *
+ * Returns 1 when @inode's lower inode is @candidate_lower_inode; 0
+ * otherwise
+ */
+int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
+{
+	if ((ecryptfs_inode_to_lower(inode)
+	     == (struct inode *)candidate_lower_inode))
+		return 1;
+	else
+		return 0;
+}
+/**
+ * ecryptfs_inode_set
+ *
+ * Initializes @inode from @lower_inode through ecryptfs_init_inode().
+ *
+ * Returns zero
+ */
+int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
+{
+	ecryptfs_init_inode(inode, (struct inode *)lower_inode);
+	return 0;
+}
+/* Inode operations installed for eCryptfs symlinks */
+struct inode_operations ecryptfs_symlink_iops = {
+	.readlink = ecryptfs_readlink,
+	.follow_link = ecryptfs_follow_link,
+	.put_link = ecryptfs_put_link,
+	.permission = ecryptfs_permission,
+	.setattr = ecryptfs_setattr,
+	.setxattr = ecryptfs_setxattr,
+	.getxattr = ecryptfs_getxattr,
+	.listxattr = ecryptfs_listxattr,
+	.removexattr = ecryptfs_removexattr
+};
+/* Inode operations installed for eCryptfs directories */
+struct inode_operations ecryptfs_dir_iops = {
+	.create = ecryptfs_create,
+	.lookup = ecryptfs_lookup,
+	.link = ecryptfs_link,
+	.unlink = ecryptfs_unlink,
+	.symlink = ecryptfs_symlink,
+	.mkdir = ecryptfs_mkdir,
+	.rmdir = ecryptfs_rmdir,
+	.mknod = ecryptfs_mknod,
+	.rename = ecryptfs_rename,
+	.permission = ecryptfs_permission,
+	.setattr = ecryptfs_setattr,
+	.setxattr = ecryptfs_setxattr,
+	.getxattr = ecryptfs_getxattr,
+	.listxattr = ecryptfs_listxattr,
+	.removexattr = ecryptfs_removexattr
+};
+/* Inode operations for the remaining eCryptfs inodes (name suggests
+ * regular files -- confirm where it is installed) */
+struct inode_operations ecryptfs_main_iops = {
+	.permission = ecryptfs_permission,
+	.setattr = ecryptfs_setattr,
+	.setxattr = ecryptfs_setxattr,
+	.getxattr = ecryptfs_getxattr,
+	.listxattr = ecryptfs_listxattr,
+	.removexattr = ecryptfs_removexattr
+};
diff -urN oldtree/fs/ecryptfs/keystore.c newtree/fs/ecryptfs/keystore.c
--- oldtree/fs/ecryptfs/keystore.c	1969-12-31 19:00:00.000000000 -0500
+++ newtree/fs/ecryptfs/keystore.c	2006-09-30 04:33:46.000000000 -0400
@@ -0,0 +1,1061 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * In-kernel key management code.  Includes functions to parse and
+ * write authentication token-related packets with the underlying
+ * file.
+ *
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow
+ *              Michael C. Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ecryptfs_kernel.h"
+
+/**
+ * request_key returned an error instead of a valid key address;
+ * determine the type of error, make appropriate log entries, and
+ * return an error code.
+ */
+int process_request_key_err(long err_code)
+{
+	int rc = 0;
+
+	/* Kernel interfaces hand errors back as negative errno values
+	 * (e.g. through PTR_ERR()), so the negative form is matched; the
+	 * positive form is kept for callers that already negate. */
+	switch (err_code) {
+	case -ENOKEY:
+	case ENOKEY:
+		ecryptfs_printk(KERN_WARNING, "No key\n");
+		rc = -ENOENT;
+		break;
+	case -EKEYEXPIRED:
+	case EKEYEXPIRED:
+		ecryptfs_printk(KERN_WARNING, "Key expired\n");
+		rc = -ETIME;
+		break;
+	case -EKEYREVOKED:
+	case EKEYREVOKED:
+		ecryptfs_printk(KERN_WARNING, "Key revoked\n");
+		rc = -EINVAL;
+		break;
+	default:
+		/* err_code is a long; print it with a matching length
+		 * modifier */
+		ecryptfs_printk(KERN_WARNING, "Unknown error code: "
+				"[0x%.16lx]\n", err_code);
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+/**
+ * wipe_auth_tok_list
+ * @auth_tok_list_head: Head of a list of ecryptfs_auth_tok_list_item
+ *
+ * Zeroes every item on the list and returns it to
+ * ecryptfs_auth_tok_list_item_cache. The head itself is not
+ * re-initialized, so it must not be walked again afterwards.
+ */
+static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
+{
+	struct list_head *walker;
+	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+
+	walker = auth_tok_list_head->next;
+	while (walker != auth_tok_list_head) {
+		auth_tok_list_item =
+		    list_entry(walker, struct ecryptfs_auth_tok_list_item,
+			       list);
+		/* Fetch the successor before the item is wiped and freed */
+		walker = auth_tok_list_item->list.next;
+		memset(auth_tok_list_item, 0,
+		       sizeof(struct ecryptfs_auth_tok_list_item));
+		kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+				auth_tok_list_item);
+	}
+}
+
+struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
+
+/**
+ * parse_packet_length
+ * @data: Pointer to memory containing length at offset; up to two
+ *        bytes are read
+ * @size: This function writes the decoded size to this memory
+ *        address; zero on error
+ * @length_size: The number of bytes occupied by the encoded length;
+ *               zero on error
+ *
+ * Returns Zero on success
+ */
+static int parse_packet_length(unsigned char *data, size_t *size,
+			       size_t *length_size)
+{
+	int rc = 0;
+
+	(*length_size) = 0;
+	(*size) = 0;
+	if (data[0] < 192) {
+		/* One-byte length */
+		(*size) = data[0];
+		(*length_size) = 1;
+	} else if (data[0] < 224) {
+		/* Two-byte length */
+		(*size) = ((data[0] - 192) * 256);
+		(*size) += (data[1] + 192);
+		(*length_size) = 2;
+	} else if (data[0] == 255) {
+		/* Five-byte length; we're not supposed to see this */
+		ecryptfs_printk(KERN_ERR, "Five-byte packet length not "
+				"supported\n");
+		rc = -EINVAL;
+		goto out;
+	} else {
+		ecryptfs_printk(KERN_ERR, "Error parsing packet length\n");
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+/**
+ * write_packet_length
+ * @dest: The byte array target into which to write the
+ *        length. Must have at least 5 bytes allocated.
+ * @size: The length to write. Only values that fit the one- or
+ *        two-byte encoding (0..8383) are supported.
+ * @packet_size_length: The number of bytes used to encode the
+ *                      packet length is written to this address;
+ *                      left untouched on error.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int write_packet_length(char *dest, size_t size,
+			       size_t *packet_size_length)
+{
+	int rc = 0;
+
+	if (size < 192) {
+		dest[0] = size;
+		(*packet_size_length) = 1;
+	} else if (size <= 8383) {
+		/* The first byte of a two-byte length must stay in
+		 * 192..223 (parse_packet_length() rejects anything else),
+		 * which caps the value at (31 * 256) + 255 + 192 = 8383 */
+		dest[0] = (((size - 192) / 256) + 192);
+		dest[1] = ((size - 192) % 256);
+		(*packet_size_length) = 2;
+	} else {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING,
+				"Unsupported packet size: [%zu]\n", size);
+	}
+	return rc;
+}
+
+/**
+ * parse_tag_3_packet
+ * @crypt_stat: The cryptographic context to modify based on packet
+ *              contents.
+ * @data: The raw bytes of the packet.
+ * @auth_tok_list: eCryptfs parses packets into authentication tokens;
+ *                 a new authentication token will be added to this
+ *                 list for this packet.
+ * @new_auth_tok: Pointer to a pointer to memory that this function
+ *                allocates; sets the memory address of the pointer to
+ *                NULL on error. This object is added to the
+ *                auth_tok_list.
+ * @packet_size: This function writes the size of the parsed packet
+ *               into this memory location; zero on error.
+ * @max_packet_size: maximum number of bytes to parse
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
+		   unsigned char *data, struct list_head *auth_tok_list,
+		   struct ecryptfs_auth_tok **new_auth_tok,
+		   size_t *packet_size, size_t max_packet_size)
+{
+	int rc = 0;
+	size_t body_size;
+	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+	size_t length_size;
+
+	(*packet_size) = 0;
+	(*new_auth_tok) = NULL;
+
+	/* we check that:
+	 *   one byte for the Tag 3 ID flag
+	 *   two bytes for the body size
+	 * do not exceed the maximum_packet_size
+	 */
+	if (unlikely((*packet_size) + 3 > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* check for Tag 3 identifier - one byte */
+	if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
+		ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
+				ECRYPTFS_TAG_3_PACKET_TYPE);
+		rc = -EINVAL;
+		goto out;
+	}
+	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
+	 * at end of function upon failure */
+	auth_tok_list_item =
+	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
+	if (!auth_tok_list_item) {
+		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	memset(auth_tok_list_item, 0,
+	       sizeof(struct ecryptfs_auth_tok_list_item));
+	(*new_auth_tok) = &auth_tok_list_item->auth_tok;
+
+	/* check for body size - one to two bytes */
+	rc = parse_packet_length(&data[(*packet_size)], &body_size,
+				 &length_size);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
+				"rc = [%d]\n", rc);
+		goto out_free;
+	}
+	if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) {
+		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%zd])\n",
+				body_size);
+		rc = -EINVAL;
+		goto out_free;
+	}
+	(*packet_size) += length_size;
+
+	/* now we know the length of the remaining Tag 3 packet size:
+	 *   5 fix bytes for: version string, cipher, S2K ID, hash algo,
+	 *                    number of hash iterations
+	 *   ECRYPTFS_SALT_SIZE bytes for salt
+	 *   body_size bytes minus the stuff above is the encrypted key size
+	 */
+	if (unlikely((*packet_size) + body_size > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	/* The key length comes straight from the (untrusted) packet and
+	 * later sizes a memcpy() into session_key.encrypted_key; refuse
+	 * anything above ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES (presumably the
+	 * capacity of that array -- confirm against ecryptfs_kernel.h) */
+	if (unlikely((body_size - (0x05 + ECRYPTFS_SALT_SIZE))
+		     > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES)) {
+		ecryptfs_printk(KERN_WARNING, "Tag 3 packet contains key "
+				"larger than "
+				"ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	/* There are 5 characters of additional information in the
+	 * packet */
+	(*new_auth_tok)->session_key.encrypted_key_size =
+		body_size - (0x05 + ECRYPTFS_SALT_SIZE);
+	ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
+			(*new_auth_tok)->session_key.encrypted_key_size);
+
+	/* Version 4 (from RFC2440) - one byte */
+	if (unlikely(data[(*packet_size)++] != 0x04)) {
+		ecryptfs_printk(KERN_DEBUG, "Unknown version number "
+				"[%d]\n", data[(*packet_size) - 1]);
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	/* cipher - one byte */
+	ecryptfs_cipher_code_to_string(crypt_stat->cipher,
+				       (u16)data[(*packet_size)]);
+	/* A little extra work to differentiate among the AES key
+	 * sizes; see RFC2440 */
+	switch(data[(*packet_size)++]) {
+	case RFC2440_CIPHER_AES_192:
+		crypt_stat->key_size = 24;
+		break;
+	default:
+		crypt_stat->key_size =
+			(*new_auth_tok)->session_key.encrypted_key_size;
+	}
+	ecryptfs_init_crypt_ctx(crypt_stat);
+	/* S2K identifier 3 (from RFC2440) */
+	if (unlikely(data[(*packet_size)++] != 0x03)) {
+		ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently "
+				"supported\n");
+		rc = -ENOSYS;
+		goto out_free;
+	}
+
+	/* TODO: finish the hash mapping */
+	/* hash algorithm - one byte */
+	switch (data[(*packet_size)++]) {
+	case 0x01: /* See RFC2440 for these numbers and their mappings */
+		/* Choose MD5 */
+		/* salt - ECRYPTFS_SALT_SIZE bytes */
+		memcpy((*new_auth_tok)->token.password.salt,
+		       &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
+		(*packet_size) += ECRYPTFS_SALT_SIZE;
+
+		/* This conversion was taken straight from RFC2440 */
+		/* number of hash iterations - one byte */
+		(*new_auth_tok)->token.password.hash_iterations =
+			((u32) 16 + (data[(*packet_size)] & 15))
+				<< ((data[(*packet_size)] >> 4) + 6);
+		(*packet_size)++;
+
+		/* encrypted session key -
+		 *   (body_size-5-ECRYPTFS_SALT_SIZE) bytes */
+		memcpy((*new_auth_tok)->session_key.encrypted_key,
+		       &data[(*packet_size)],
+		       (*new_auth_tok)->session_key.encrypted_key_size);
+		(*packet_size) +=
+			(*new_auth_tok)->session_key.encrypted_key_size;
+		(*new_auth_tok)->session_key.flags &=
+			~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+		(*new_auth_tok)->session_key.flags |=
+			ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
+		(*new_auth_tok)->token.password.hash_algo = 0x01;
+		break;
+	default:
+		ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
+				"[%d]\n", data[(*packet_size) - 1]);
+		rc = -ENOSYS;
+		goto out_free;
+	}
+	(*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
+	/* TODO: Parameterize; we might actually want userspace to
+	 * decrypt the session key. */
+	ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
+			    ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
+	ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
+			    ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
+	list_add(&auth_tok_list_item->list, auth_tok_list);
+	goto out;
+out_free:
+	/* Wipe before freeing: the item may already hold key material */
+	(*new_auth_tok) = NULL;
+	memset(auth_tok_list_item, 0,
+	       sizeof(struct ecryptfs_auth_tok_list_item));
+	kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+			auth_tok_list_item);
+out:
+	if (rc)
+		(*packet_size) = 0;
+	return rc;
+}
+
+/**
+ * parse_tag_11_packet
+ * @data: The raw bytes of the packet
+ * @contents: This function writes the data contents of the literal
+ *            packet into this memory location
+ * @max_contents_bytes: The maximum number of bytes that this function
+ *                      is allowed to write into contents
+ * @tag_11_contents_size: This function writes the size of the parsed
+ *                        contents into this memory location; zero on
+ *                        error
+ * @packet_size: This function writes the size of the parsed packet
+ *               into this memory location; zero on error
+ * @max_packet_size: maximum number of bytes to parse
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_11_packet(unsigned char *data, unsigned char *contents,
+		    size_t max_contents_bytes, size_t *tag_11_contents_size,
+		    size_t *packet_size, size_t max_packet_size)
+{
+	int rc = 0;
+	size_t body_size;
+	size_t length_size;
+
+	(*packet_size) = 0;
+	(*tag_11_contents_size) = 0;
+
+	/* check that:
+	 *   one byte for the Tag 11 ID flag
+	 *   two bytes for the Tag 11 length
+	 * do not exceed the maximum_packet_size
+	 */
+	if (unlikely((*packet_size) + 3 > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* check for Tag 11 identifier - one byte */
+	if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
+		ecryptfs_printk(KERN_WARNING,
+				"Invalid tag 11 packet format\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* get Tag 11 content length - one or two bytes */
+	rc = parse_packet_length(&data[(*packet_size)], &body_size,
+				 &length_size);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING,
+				"Invalid tag 11 packet format\n");
+		goto out;
+	}
+	(*packet_size) += length_size;
+
+	if (body_size < 13) {
+		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%zd])\n",
+				body_size);
+		rc = -EINVAL;
+		goto out;
+	}
+	/* We have 13 bytes of surrounding packet values */
+	(*tag_11_contents_size) = (body_size - 13);
+
+	/* now we know the length of the remaining Tag 11 packet size:
+	 *   14 fix bytes for: special flag one, special flag two,
+	 *   		       12 skipped bytes
+	 *   body_size bytes minus the stuff above is the Tag 11 content
+	 */
+	/* FIXME why is the body size one byte smaller than the actual
+	 * size of the body?
+	 * this seems to be an error here as well as in
+	 * write_tag_11_packet() */
+	if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* The content length comes from the packet itself; never copy
+	 * more than the caller said the contents buffer can hold */
+	if (unlikely((*tag_11_contents_size) > max_contents_bytes)) {
+		ecryptfs_printk(KERN_ERR, "Literal data section in tag 11 "
+				"packet exceeds expected size\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* special flag one - one byte */
+	if (data[(*packet_size)++] != 0x62) {
+		ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* special flag two - one byte */
+	if (data[(*packet_size)++] != 0x08) {
+		ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* skip the next 12 bytes */
+	(*packet_size) += 12; /* We don't care about the filename or
+			       * the timestamp */
+
+	/* get the Tag 11 contents - tag_11_contents_size bytes */
+	memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
+	(*packet_size) += (*tag_11_contents_size);
+
+out:
+	if (rc) {
+		(*packet_size) = 0;
+		(*tag_11_contents_size) = 0;
+	}
+	return rc;
+}
+
+/**
+ * decrypt_session_key - Decrypt the session key with the given auth_tok.
+ *
+ * Returns Zero on success; non-zero error otherwise.
+ */
+static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
+			       struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int rc = 0;
+	struct ecryptfs_password *password_s_ptr;
+	struct crypto_tfm *tfm = NULL;
+	struct scatterlist src_sg[2], dst_sg[2];
+	struct mutex *tfm_mutex = NULL;
+	/* TODO: Use virt_to_scatterlist for these */
+	char *encrypted_session_key;
+	char *session_key;
+
+	password_s_ptr = &auth_tok->token.password;
+	if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
+				ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
+		ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
+				"set; skipping key generation\n");
+	ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
+			":\n",
+			password_s_ptr->session_key_encryption_key_bytes);
+	if (ecryptfs_verbosity > 0)
+		ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key,
+				  password_s_ptr->
+				  session_key_encryption_key_bytes);
+	/* Use the mount-wide tfm (serialized by its mutex) when the
+	 * cipher matches the mount default; otherwise allocate a private
+	 * tfm that is freed again before returning */
+	if (!strcmp(crypt_stat->cipher,
+		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
+	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
+		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
+	} else {
+		tfm = crypto_alloc_tfm(crypt_stat->cipher,
+				       CRYPTO_TFM_REQ_WEAK_KEY);
+		if (!tfm) {
+			printk(KERN_ERR "Error allocating crypto context\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+	}
+	/* Take the lock before the first failure point that follows, so
+	 * that every later error can leave through out_free_tfm, which
+	 * unlocks the shared tfm or frees the private one */
+	if (tfm_mutex)
+		mutex_lock(tfm_mutex);
+	if (password_s_ptr->session_key_encryption_key_bytes
+	    < crypto_tfm_alg_min_keysize(tfm)) {
+		printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
+		       "minimum keysize for selected cipher is [%d] bytes.\n",
+		       password_s_ptr->session_key_encryption_key_bytes,
+		       crypto_tfm_alg_min_keysize(tfm));
+		rc = -EINVAL;
+		goto out_free_tfm;
+	}
+	rc = crypto_cipher_setkey(tfm,
+				  password_s_ptr->session_key_encryption_key,
+				  crypt_stat->key_size);
+	if (rc < 0) {
+		ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
+				"context\n");
+		goto out_free_tfm;
+	}
+	rc = 0;
+	/* TODO: virt_to_scatterlist */
+	encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
+	if (!encrypted_session_key) {
+		ecryptfs_printk(KERN_ERR, "Out of memory\n");
+		rc = -ENOMEM;
+		goto out_free_tfm;
+	}
+	session_key = (char *)__get_free_page(GFP_KERNEL);
+	if (!session_key) {
+		/* Page allocations are released with free_page(), not
+		 * kfree() */
+		free_page((unsigned long)encrypted_session_key);
+		ecryptfs_printk(KERN_ERR, "Out of memory\n");
+		rc = -ENOMEM;
+		goto out_free_tfm;
+	}
+	/* The bounce buffers are a single page each; check before the
+	 * copy rather than after it */
+	BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
+	memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
+	       auth_tok->session_key.encrypted_key_size);
+	src_sg[0].page = virt_to_page(encrypted_session_key);
+	src_sg[0].offset = 0;
+	src_sg[0].length = auth_tok->session_key.encrypted_key_size;
+	dst_sg[0].page = virt_to_page(session_key);
+	dst_sg[0].offset = 0;
+	dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
+	/* TODO: Handle error condition */
+	crypto_cipher_decrypt(tfm, dst_sg, src_sg,
+			      auth_tok->session_key.encrypted_key_size);
+	auth_tok->session_key.decrypted_key_size =
+	    auth_tok->session_key.encrypted_key_size;
+	memcpy(auth_tok->session_key.decrypted_key, session_key,
+	       auth_tok->session_key.decrypted_key_size);
+	auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+	memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
+	       auth_tok->session_key.decrypted_key_size);
+	ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+	ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
+	if (ecryptfs_verbosity > 0)
+		ecryptfs_dump_hex(crypt_stat->key,
+				  crypt_stat->key_size);
+	/* Scrub the bounce pages before handing them back */
+	memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
+	free_page((unsigned long)encrypted_session_key);
+	memset(session_key, 0, PAGE_CACHE_SIZE);
+	free_page((unsigned long)session_key);
+out_free_tfm:
+	if (tfm_mutex)
+		mutex_unlock(tfm_mutex);
+	else
+		crypto_free_tfm(tfm);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_parse_packet_set
+ * @crypt_stat: The cryptographic context to fill in from the packets
+ * @src: The header page in memory
+ * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
+ *
+ * Get crypt_stat to have the file's session 
key if the requisite key
+ * is available to decrypt the session key.
+ *
+ * Returns Zero if a valid authentication token was retrieved and
+ * processed; negative value for file not encrypted or for error
+ * conditions.
+ */
+int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
+			      unsigned char *src,
+			      struct dentry *ecryptfs_dentry)
+{
+	size_t i = 0;
+	int rc = 0;
+	size_t found_auth_tok = 0;
+	size_t next_packet_is_auth_tok_packet;
+	char sig[ECRYPTFS_SIG_SIZE_HEX];
+	struct list_head auth_tok_list;
+	struct list_head *walker;
+	struct ecryptfs_auth_tok *chosen_auth_tok = NULL;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+		&ecryptfs_superblock_to_private(
+			ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
+	size_t packet_size;
+	struct ecryptfs_auth_tok *new_auth_tok;
+	unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
+	size_t tag_11_contents_size;
+	size_t tag_11_packet_size;
+
+	INIT_LIST_HEAD(&auth_tok_list);
+	/* Parse the header to find as many packets as we can, these will be
+	 * added the our &auth_tok_list */
+	next_packet_is_auth_tok_packet = 1;
+	while (next_packet_is_auth_tok_packet) {
+		size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
+
+		switch (src[i]) {
+		case ECRYPTFS_TAG_3_PACKET_TYPE:
+			rc = parse_tag_3_packet(crypt_stat,
+						(unsigned char *)&src[i],
+						&auth_tok_list, &new_auth_tok,
+						&packet_size, max_packet_size);
+			if (rc) {
+				ecryptfs_printk(KERN_ERR, "Error parsing "
+						"tag 3 packet\n");
+				rc = -EIO;
+				goto out_wipe_list;
+			}
+			i += packet_size;
+			/* The tag 3 packet has been consumed, so the room
+			 * left for the tag 11 packet shrinks by the same
+			 * amount; parse_tag_3_packet() never reports more
+			 * than the bound it was given */
+			max_packet_size -= packet_size;
+			rc = parse_tag_11_packet((unsigned char *)&src[i],
+						 sig_tmp_space,
+						 ECRYPTFS_SIG_SIZE,
+						 &tag_11_contents_size,
+						 &tag_11_packet_size,
+						 max_packet_size);
+			if (rc) {
+				ecryptfs_printk(KERN_ERR, "No valid "
+						"(ecryptfs-specific) literal "
+						"packet containing "
+						"authentication token "
+						"signature found after "
+						"tag 3 packet\n");
+				rc = -EIO;
+				goto out_wipe_list;
+			}
+			i += tag_11_packet_size;
+			if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
+				ecryptfs_printk(KERN_ERR, "Expected "
+						"signature of size [%d]; "
+						"read size [%d]\n",
+						ECRYPTFS_SIG_SIZE,
+						tag_11_contents_size);
+				rc = -EIO;
+				goto out_wipe_list;
+			}
+			ecryptfs_to_hex(new_auth_tok->token.password.signature,
+					sig_tmp_space, tag_11_contents_size);
+			new_auth_tok->token.password.signature[
+				ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
+			ECRYPTFS_SET_FLAG(crypt_stat->flags,
+					  ECRYPTFS_ENCRYPTED);
+			break;
+		case ECRYPTFS_TAG_11_PACKET_TYPE:
+			ecryptfs_printk(KERN_WARNING, "Invalid packet set "
+					"(Tag 11 not allowed by itself)\n");
+			rc = -EIO;
+			goto out_wipe_list;
+			break;
+		default:
+			ecryptfs_printk(KERN_DEBUG, "No packet at offset "
+					"[%d] of the file header; hex value of "
+					"character is [0x%.2x]\n", i, src[i]);
+			next_packet_is_auth_tok_packet = 0;
+		}
+	}
+	if (list_empty(&auth_tok_list)) {
+		rc = -EINVAL; /* Do not support non-encrypted files in
+			       * the 0.1 release */
+		goto out;
+	}
+	/* If we have a global auth tok, then we should try to use
+	 * it */
+	if (mount_crypt_stat->global_auth_tok) {
+		memcpy(sig, mount_crypt_stat->global_auth_tok_sig,
+		       ECRYPTFS_SIG_SIZE_HEX);
+		chosen_auth_tok = mount_crypt_stat->global_auth_tok;
+	} else
+		BUG(); /* We should always have a global auth tok in
+			* the 0.1 release */
+	/* Scan list to see if our chosen_auth_tok works */
+	list_for_each(walker, &auth_tok_list) {
+		struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+		auth_tok_list_item =
+		    list_entry(walker, struct ecryptfs_auth_tok_list_item,
+			       list);
+		candidate_auth_tok = &auth_tok_list_item->auth_tok;
+		if (unlikely(ecryptfs_verbosity > 0)) {
+			ecryptfs_printk(KERN_DEBUG,
+					"Considering cadidate auth tok:\n");
+			ecryptfs_dump_auth_tok(candidate_auth_tok);
+		}
+		/* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */
+		if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD
+		    && !strncmp(candidate_auth_tok->token.password.signature,
+				sig, ECRYPTFS_SIG_SIZE_HEX)) {
+			found_auth_tok = 1;
+			goto leave_list;
+			/* TODO: Transfer the common salt into the
+			 * crypt_stat salt */
+		}
+	}
+leave_list:
+	if (!found_auth_tok) {
+		ecryptfs_printk(KERN_ERR, "Could not find authentication "
+				"token on temporary list for sig [%.*s]\n",
+				ECRYPTFS_SIG_SIZE_HEX, sig);
+		rc = -EIO;
+		goto out_wipe_list;
+	} else {
+		/* The matching on-disk token takes over the password data
+		 * of the mount-wide token before the session key is
+		 * decrypted with it */
+		memcpy(&(candidate_auth_tok->token.password),
+		       &(chosen_auth_tok->token.password),
+		       sizeof(struct ecryptfs_password));
+		rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error decrypting the "
+					"session key\n");
+			goto out_wipe_list;
+		}
+		rc = ecryptfs_compute_root_iv(crypt_stat);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error computing "
+					"the root IV\n");
+			goto out_wipe_list;
+		}
+	}
+	rc = ecryptfs_init_crypt_ctx(crypt_stat);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error initializing crypto "
+				"context for cipher [%s]; rc = [%d]\n",
+				crypt_stat->cipher, rc);
+	}
+out_wipe_list:
+	wipe_auth_tok_list(&auth_tok_list);
+out:
+	return rc;
+}
+
+/**
+ * write_tag_11_packet
+ * @dest: Target into which Tag 11 packet is to be written
+ * @max: Maximum packet length
+ * @contents: Byte array of contents to copy in
+ * @contents_length: Number of bytes in contents
+ * @packet_length: Length of the Tag 11 packet written; zero on error
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
+		    size_t *packet_length)
+{
+	int rc = 0;
+	size_t packet_size_length;
+	size_t required;
+
+	(*packet_length) = 0;
+	/* Bytes actually emitted below: the tag, the encoded length (one
+	 * byte for values under 192, otherwise two), the format byte,
+	 * the filename length byte, 8 filename bytes, 4 date bytes and
+	 * the contents */
+	required = 1 + (((13 + contents_length) < 192) ? 1 : 2)
+		   + 14 + contents_length;
+	/* max is signed; a negative value must not be promoted to a huge
+	 * unsigned bound */
+	if (max < 0 || required > (size_t)max) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_ERR, "Packet length larger than "
+				"maximum allowable\n");
+		goto out;
+	}
+	/* General packet header */
+	/* Packet tag */
+	dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
+	/* Packet length; kept at 13 + contents_length because that is the
+	 * value parse_tag_11_packet() expects to find */
+	rc = write_packet_length(&dest[(*packet_length)],
+				 (13 + contents_length), &packet_size_length);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet "
+				"header; cannot generate packet length\n");
+		goto out;
+	}
+	(*packet_length) += packet_size_length;
+	/* Tag 11 specific */
+	/* One-octet field that describes how the data is formatted */
+	dest[(*packet_length)++] = 0x62; /* binary data */
+	/* One-octet filename length followed by filename */
+	dest[(*packet_length)++] = 8;
+	memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
+	(*packet_length) += 8;
+	/* Four-octet number indicating modification date */
+	memset(&dest[(*packet_length)], 0x00, 4);
+	(*packet_length) += 4;
+	/* Remainder is literal data */
+	memcpy(&dest[(*packet_length)], contents, contents_length);
+	(*packet_length) += contents_length;
+ out:
+	if (rc)
+		(*packet_length) = 0;
+	return rc;
+}
+
+/**
+ * write_tag_3_packet
+ * @dest: Buffer into which to write the packet
+ * @max: Maximum number of bytes that can be written
+ * @auth_tok: Authentication token
+ * @crypt_stat: The cryptographic context
+ * @key_rec: encrypted key
+ * @packet_size: This function will write the number of bytes that end
+ *               up constituting the packet; set to zero on error
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
+		   struct ecryptfs_crypt_stat *crypt_stat,
+		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
+{
+	int rc = 0;
+
+	size_t i;
+	size_t signature_is_valid = 0;
+	size_t encrypted_session_key_valid = 0;
+	char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+	struct scatterlist dest_sg[2];
+	struct scatterlist src_sg[2];
+	struct crypto_tfm *tfm = NULL;
+	struct mutex *tfm_mutex = NULL;
+	size_t key_rec_size;
+	size_t packet_size_length;
+	size_t cipher_code;
+	size_t body_size;
+
+	(*packet_size) = 0;
+	/* Check for a valid signature on the auth_tok */
+	for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
+		signature_is_valid |= auth_tok->token.password.signature[i];
+	if (!signature_is_valid)
+		BUG();
+	ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
+			  ECRYPTFS_SIG_SIZE);
+	/* Any non-zero byte means an encrypted session key is already
+	 * stored on the token and can be reused as is */
+	encrypted_session_key_valid = 0;
+	for (i = 0; i < crypt_stat->key_size; i++)
+		encrypted_session_key_valid |=
+			auth_tok->session_key.encrypted_key[i];
+	if (encrypted_session_key_valid) {
+		/* The size has to be recorded on this path as well: it
+		 * is read right after the encrypted_session_key_set
+		 * label, and the caller does not initialize key_rec */
+		(*key_rec).enc_key_size =
+			auth_tok->session_key.encrypted_key_size;
+		memcpy((*key_rec).enc_key,
+		       auth_tok->session_key.encrypted_key,
+		       auth_tok->session_key.encrypted_key_size);
+		goto encrypted_session_key_set;
+	}
+	if (auth_tok->session_key.encrypted_key_size == 0)
+		auth_tok->session_key.encrypted_key_size =
+			crypt_stat->key_size;
+	if (crypt_stat->key_size == 24
+	    && strcmp("aes", crypt_stat->cipher) == 0) {
+		memset((crypt_stat->key + 24), 0, 8);
+		auth_tok->session_key.encrypted_key_size = 32;
+	}
+	(*key_rec).enc_key_size =
+		auth_tok->session_key.encrypted_key_size;
+	/* NOTE(review): when this flag is not set,
+	 * session_key_encryption_key is used below without ever having
+	 * been written -- confirm that callers always set the flag */
+	if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
+				ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) {
+		ecryptfs_printk(KERN_DEBUG, "Using previously generated "
+				"session key encryption key of size [%d]\n",
+				auth_tok->token.password.
+				session_key_encryption_key_bytes);
+		memcpy(session_key_encryption_key,
+		       auth_tok->token.password.session_key_encryption_key,
+		       crypt_stat->key_size);
+		ecryptfs_printk(KERN_DEBUG,
+				"Cached session key " "encryption key: \n");
+		if (ecryptfs_verbosity > 0)
+			ecryptfs_dump_hex(session_key_encryption_key, 16);
+	}
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
+		ecryptfs_dump_hex(session_key_encryption_key, 16);
+	}
+	/* A zero result from virt_to_scatterlist() is treated as
+	 * failure here */
+	rc = virt_to_scatterlist(crypt_stat->key,
+				 (*key_rec).enc_key_size, src_sg, 2);
+	if (!rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
+				"for crypt_stat session key\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	rc = virt_to_scatterlist((*key_rec).enc_key,
+				 (*key_rec).enc_key_size, dest_sg, 2);
+	if (!rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
+				"for crypt_stat encrypted session key\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	/* Shared mount-wide tfm (guarded by its mutex) when the cipher is
+	 * the mount default, otherwise a private tfm freed at out: */
+	if (!strcmp(crypt_stat->cipher,
+		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
+	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
+		tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
+	} else
+		tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
+	if (!tfm) {
+		ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
+				"context for cipher [%s]\n",
+				crypt_stat->cipher);
+		rc = -EINVAL;
+		goto out;
+	}
+	if (tfm_mutex)
+		mutex_lock(tfm_mutex);
+	rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
+				  crypt_stat->key_size);
+	if (rc < 0) {
+		if (tfm_mutex)
+			mutex_unlock(tfm_mutex);
+		ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
+				"context\n");
+		goto out;
+	}
+	rc = 0;
+	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
+			crypt_stat->key_size);
+	crypto_cipher_encrypt(tfm, dest_sg, src_sg,
+			      (*key_rec).enc_key_size);
+	if (tfm_mutex)
+		mutex_unlock(tfm_mutex);
+	ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
+	if (ecryptfs_verbosity > 0)
+		ecryptfs_dump_hex((*key_rec).enc_key,
+				  (*key_rec).enc_key_size);
+encrypted_session_key_set:
+	/* Now we have a valid key_rec.  Append it to the
+	 * key_rec set. */
+	key_rec_size = (sizeof(struct ecryptfs_key_record)
+			- ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
+			+ ((*key_rec).enc_key_size));
+	/* TODO: Include a packet size limit as a parameter to this
+	 * function once we have multi-packet headers (for versions
+	 * later than 0.1 */
+	if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
+		ecryptfs_printk(KERN_ERR, "Keyset too large\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	/* TODO: Packet size limit */
+	/* We have 5 bytes of surrounding packet data */
+	body_size = (0x05 + ECRYPTFS_SALT_SIZE
+		     + (*key_rec).enc_key_size);
+	/* On top of the body, the tag byte and the encoded length (one
+	 * byte for values under 192, otherwise two) are written to dest
+	 * as well, so they have to fit into max too */
+	if ((1 + ((body_size < 192) ? 1 : 2) + body_size) > max) {
+		ecryptfs_printk(KERN_ERR, "Authentication token is too "
+				"large\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 3 */
+	dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
+	/* ver+cipher+s2k+hash+salt+iter+enc_key */
+	rc = write_packet_length(&dest[(*packet_size)], body_size,
+				 &packet_size_length);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
+				"header; cannot generate packet length\n");
+		goto out;
+	}
+	(*packet_size) += packet_size_length;
+	dest[(*packet_size)++] = 0x04; /* version 4 */
+	cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
+	if (cipher_code == 0) {
+		ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
+				"cipher [%s]\n", crypt_stat->cipher);
+		rc = -EINVAL;
+		goto out;
+	}
+	dest[(*packet_size)++] = cipher_code;
+	dest[(*packet_size)++] = 0x03;	/* S2K */
+	dest[(*packet_size)++] = 0x01;	/* MD5 (TODO: parameterize) */
+	memcpy(&dest[(*packet_size)], auth_tok->token.password.salt,
+	       ECRYPTFS_SALT_SIZE);
+	(*packet_size) += ECRYPTFS_SALT_SIZE;	/* salt */
+	dest[(*packet_size)++] = 0x60;	/* hash iterations (65536) */
+	memcpy(&dest[(*packet_size)], (*key_rec).enc_key,
+	       (*key_rec).enc_key_size);
+	(*packet_size) += (*key_rec).enc_key_size;
+out:
+	/* Only a privately allocated tfm is freed; the shared one stays */
+	if (tfm && !tfm_mutex)
+		crypto_free_tfm(tfm);
+	if (rc)
+		(*packet_size) = 0;
+	return rc;
+}
+
+/**
+ * ecryptfs_generate_key_packet_set
+ * @dest_base: Virtual address from which to write the key record set
+ * @crypt_stat: The cryptographic context from which the
+ *              authentication tokens will be retrieved
+ * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
+ *                   for the global parameters
+ * @len: The amount written; zero on error
+ * @max: The maximum amount of data allowed to be written
+ *
+ * Generates a key packet set and writes it to the virtual address
+ * passed in. The set consists of a tag 3 packet, a tag 11 packet
+ * carrying the authentication token signature, and a terminating
+ * zero byte (which is not counted in @len).
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int
+ecryptfs_generate_key_packet_set(char *dest_base,
+				 struct ecryptfs_crypt_stat *crypt_stat,
+				 struct dentry *ecryptfs_dentry, size_t *len,
+				 size_t max)
+{
+	int rc = 0;
+	struct ecryptfs_auth_tok *auth_tok;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+		&ecryptfs_superblock_to_private(
+			ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	size_t written;
+	struct ecryptfs_key_record key_rec;
+
+	(*len) = 0;
+	if (mount_crypt_stat->global_auth_tok) {
+		auth_tok = mount_crypt_stat->global_auth_tok;
+		if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
+			rc = write_tag_3_packet((dest_base + (*len)),
+						(max - (*len)), auth_tok,
+						crypt_stat, &key_rec,
+						&written);
+			if (rc) {
+				ecryptfs_printk(KERN_WARNING, "Error "
+						"writing tag 3 packet\n");
+				goto out;
+			}
+			(*len) += written;
+			/* Write auth tok signature packet */
+			rc = write_tag_11_packet(
+				(dest_base + (*len)),
+				(max - (*len)),
+				key_rec.sig, ECRYPTFS_SIG_SIZE, &written);
+			if (rc) {
+				ecryptfs_printk(KERN_ERR, "Error writing "
+						"auth tok signature packet\n");
+				goto out;
+			}
+			(*len) += written;
+		} else {
+			ecryptfs_printk(KERN_WARNING, "Unsupported "
+					"authentication token type\n");
+			rc = -EINVAL;
+			goto out;
+		}
+	} else
+		BUG();
+	/* Both operands are unsigned, so compare them directly instead
+	 * of testing a difference that can never be negative */
+	if (likely((*len) < max)) {
+		dest_base[(*len)] = 0x00;
+	} else {
+		ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
+		rc = -EIO;
+	}
+out:
+	if (rc)
+		(*len) = 0;
+	return rc;
+}
diff -urN oldtree/fs/ecryptfs/main.c newtree/fs/ecryptfs/main.c
--- oldtree/fs/ecryptfs/main.c 1969-12-31 19:00:00.000000000 -0500
+++ newtree/fs/ecryptfs/main.c 2006-09-30 04:33:48.000000000 -0400
@@ -0,0 +1,826 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow
+ *              Michael C. Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ecryptfs_kernel.h"
+
+/**
+ * Module parameter that defines the ecryptfs_verbosity level.
+ */ +int ecryptfs_verbosity = 0; + +module_param(ecryptfs_verbosity, int, 0); +MODULE_PARM_DESC(ecryptfs_verbosity, + "Initial verbosity level (0 or 1; defaults to " + "0, which is Quiet)"); + +void __ecryptfs_printk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + if (fmt[1] == '7') { /* KERN_DEBUG */ + if (ecryptfs_verbosity >= 1) + vprintk(fmt, args); + } else + vprintk(fmt, args); + va_end(args); +} + +/** + * ecryptfs_interpose + * @lower_dentry: Existing dentry in the lower filesystem + * @dentry: ecryptfs' dentry + * @sb: ecryptfs's super_block + * @flag: If set to true, then d_add is called, else d_instantiate is called + * + * Interposes upper and lower dentries. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, + struct super_block *sb, int flag) +{ + struct inode *lower_inode; + struct inode *inode; + int rc = 0; + + lower_inode = lower_dentry->d_inode; + if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { + rc = -EXDEV; + goto out; + } + if (!igrab(lower_inode)) { + rc = -ESTALE; + goto out; + } + inode = iget5_locked(sb, (unsigned long)lower_inode, + ecryptfs_inode_test, ecryptfs_inode_set, + lower_inode); + if (!inode) { + rc = -EACCES; + iput(lower_inode); + goto out; + } + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + else + iput(lower_inode); + if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &ecryptfs_symlink_iops; + else if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &ecryptfs_dir_iops; + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &ecryptfs_dir_fops; + /* TODO: Is there a better way to identify if the inode is + * special? 
*/ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + dentry->d_op = &ecryptfs_dops; + if (flag) + d_add(dentry, inode); + else + d_instantiate(dentry, inode); + ecryptfs_copy_attr_all(inode, lower_inode); + /* This size will be overwritten for real files w/ headers and + * other metadata */ + ecryptfs_copy_inode_size(inode, lower_inode); +out: + return rc; +} + +enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, + ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, + ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, + ecryptfs_opt_err }; + +static match_table_t tokens = { + {ecryptfs_opt_sig, "sig=%s"}, + {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, + {ecryptfs_opt_debug, "debug=%u"}, + {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"}, + {ecryptfs_opt_cipher, "cipher=%s"}, + {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, + {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, + {ecryptfs_opt_err, NULL} +}; + +/** + * ecryptfs_verify_version + * @version: The version number to confirm + * + * Returns zero on good version; non-zero otherwise + */ +static int ecryptfs_verify_version(u16 version) +{ + int rc = 0; + unsigned char major; + unsigned char minor; + + major = ((version >> 8) & 0xFF); + minor = (version & 0xFF); + if (major != ECRYPTFS_VERSION_MAJOR) { + ecryptfs_printk(KERN_ERR, "Major version number mismatch. " + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MAJOR, major); + rc = -EINVAL; + goto out; + } + if (minor != ECRYPTFS_VERSION_MINOR) { + ecryptfs_printk(KERN_ERR, "Minor version number mismatch. 
" + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MINOR, minor); + rc = -EINVAL; + goto out; + } +out: + return rc; +} + +/** + * ecryptfs_parse_options + * @sb: The ecryptfs super block + * @options: The options pased to the kernel + * + * Parse mount options: + * debug=N - ecryptfs_verbosity level for debug output + * sig=XXX - description(signature) of the key to use + * + * Returns the dentry object of the lower-level (lower/interposed) + * directory; We want to mount our stackable file system on top of + * that lower directory. + * + * The signature of the key to use must be the description of a key + * already in the keyring. Mounting will fail if the key can not be + * found. + * + * Returns zero on success; non-zero on error + */ +static int ecryptfs_parse_options(struct super_block *sb, char *options) +{ + char *p; + int rc = 0; + int sig_set = 0; + int cipher_name_set = 0; + int cipher_key_bytes; + int cipher_key_bytes_set = 0; + struct key *auth_tok_key = NULL; + struct ecryptfs_auth_tok *auth_tok = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; + substring_t args[MAX_OPT_ARGS]; + int token; + char *sig_src; + char *sig_dst; + char *debug_src; + char *cipher_name_dst; + char *cipher_name_src; + char *cipher_key_bytes_src; + struct crypto_tfm *tmp_tfm; + int cipher_name_len; + + if (!options) { + rc = -EINVAL; + goto out; + } + while ((p = strsep(&options, ",")) != NULL) { + if (!*p) + continue; + token = match_token(p, tokens, args); + switch (token) { + case ecryptfs_opt_sig: + case ecryptfs_opt_ecryptfs_sig: + sig_src = args[0].from; + sig_dst = + mount_crypt_stat->global_auth_tok_sig; + memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX); + sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0'; + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_auth_tok_sig set to: " + "[%s]\n", sig_dst); + sig_set = 1; + break; + case ecryptfs_opt_debug: + case ecryptfs_opt_ecryptfs_debug: + 
debug_src = args[0].from; + ecryptfs_verbosity = + (int)simple_strtol(debug_src, &debug_src, + 0); + ecryptfs_printk(KERN_DEBUG, + "Verbosity set to [%d]" "\n", + ecryptfs_verbosity); + break; + case ecryptfs_opt_cipher: + case ecryptfs_opt_ecryptfs_cipher: + cipher_name_src = args[0].from; + cipher_name_dst = + mount_crypt_stat-> + global_default_cipher_name; + strncpy(cipher_name_dst, cipher_name_src, + ECRYPTFS_MAX_CIPHER_NAME_SIZE); + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_default_cipher_name set to: " + "[%s]\n", cipher_name_dst); + cipher_name_set = 1; + break; + case ecryptfs_opt_ecryptfs_key_bytes: + cipher_key_bytes_src = args[0].from; + cipher_key_bytes = + (int)simple_strtol(cipher_key_bytes_src, + &cipher_key_bytes_src, 0); + mount_crypt_stat->global_default_cipher_key_size = + cipher_key_bytes; + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_default_cipher_key_size " + "set to: [%d]\n", mount_crypt_stat-> + global_default_cipher_key_size); + cipher_key_bytes_set = 1; + break; + case ecryptfs_opt_err: + default: + ecryptfs_printk(KERN_WARNING, + "eCryptfs: unrecognized option '%s'\n", + p); + } + } + /* Do not support lack of mount-wide signature in 0.1 + * release */ + if (!sig_set) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "You must supply a valid " + "passphrase auth tok signature as a mount " + "parameter; see the eCryptfs README\n"); + goto out; + } + if (!cipher_name_set) { + cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); + if (unlikely(cipher_name_len + >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { + rc = -EINVAL; + BUG(); + goto out; + } + memcpy(mount_crypt_stat->global_default_cipher_name, + ECRYPTFS_DEFAULT_CIPHER, cipher_name_len); + mount_crypt_stat->global_default_cipher_name[cipher_name_len] + = '\0'; + } + if (!cipher_key_bytes_set) { + mount_crypt_stat->global_default_cipher_key_size = + ECRYPTFS_DEFAULT_KEY_BYTES; + ecryptfs_printk(KERN_DEBUG, "Cipher key size was not " + "specified. 
Defaulting to [%d]\n", + mount_crypt_stat-> + global_default_cipher_key_size); + } + rc = ecryptfs_process_cipher( + &tmp_tfm, + &mount_crypt_stat->global_key_tfm, + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (tmp_tfm) + crypto_free_tfm(tmp_tfm); + if (rc) { + printk(KERN_ERR "Error attempting to initialize cipher [%s] " + "with key size [%Zd] bytes; rc = [%d]\n", + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size, rc); + rc = -EINVAL; + goto out; + } + mutex_init(&mount_crypt_stat->global_key_tfm_mutex); + ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: " + "[%s]\n", mount_crypt_stat->global_auth_tok_sig); + /* The reference to this key is held until umount is done The + * call to key_put is done in ecryptfs_put_super() */ + auth_tok_key = request_key(&key_type_user, + mount_crypt_stat->global_auth_tok_sig, + NULL); + if (!auth_tok_key || IS_ERR(auth_tok_key)) { + ecryptfs_printk(KERN_ERR, "Could not find key with " + "description: [%s]\n", + mount_crypt_stat->global_auth_tok_sig); + process_request_key_err(PTR_ERR(auth_tok_key)); + rc = -EINVAL; + goto out; + } + auth_tok = ecryptfs_get_key_payload_data(auth_tok_key); + if (ecryptfs_verify_version(auth_tok->version)) { + ecryptfs_printk(KERN_ERR, "Data structure version mismatch. 
" + "Userspace tools must match eCryptfs kernel " + "module with major version [%d] and minor " + "version [%d]\n", ECRYPTFS_VERSION_MAJOR, + ECRYPTFS_VERSION_MINOR); + rc = -EINVAL; + goto out; + } + if (auth_tok->token_type != ECRYPTFS_PASSWORD) { + ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure " + "returned from key\n"); + rc = -EINVAL; + goto out; + } + mount_crypt_stat->global_auth_tok_key = auth_tok_key; + mount_crypt_stat->global_auth_tok = auth_tok; +out: + return rc; +} + +struct kmem_cache *ecryptfs_sb_info_cache; + +/** + * ecryptfs_fill_super + * @sb: The ecryptfs super block + * @raw_data: The options passed to mount + * @silent: Not used but required by function prototype + * + * Sets up what we can of the sb, rest is done in ecryptfs_read_super + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) +{ + int rc = 0; + + /* Released in ecryptfs_put_super() */ + ecryptfs_set_superblock_private(sb, + kmem_cache_alloc(ecryptfs_sb_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_superblock_to_private(sb)) { + ecryptfs_printk(KERN_WARNING, "Out of memory\n"); + rc = -ENOMEM; + goto out; + } + memset(ecryptfs_superblock_to_private(sb), 0, + sizeof(struct ecryptfs_sb_info)); + sb->s_op = &ecryptfs_sops; + /* Released through deactivate_super(sb) from get_sb_nodev */ + sb->s_root = d_alloc(NULL, &(const struct qstr) { + .hash = 0,.name = "/",.len = 1}); + if (!sb->s_root) { + ecryptfs_printk(KERN_ERR, "d_alloc failed\n"); + rc = -ENOMEM; + goto out; + } + sb->s_root->d_op = &ecryptfs_dops; + sb->s_root->d_sb = sb; + sb->s_root->d_parent = sb->s_root; + /* Released in d_release when dput(sb->s_root) is called */ + /* through deactivate_super(sb) from get_sb_nodev() */ + ecryptfs_set_dentry_private(sb->s_root, + kmem_cache_alloc(ecryptfs_dentry_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_dentry_to_private(sb->s_root)) { + ecryptfs_printk(KERN_ERR, + "dentry_info_cache 
alloc failed\n"); + rc = -ENOMEM; + goto out; + } + memset(ecryptfs_dentry_to_private(sb->s_root), 0, + sizeof(struct ecryptfs_dentry_info)); + rc = 0; +out: + /* Should be able to rely on deactivate_super called from + * get_sb_nodev */ + return rc; +} + +/** + * ecryptfs_read_super + * @sb: The ecryptfs super block + * @dev_name: The path to mount over + * + * Read the super block of the lower filesystem, and use + * ecryptfs_interpose to create our initial inode and super block + * struct. + */ +static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) +{ + int rc; + struct nameidata nd; + struct dentry *lower_root; + struct vfsmount *lower_mnt; + + memset(&nd, 0, sizeof(struct nameidata)); + rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); + if (rc) { + ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); + goto out_free; + } + lower_root = nd.dentry; + if (!lower_root->d_inode) { + ecryptfs_printk(KERN_WARNING, + "No directory to interpose on\n"); + rc = -ENOENT; + goto out_free; + } + lower_mnt = nd.mnt; + ecryptfs_set_superblock_lower(sb, lower_root->d_sb); + sb->s_maxbytes = lower_root->d_sb->s_maxbytes; + ecryptfs_set_dentry_lower(sb->s_root, lower_root); + ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); + if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0))) + goto out_free; + rc = 0; + goto out; +out_free: + path_release(&nd); +out: + return rc; +} + +/** + * ecryptfs_get_sb + * @fs_type + * @flags + * @dev_name: The path to mount over + * @raw_data: The options passed into the kernel + * + * The whole ecryptfs_get_sb process is broken into 4 functions: + * ecryptfs_parse_options(): handle options passed to ecryptfs, if any + * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block + * with as much information as it can before needing + * the lower filesystem. 
+ * ecryptfs_read_super(): this accesses the lower filesystem and uses + * ecryptfs_interpolate to perform most of the linking + * ecryptfs_interpolate(): links the lower filesystem into ecryptfs + */ +static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + int rc; + struct super_block *sb; + + rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); + if (rc < 0) { + printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); + goto out; + } + sb = mnt->mnt_sb; + rc = ecryptfs_parse_options(sb, raw_data); + if (rc) { + printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); + goto out_abort; + } + rc = ecryptfs_read_super(sb, dev_name); + if (rc) { + printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); + goto out_abort; + } + goto out; +out_abort: + dput(sb->s_root); + up_write(&sb->s_umount); + deactivate_super(sb); +out: + return rc; +} + +/** + * ecryptfs_kill_block_super + * @sb: The ecryptfs super block + * + * Used to bring the superblock down and free the private data. 
+ * Private data is free'd in ecryptfs_put_super() + */ +static void ecryptfs_kill_block_super(struct super_block *sb) +{ + generic_shutdown_super(sb); +} + +static struct file_system_type ecryptfs_fs_type = { + .owner = THIS_MODULE, + .name = "ecryptfs", + .get_sb = ecryptfs_get_sb, + .kill_sb = ecryptfs_kill_block_super, + .fs_flags = 0 +}; + +/** + * inode_info_init_once + * + * Initializes the ecryptfs_inode_info_cache when it is created + */ +static void +inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) +{ + struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&ei->vfs_inode); +} + +static struct ecryptfs_cache_info { + kmem_cache_t **cache; + const char *name; + size_t size; + void (*ctor)(void*, struct kmem_cache *, unsigned long); +} ecryptfs_cache_infos[] = { + { + .cache = &ecryptfs_auth_tok_list_item_cache, + .name = "ecryptfs_auth_tok_list_item", + .size = sizeof(struct ecryptfs_auth_tok_list_item), + }, + { + .cache = &ecryptfs_file_info_cache, + .name = "ecryptfs_file_cache", + .size = sizeof(struct ecryptfs_file_info), + }, + { + .cache = &ecryptfs_dentry_info_cache, + .name = "ecryptfs_dentry_info_cache", + .size = sizeof(struct ecryptfs_dentry_info), + }, + { + .cache = &ecryptfs_inode_info_cache, + .name = "ecryptfs_inode_cache", + .size = sizeof(struct ecryptfs_inode_info), + .ctor = inode_info_init_once, + }, + { + .cache = &ecryptfs_sb_info_cache, + .name = "ecryptfs_sb_cache", + .size = sizeof(struct ecryptfs_sb_info), + }, + { + .cache = &ecryptfs_header_cache_0, + .name = "ecryptfs_headers_0", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_header_cache_1, + .name = "ecryptfs_headers_1", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_header_cache_2, + .name = "ecryptfs_headers_2", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_lower_page_cache, + .name = 
"ecryptfs_lower_page_cache", + .size = PAGE_CACHE_SIZE, + }, +}; + +static void ecryptfs_free_kmem_caches(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { + struct ecryptfs_cache_info *info; + + info = &ecryptfs_cache_infos[i]; + if (*(info->cache)) + kmem_cache_destroy(*(info->cache)); + } +} + +/** + * ecryptfs_init_kmem_caches + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_init_kmem_caches(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { + struct ecryptfs_cache_info *info; + + info = &ecryptfs_cache_infos[i]; + *(info->cache) = kmem_cache_create(info->name, info->size, + 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL); + if (!*(info->cache)) { + ecryptfs_free_kmem_caches(); + ecryptfs_printk(KERN_WARNING, "%s: " + "kmem_cache_create failed\n", + info->name); + return -ENOMEM; + } + } + return 0; +} + +struct ecryptfs_obj { + char *name; + struct list_head slot_list; + struct kobject kobj; +}; + +struct ecryptfs_attribute { + struct attribute attr; + ssize_t(*show) (struct ecryptfs_obj *, char *); + ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t); +}; + +static ssize_t +ecryptfs_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, + kobj); + struct ecryptfs_attribute *attribute = + container_of(attr, struct ecryptfs_attribute, attr); + + return (attribute->store ? attribute->store(obj, buf, len) : 0); +} + +static ssize_t +ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, + kobj); + struct ecryptfs_attribute *attribute = + container_of(attr, struct ecryptfs_attribute, attr); + + return (attribute->show ? 
attribute->show(obj, buf) : 0); +} + +static struct sysfs_ops ecryptfs_sysfs_ops = { + .show = ecryptfs_attr_show, + .store = ecryptfs_attr_store +}; + +static struct kobj_type ecryptfs_ktype = { + .sysfs_ops = &ecryptfs_sysfs_ops +}; + +static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL); + +static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); +} + +static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); + +struct ecryptfs_version_str_map_elem { + u32 flag; + char *str; +} ecryptfs_version_str_map[] = { + {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, + {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, + {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, + {ECRYPTFS_VERSIONING_POLICY, "policy"} +}; + +static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) +{ + int i; + int remaining = PAGE_SIZE; + int total_written = 0; + + buff[0] = '\0'; + for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) { + int entry_size; + + if (!(ECRYPTFS_VERSIONING_MASK + & ecryptfs_version_str_map[i].flag)) + continue; + entry_size = strlen(ecryptfs_version_str_map[i].str); + if ((entry_size + 2) > remaining) + goto out; + memcpy(buff, ecryptfs_version_str_map[i].str, entry_size); + buff[entry_size++] = '\n'; + buff[entry_size] = '\0'; + buff += entry_size; + total_written += entry_size; + remaining -= entry_size; + } +out: + return total_written; +} + +static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str); + +static int do_sysfs_registration(void) +{ + int rc; + + if ((rc = subsystem_register(&ecryptfs_subsys))) { + printk(KERN_ERR + "Unable to register ecryptfs sysfs subsystem\n"); + goto out; + } + rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + if (rc) { + printk(KERN_ERR + "Unable to create ecryptfs version attribute\n"); + subsystem_unregister(&ecryptfs_subsys); + goto out; + } + rc = 
sysfs_create_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version_str.attr); + if (rc) { + printk(KERN_ERR + "Unable to create ecryptfs version_str attribute\n"); + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + subsystem_unregister(&ecryptfs_subsys); + goto out; + } +out: + return rc; +} + +static int __init ecryptfs_init(void) +{ + int rc; + + if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " + "larger than the host's page size, and so " + "eCryptfs cannot run on this system. The " + "default eCryptfs extent size is [%d] bytes; " + "the page size is [%d] bytes.\n", + ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); + goto out; + } + rc = ecryptfs_init_kmem_caches(); + if (rc) { + printk(KERN_ERR + "Failed to allocate one or more kmem_cache objects\n"); + goto out; + } + rc = register_filesystem(&ecryptfs_fs_type); + if (rc) { + printk(KERN_ERR "Failed to register filesystem\n"); + ecryptfs_free_kmem_caches(); + goto out; + } + kset_set_kset_s(&ecryptfs_subsys, fs_subsys); + sysfs_attr_version.attr.owner = THIS_MODULE; + sysfs_attr_version_str.attr.owner = THIS_MODULE; + rc = do_sysfs_registration(); + if (rc) { + printk(KERN_ERR "sysfs registration failed\n"); + unregister_filesystem(&ecryptfs_fs_type); + ecryptfs_free_kmem_caches(); + goto out; + } +out: + return rc; +} + +static void __exit ecryptfs_exit(void) +{ + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version_str.attr); + subsystem_unregister(&ecryptfs_subsys); + unregister_filesystem(&ecryptfs_fs_type); + ecryptfs_free_kmem_caches(); +} + +MODULE_AUTHOR("Michael A. 
Halcrow "); +MODULE_DESCRIPTION("eCryptfs"); + +MODULE_LICENSE("GPL"); + +module_init(ecryptfs_init) +module_exit(ecryptfs_exit) diff -urN oldtree/fs/ecryptfs/mmap.c newtree/fs/ecryptfs/mmap.c --- oldtree/fs/ecryptfs/mmap.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/ecryptfs/mmap.c 2006-09-30 04:33:58.000000000 -0400 @@ -0,0 +1,788 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * This is where eCryptfs coordinates the symmetric encryption and + * decryption of the file data as it passes between the lower + * encrypted file and the upper decrypted file. + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ecryptfs_kernel.h" + +struct kmem_cache *ecryptfs_lower_page_cache; + +/** + * ecryptfs_get1page + * + * Get one page from cache or lower f/s, return error otherwise. + * + * Returns unlocked and up-to-date page (if ok), with increased + * refcnt. 
+ */ +static struct page *ecryptfs_get1page(struct file *file, int index) +{ + struct page *page; + struct dentry *dentry; + struct inode *inode; + struct address_space *mapping; + + dentry = file->f_dentry; + inode = dentry->d_inode; + mapping = inode->i_mapping; + page = read_cache_page(mapping, index, + (filler_t *)mapping->a_ops->readpage, + (void *)file); + if (IS_ERR(page)) + goto out; + wait_on_page_locked(page); +out: + return page; +} + +static +int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); + +/** + * ecryptfs_fill_zeros + * @file: The ecryptfs file + * @new_length: The new length of the data in the underlying file; + * everything between the prior end of the file and the + * new end of the file will be filled with zero's. + * new_length must be greater than current length + * + * Function for handling lseek-ing past the end of the file. + * + * This function does not support shrinking, only growing a file. + * + * Returns zero on success; non-zero otherwise. 
+ */ +int ecryptfs_fill_zeros(struct file *file, loff_t new_length) +{ + int rc = 0; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + pgoff_t old_end_page_index = 0; + pgoff_t index = old_end_page_index; + int old_end_pos_in_page = -1; + pgoff_t new_end_page_index; + int new_end_pos_in_page; + loff_t cur_length = i_size_read(inode); + + if (cur_length != 0) { + index = old_end_page_index = + ((cur_length - 1) >> PAGE_CACHE_SHIFT); + old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK); + } + new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT); + new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); + ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; " + "old_end_pos_in_page = [%d]; " + "new_end_page_index = [0x%.16x]; " + "new_end_pos_in_page = [%d]\n", + old_end_page_index, old_end_pos_in_page, + new_end_page_index, new_end_pos_in_page); + if (old_end_page_index == new_end_page_index) { + /* Start and end are in the same page; we just need to + * set a portion of the existing page to zero's */ + rc = write_zeros(file, index, (old_end_pos_in_page + 1), + (new_end_pos_in_page - old_end_pos_in_page)); + if (rc) + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], " + "old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - new_end_pos_in_page" + "=[%d]" + ")=[d]) returned [%d]\n", file, index, + old_end_pos_in_page, + new_end_pos_in_page, + (PAGE_CACHE_SIZE - new_end_pos_in_page), + rc); + goto out; + } + /* Fill the remainder of the previous last page with zeros */ + rc = write_zeros(file, index, (old_end_pos_in_page + 1), + ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " + "returned [%d]\n", file, index, + old_end_pos_in_page, + (PAGE_CACHE_SIZE - old_end_pos_in_page), rc); + goto out; + } + index++; + while (index < 
new_end_page_index) { + /* Fill all intermediate pages with zeros */ + rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], " + "old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - new_end_pos_in_page" + "=[%d]" + ")=[d]) returned [%d]\n", file, index, + old_end_pos_in_page, + new_end_pos_in_page, + (PAGE_CACHE_SIZE - new_end_pos_in_page), + rc); + goto out; + } + index++; + } + /* Fill the portion at the beginning of the last new page with + * zero's */ + rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=" + "[%p], index=[0x%.16x], 0, " + "new_end_pos_in_page=[%d]" + "returned [%d]\n", file, index, + new_end_pos_in_page, rc); + goto out; + } +out: + return rc; +} + +/** + * ecryptfs_writepage + * @page: Page that is locked before this call is made + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct ecryptfs_page_crypt_context ctx; + int rc; + + ctx.page = page; + ctx.mode = ECRYPTFS_WRITEPAGE_MODE; + ctx.param.wbc = wbc; + rc = ecryptfs_encrypt_page(&ctx); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting " + "page (upper index [0x%.16x])\n", page->index); + ClearPageUptodate(page); + goto out; + } + SetPageUptodate(page); + unlock_page(page); +out: + return rc; +} + +/** + * Reads the data from the lower file file at index lower_page_index + * and copies that data into page. 
+ * + * @param page Page to fill + * @param lower_page_index Index of the page in the lower file to get + */ +int ecryptfs_do_readpage(struct file *file, struct page *page, + pgoff_t lower_page_index) +{ + int rc; + struct dentry *dentry; + struct file *lower_file; + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + char *page_data; + struct page *lower_page = NULL; + char *lower_page_data; + const struct address_space_operations *lower_a_ops; + + dentry = file->f_dentry; + lower_file = ecryptfs_file_to_lower(file); + lower_dentry = ecryptfs_dentry_to_lower(dentry); + inode = dentry->d_inode; + lower_inode = ecryptfs_inode_to_lower(inode); + lower_a_ops = lower_inode->i_mapping->a_ops; + lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index, + (filler_t *)lower_a_ops->readpage, + (void *)lower_file); + if (IS_ERR(lower_page)) { + rc = PTR_ERR(lower_page); + lower_page = NULL; + ecryptfs_printk(KERN_ERR, "Error reading from page cache\n"); + goto out; + } + wait_on_page_locked(lower_page); + page_data = (char *)kmap(page); + if (!page_data) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error mapping page\n"); + goto out; + } + lower_page_data = (char *)kmap(lower_page); + if (!lower_page_data) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error mapping page\n"); + kunmap(page); + goto out; + } + memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); + kunmap(lower_page); + kunmap(page); + rc = 0; +out: + if (likely(lower_page)) + page_cache_release(lower_page); + if (rc == 0) + SetPageUptodate(page); + else + ClearPageUptodate(page); + return rc; +} + +/** + * ecryptfs_readpage + * @file: This is an ecryptfs file + * @page: ecryptfs associated page to stick the read data into + * + * Read in a page, decrypting if necessary. + * + * Returns zero on success; non-zero on error. 
+ */
+static int ecryptfs_readpage(struct file *file, struct page *page)
+{
+	int rc = 0;
+	struct ecryptfs_crypt_stat *crypt_stat;
+
+	BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode));
+	crypt_stat =
+	    &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+	/* Files that are not flagged encrypted, or that are still flagged
+	 * new, are copied straight from the lower file. */
+	if (!crypt_stat
+	    || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
+	    || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+		ecryptfs_printk(KERN_DEBUG,
+				"Passing through unencrypted page\n");
+		rc = ecryptfs_do_readpage(file, page, page->index);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error reading page; rc = "
+					"[%d]\n", rc);
+			goto out;
+		}
+	} else {
+		rc = ecryptfs_decrypt_page(file, page);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error decrypting page; "
+					"rc = [%d]\n", rc);
+			goto out;
+		}
+	}
+	SetPageUptodate(page);
+out:
+	if (rc)
+		ClearPageUptodate(page);
+	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+			page->index);
+	unlock_page(page);
+	return rc;
+}
+
+/*
+ * If @page is the page that holds the inode's current end of file, zero
+ * it from the larger of (i_size % PAGE_CACHE_SIZE) and @to up to the
+ * end of the page.  Other pages are left alone.
+ *
+ * Returns zero on success; -ENOMEM if the page could not be mapped.
+ */
+static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
+{
+	struct inode *inode = page->mapping->host;
+	int end_byte_in_page;
+	int rc = 0;
+	char *page_virt;
+
+	if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) {
+		end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
+		if (to > end_byte_in_page)
+			end_byte_in_page = to;
+		page_virt = kmap(page);
+		if (!page_virt) {
+			rc = -ENOMEM;
+			ecryptfs_printk(KERN_WARNING,
+					"Could not map page\n");
+			goto out;
+		}
+		memset((page_virt + end_byte_in_page), 0,
+		       (PAGE_CACHE_SIZE - end_byte_in_page));
+		kunmap(page);
+	}
+out:
+	return rc;
+}
+
+/*
+ * Maps @page and, for a partial-page write to a page that is not up to
+ * date, reads the existing contents in from the lower file first.
+ *
+ * On success the page is left mapped; ecryptfs_commit_write() unmaps
+ * it.  On failure the mapping is dropped here.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int ecryptfs_prepare_write(struct file *file, struct page *page,
+				  unsigned from, unsigned to)
+{
+	int rc = 0;
+
+	kmap(page);
+	if (from == 0 && to == PAGE_CACHE_SIZE)
+		goto out;	/* If we are writing a full page, it will be
+				   up to date. */
+	if (!PageUptodate(page))
+		rc = ecryptfs_do_readpage(file, page, page->index);
+	if (rc)
+		kunmap(page);	/* no commit_write will follow to undo it */
+out:
+	return rc;
+}
+
+/*
+ * Grabs (finds or creates, and locks) the page at @lower_page_index in
+ * @lower_inode's mapping and maps it.  The mapped address is stored in
+ * *@lower_virt when @lower_virt is non-NULL.
+ *
+ * Returns zero on success; -EINVAL if the page could not be grabbed, in
+ * which case *@lower_page is NULL.
+ */
+int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
+				     char **lower_virt,
+				     struct inode *lower_inode,
+				     unsigned long lower_page_index)
+{
+	int rc = 0;
+
+	(*lower_page) = grab_cache_page(lower_inode->i_mapping,
+					lower_page_index);
+	if (!(*lower_page)) {
+		ecryptfs_printk(KERN_ERR, "grab_cache_page for "
+				"lower_page_index = [0x%.16x] failed\n",
+				lower_page_index);
+		rc = -EINVAL;
+		goto out;
+	}
+	if (lower_virt)
+		(*lower_virt) = kmap((*lower_page));
+	else
+		kmap((*lower_page));
+out:
+	return rc;
+}
+
+/*
+ * Calls the lower filesystem's writepage() on @lower_page and, on
+ * success, updates the lower inode's mtime/ctime and drops the page
+ * reference.
+ *
+ * NOTE(review): the reference is not dropped when writepage() fails;
+ * confirm that callers release the page themselves in that case.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
+					      struct inode *lower_inode,
+					      struct writeback_control *wbc)
+{
+	int rc = 0;
+
+	rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
+				"rc = [%d]\n", rc);
+		goto out;
+	}
+	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+	page_cache_release(lower_page);
+out:
+	return rc;
+}
+
+/*
+ * Undoes ecryptfs_grab_and_map_lower_page(): unmaps, unlocks and
+ * releases @lower_page.
+ */
+static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
+{
+	kunmap(lower_page);
+	ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
+			"[0x%.16x]\n", lower_page->index);
+	unlock_page(lower_page);
+	page_cache_release(lower_page);
+}
+
+/**
+ * ecryptfs_write_inode_size_to_header
+ *
+ * Writes the lower file size to the first 8 bytes of the header.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int
+ecryptfs_write_inode_size_to_header(struct file *lower_file,
+				    struct inode *lower_inode,
+				    struct inode *inode)
+{
+	int rc = 0;
+	struct page *header_page;
+	char *header_virt;
+	const struct address_space_operations *lower_a_ops;
+	u64 file_size;
+
+	rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt,
+					      lower_inode, 0);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "grab_cache_page for header page "
+				"failed\n");
+		goto out;
+	}
+	lower_a_ops = lower_inode->i_mapping->a_ops;
+	rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
+	if (rc) {
+		/* This result used to be overwritten unchecked; do not
+		 * modify or commit a page that was never prepared. */
+		ecryptfs_printk(KERN_ERR, "Error preparing to write header "
+				"page; rc = [%d]\n", rc);
+		ecryptfs_unmap_and_release_lower_page(header_page);
+		goto out;
+	}
+	/* The size of @inode is stored big-endian in the first 8 bytes */
+	file_size = (u64)i_size_read(inode);
+	ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16llx]\n",
+			(unsigned long long)file_size);
+	file_size = cpu_to_be64(file_size);
+	memcpy(header_virt, &file_size, sizeof(u64));
+	rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
+	if (rc < 0)
+		ecryptfs_printk(KERN_ERR, "Error commiting header page "
+				"write\n");
+	ecryptfs_unmap_and_release_lower_page(header_page);
+	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty_sync(inode);
+out:
+	return rc;
+}
+
+/*
+ * Grabs and maps the lower page at @lower_page_index and calls the
+ * lower filesystem's prepare_write() on it for the region given by
+ * @byte_offset and @region_bytes.
+ *
+ * On failure the page, if it was obtained, is unmapped, unlocked and
+ * released, and *@lower_page is set to NULL.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
+			    struct file *lower_file,
+			    unsigned long lower_page_index, int byte_offset,
+			    int region_bytes)
+{
+	int rc = 0;
+
+	rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode,
+					      lower_page_index);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error attempting to grab and map "
+				"lower page with index [0x%.16x]\n",
+				lower_page_index);
+		goto out;
+	}
+	rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
+							  (*lower_page),
+							  byte_offset,
+							  region_bytes);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "prepare_write for "
+				"lower_page_index = [0x%.16x] failed; rc = "
+				"[%d]\n", lower_page_index, rc);
+	}
+out:
+	if (rc && (*lower_page)) {
+		ecryptfs_unmap_and_release_lower_page(*lower_page);
+		(*lower_page) = NULL;
+	}
+	return rc;
+}
+
+/**
+ * ecryptfs_commit_lower_page
+ *
+ * Returns zero on success;
non-zero on error
+ */
+int
+ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
+			   struct file *lower_file, int byte_offset,
+			   int region_size)
+{
+	int rc = 0;
+
+	rc = lower_inode->i_mapping->a_ops->commit_write(
+		lower_file, lower_page, byte_offset, region_size);
+	if (rc < 0) {
+		ecryptfs_printk(KERN_ERR,
+				"Error committing write; rc = [%d]\n", rc);
+	} else
+		rc = 0;
+	/* The page is unmapped, unlocked and released on every path */
+	ecryptfs_unmap_and_release_lower_page(lower_page);
+	return rc;
+}
+
+/**
+ * ecryptfs_copy_page_to_lower
+ *
+ * Used for plaintext pass-through; no page index interpolation
+ * required.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
+				struct file *lower_file)
+{
+	int rc = 0;
+	struct page *lower_page;
+
+	rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
+				     page->index, 0, PAGE_CACHE_SIZE);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error attempting to get page "
+				"at index [0x%.16x]\n", page->index);
+		goto out;
+	}
+	/* TODO: aops */
+	memcpy((char *)page_address(lower_page), page_address(page),
+	       PAGE_CACHE_SIZE);
+	rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
+					0, PAGE_CACHE_SIZE);
+	if (rc)
+		ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
+				"at index [0x%.16x]\n", page->index);
+out:
+	return rc;
+}
+
+/*
+ * Writes the header pages of a new file to the lower file.  Each header
+ * page is zeroed; the first one additionally receives the output of
+ * ecryptfs_write_headers_virt() with its first 8 bytes cleared again.
+ * When all pages are committed the eCryptfs inode size is set to zero.
+ * The ECRYPTFS_NEW_FILE flag is cleared unless the function bails out
+ * through the "out" label.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
+		 struct file *file, struct inode *inode)
+{
+	struct page *header_page;
+	const struct address_space_operations *lower_a_ops;
+	struct inode *lower_inode;
+	struct file *lower_file;
+	char *header_virt;
+	int rc = 0;
+	int current_header_page = 0;
+	int header_pages;
+	int more_header_data_to_be_written = 1;
+
+	lower_inode = ecryptfs_inode_to_lower(inode);
+	lower_file = ecryptfs_file_to_lower(file);
+	lower_a_ops = lower_inode->i_mapping->a_ops;
+	header_pages = ((crypt_stat->header_extent_size
+			 * crypt_stat->num_header_extents_at_front)
+			/ PAGE_CACHE_SIZE);
+	BUG_ON(header_pages < 1);
+	while (current_header_page < header_pages) {
+		rc = ecryptfs_grab_and_map_lower_page(&header_page,
+						      &header_virt,
+						      lower_inode,
+						      current_header_page);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "grab_cache_page for "
+					"header page [%d] failed; rc = [%d]\n",
+					current_header_page, rc);
+			goto out;
+		}
+		rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
+						PAGE_CACHE_SIZE);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error preparing to write "
+					"header page out; rc = [%d]\n", rc);
+			/* The page was grabbed (locked) and mapped above;
+			 * do not leave it that way on this error path. */
+			ecryptfs_unmap_and_release_lower_page(header_page);
+			goto out;
+		}
+		memset(header_virt, 0, PAGE_CACHE_SIZE);
+		if (more_header_data_to_be_written) {
+			rc = ecryptfs_write_headers_virt(header_virt,
+							 crypt_stat,
+							 file->f_dentry);
+			if (rc) {
+				ecryptfs_printk(KERN_WARNING, "Error "
+						"generating header; rc = "
+						"[%d]\n", rc);
+				rc = -EIO;
+				memset(header_virt, 0, PAGE_CACHE_SIZE);
+				ecryptfs_unmap_and_release_lower_page(
+					header_page);
+				goto out;
+			}
+			if (current_header_page == 0)
+				memset(header_virt, 0, 8);
+			more_header_data_to_be_written = 0;
+		}
+		rc = lower_a_ops->commit_write(lower_file, header_page, 0,
+					       PAGE_CACHE_SIZE);
+		ecryptfs_unmap_and_release_lower_page(header_page);
+		if (rc < 0) {
+			ecryptfs_printk(KERN_ERR,
+					"Error commiting header page write; "
+					"rc = [%d]\n", rc);
+			break;
+		}
+		current_header_page++;
+	}
+	if (rc >= 0) {
+		rc = 0;
+		ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
+				"[0x%.16x]\n", lower_inode->i_blocks);
+		i_size_write(inode, 0);
+		lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty_sync(inode);
+	}
+	ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
+			"crypt_stat at memory location [%p]\n", crypt_stat);
+	ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_commit_write
+ * @file: The eCryptfs file object
+ * @page: The eCryptfs page
+ * @from: Ignored (we rotate the page IV on each write)
+ * @to: End offset of the write within the page; bounds the zero fill
+ *      and determines the new file size
+ *
+ * This is where we encrypt the data and pass the encrypted data to
+ * the lower
filesystem.  In OpenPGP-compatible mode, we operate on
+ * entire underlying packets.
+ *
+ * Runs with the lower inode's i_mutex held.  The page mapping taken in
+ * ecryptfs_prepare_write() is dropped here on every path.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int ecryptfs_commit_write(struct file *file, struct page *page,
+				 unsigned from, unsigned to)
+{
+	struct ecryptfs_page_crypt_context ctx;
+	loff_t pos;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct file *lower_file;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	int rc;
+
+	inode = page->mapping->host;
+	lower_inode = ecryptfs_inode_to_lower(inode);
+	lower_file = ecryptfs_file_to_lower(file);
+	mutex_lock(&lower_inode->i_mutex);
+	crypt_stat =
+	    &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+	if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+		ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
+			"crypt_stat at memory location [%p]\n", crypt_stat);
+		rc = process_new_file(crypt_stat, file, inode);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error processing new "
+					"file; rc = [%d]\n", rc);
+			goto out;
+		}
+	} else
+		ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
+	ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
+			"(page w/ index = [0x%.16x], to = [%d])\n", page->index,
+			to);
+	rc = fill_zeros_to_end_of_page(page, to);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
+				"zeros in page with index = [0x%.16x]\n",
+				page->index);
+		goto out;
+	}
+	ctx.page = page;
+	ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
+	ctx.param.lower_file = lower_file;
+	rc = ecryptfs_encrypt_page(&ctx);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
+				"index [0x%.16x])\n", page->index);
+		goto out;
+	}
+	rc = 0;
+	inode->i_blocks = lower_inode->i_blocks;
+	/* Widen before shifting: page->index is an unsigned long, so on
+	 * 32-bit the shift would wrap for offsets of 4GB and beyond. */
+	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	if (pos > i_size_read(inode)) {
+		i_size_write(inode, pos);
+		ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
+				"[0x%.16x]\n", i_size_read(inode));
+	}
+	ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
+	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty_sync(inode);
+out:
+	kunmap(page); /* mapped in prior call (prepare_write) */
+	if (rc < 0)
+		ClearPageUptodate(page);
+	else
+		SetPageUptodate(page);
+	mutex_unlock(&lower_inode->i_mutex);
+	return rc;
+}
+
+/**
+ * write_zeros
+ * @file: The ecryptfs file
+ * @index: The index in which we are writing
+ * @start: The position after the last block of data
+ * @num_zeros: The number of zeros to write
+ *
+ * Write a specified number of zero's to a page.
+ *
+ * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static
+int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
+{
+	int rc = 0;
+	struct page *tmp_page;
+
+	tmp_page = ecryptfs_get1page(file, index);
+	if (IS_ERR(tmp_page)) {
+		ecryptfs_printk(KERN_ERR, "Error getting page at index "
+				"[0x%.16x]\n", index);
+		rc = PTR_ERR(tmp_page);
+		goto out;
+	}
+	/* This mapping is separate from the one ecryptfs_prepare_write()
+	 * takes; it is dropped below on every path. */
+	kmap(tmp_page);
+	rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
+				"to remainder of page at index [0x%.16x]\n",
+				index);
+		kunmap(tmp_page);
+		page_cache_release(tmp_page);
+		goto out;
+	}
+	memset(((char *)page_address(tmp_page) + start), 0, num_zeros);
+	rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
+	if (rc < 0) {
+		ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
+				"to remainder of page at index [0x%.16x]\n",
+				index);
+		kunmap(tmp_page);
+		page_cache_release(tmp_page);
+		goto out;
+	}
+	rc = 0;
+	kunmap(tmp_page);
+	page_cache_release(tmp_page);
+out:
+	return rc;
+}
+
+/*
+ * Forwards the block lookup to the lower filesystem's bmap(), if it has
+ * one.  Returns 0 when the lower filesystem provides no bmap().
+ */
+static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
+{
+	/* sector_t, not int: the lower result must not be truncated */
+	sector_t rc = 0;
+	struct inode *inode;
+	struct inode *lower_inode;
+
+	inode = (struct inode *)mapping->host;
+	lower_inode = ecryptfs_inode_to_lower(inode);
+	if (lower_inode->i_mapping->a_ops->bmap)
+		rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
+							 block);
+	return rc;
+}
+
+static
void ecryptfs_sync_page(struct page *page)
+{
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct page *lower_page;
+
+	inode = page->mapping->host;
+	lower_inode = ecryptfs_inode_to_lower(inode);
+	/* NOTE: Recently swapped with grab_cache_page(), since
+	 * sync_page() just makes sure that pending I/O gets done. */
+	lower_page = find_lock_page(lower_inode->i_mapping, page->index);
+	if (!lower_page) {
+		ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
+		return;
+	}
+	/* ->sync_page is optional; skip lower filesystems without one */
+	if (lower_page->mapping->a_ops->sync_page)
+		lower_page->mapping->a_ops->sync_page(lower_page);
+	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+			lower_page->index);
+	unlock_page(lower_page);
+	page_cache_release(lower_page);
+}
+
+/* Address-space operations installed on every eCryptfs inode mapping */
+struct address_space_operations ecryptfs_aops = {
+	.writepage = ecryptfs_writepage,
+	.readpage = ecryptfs_readpage,
+	.prepare_write = ecryptfs_prepare_write,
+	.commit_write = ecryptfs_commit_write,
+	.bmap = ecryptfs_bmap,
+	.sync_page = ecryptfs_sync_page,
+};
diff -urN oldtree/fs/ecryptfs/super.c newtree/fs/ecryptfs/super.c
--- oldtree/fs/ecryptfs/super.c 1969-12-31 19:00:00.000000000 -0500
+++ newtree/fs/ecryptfs/super.c 2006-09-30 04:33:33.000000000 -0400
@@ -0,0 +1,198 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow
+ *              Michael C. Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "ecryptfs_kernel.h"
+
+struct kmem_cache *ecryptfs_inode_info_cache;
+
+/**
+ * ecryptfs_alloc_inode - allocate an ecryptfs inode
+ * @sb: Pointer to the ecryptfs super block
+ *
+ * Brings an inode into existence by allocating its ecryptfs_inode_info
+ * from ecryptfs_inode_info_cache and initializing the embedded
+ * crypt_stat.
+ *
+ * Only allocation happens here; further set-up belongs in
+ * ecryptfs_read_inode, because the kernel will 0 out the private data
+ * pointer between now and then.
+ *
+ * Returns the VFS inode embedded in the new object, NULL if the
+ * allocation failed
+ */
+static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
+{
+	struct ecryptfs_inode_info *info;
+
+	info = kmem_cache_alloc(ecryptfs_inode_info_cache, SLAB_KERNEL);
+	if (unlikely(!info))
+		return NULL;
+	ecryptfs_init_crypt_stat(&info->crypt_stat);
+	return &info->vfs_inode;
+}
+
+/**
+ * ecryptfs_destroy_inode
+ * @inode: The ecryptfs inode
+ *
+ * Final destruction of the inode: tears down its crypt_stat and hands
+ * the containing ecryptfs_inode_info back to the slab cache.  All
+ * memory tied to the inode is expected to be released here.
+ */
+static void ecryptfs_destroy_inode(struct inode *inode)
+{
+	struct ecryptfs_inode_info *info = ecryptfs_inode_to_private(inode);
+
+	ecryptfs_destruct_crypt_stat(&info->crypt_stat);
+	kmem_cache_free(ecryptfs_inode_info_cache, info);
+}
+
+/**
+ * ecryptfs_init_inode
+ * @inode: The ecryptfs inode
+ *
+ * Set up the ecryptfs inode.
+ *
+ * Records the second argument, lower_inode, as the lower inode of
+ * @inode, copies its inode number, bumps i_version and installs the
+ * eCryptfs inode, file and address-space operations.
+ */
+void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
+{
+	ecryptfs_set_inode_lower(inode, lower_inode);
+	inode->i_ino = lower_inode->i_ino;
+	inode->i_version++;
+	inode->i_op = &ecryptfs_main_iops;
+	inode->i_fop = &ecryptfs_main_fops;
+	inode->i_mapping->a_ops = &ecryptfs_aops;
+}
+
+/**
+ * ecryptfs_put_super
+ * @sb: Pointer to the ecryptfs super block
+ *
+ * Final actions when unmounting a file system.
+ * This will handle deallocation and release of our private data: the
+ * mount-wide crypt_stat is destructed, the ecryptfs_sb_info is freed
+ * and the superblock's private pointer is cleared.
+ */
+static void ecryptfs_put_super(struct super_block *sb)
+{
+	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
+
+	ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat);
+	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
+	ecryptfs_set_superblock_private(sb, NULL);
+}
+
+/**
+ * ecryptfs_statfs
+ * @dentry: An ecryptfs dentry; its lower dentry is queried
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics. Currently, we let this pass right through
+ * to the lower filesystem and take no action ourselves.
+ *
+ * Returns the result of vfs_statfs() on the lower dentry.
+ */
+static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
+}
+
+/**
+ * ecryptfs_clear_inode
+ * @inode: The ecryptfs inode
+ *
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere.  Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list. We use this to drop our reference to the
+ * lower inode.
+ */
+static void ecryptfs_clear_inode(struct inode *inode)
+{
+	iput(ecryptfs_inode_to_lower(inode));
+}
+
+/**
+ * ecryptfs_umount_begin
+ *
+ * Called in do_umount().
+ *
+ * Forwards the call to the lower filesystem's umount_begin(), if it
+ * has one, and drops a reference on the lower mount.
+ */
+static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
+{
+	struct vfsmount *lower_mnt =
+		ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
+	struct super_block *lower_sb;
+
+	lower_sb = lower_mnt->mnt_sb;
+	if (lower_sb->s_op->umount_begin)
+		lower_sb->s_op->umount_begin(lower_mnt, flags);
+	/* Put the reference only after the last use of lower_mnt; it used
+	 * to be dropped first and dereferenced afterwards. */
+	mntput(lower_mnt);
+}
+
+/**
+ * ecryptfs_show_options
+ * @m: The seq_file the option string is written to
+ * @mnt: The ecryptfs mount being shown
+ *
+ * Prints the directory we are currently mounted over.
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct super_block *sb = mnt->mnt_sb;
+	struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root);
+	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root);
+	char *tmp_page;
+	char *path;
+	int rc = 0;
+
+	tmp_page = (char *)__get_free_page(GFP_KERNEL);
+	if (!tmp_page) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE);
+	if (IS_ERR(path)) {
+		rc = PTR_ERR(path);
+		goto out_free;
+	}
+	seq_printf(m, ",dir=%s", path);
+out_free:
+	/* tmp_page backs path; free it on the d_path() error path too,
+	 * where it used to leak. */
+	free_page((unsigned long)tmp_page);
+out:
+	return rc;
+}
+
+/* Superblock operations for eCryptfs mounts */
+struct super_operations ecryptfs_sops = {
+	.alloc_inode = ecryptfs_alloc_inode,
+	.destroy_inode = ecryptfs_destroy_inode,
+	.drop_inode = generic_delete_inode,
+	.put_super = ecryptfs_put_super,
+	.statfs = ecryptfs_statfs,
+	.remount_fs = NULL,
+	.clear_inode = ecryptfs_clear_inode,
+	.umount_begin = ecryptfs_umount_begin,
+	.show_options = ecryptfs_show_options
+};
diff -urN oldtree/fs/exec.c newtree/fs/exec.c --- oldtree/fs/exec.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/exec.c 2006-09-30 04:19:34.000000000 -0400 @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff -urN oldtree/fs/ext2/file.c newtree/fs/ext2/file.c --- oldtree/fs/ext2/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/ext2/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -41,8 +41,8 @@ */ const struct file_operations ext2_file_operations
= { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .ioctl = ext2_ioctl, @@ -50,8 +50,6 @@ .open = generic_file_open, .release = ext2_release_file, .fsync = ext2_sync_file, - .readv = generic_file_readv, - .writev = generic_file_writev, .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff -urN oldtree/fs/ext3/file.c newtree/fs/ext3/file.c --- oldtree/fs/ext3/file.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/ext3/file.c 2006-09-30 04:17:01.000000000 -0400 @@ -48,14 +48,15 @@ } static ssize_t -ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ext3_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; ssize_t ret; int err; - ret = generic_file_aio_write(iocb, buf, count, pos); + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); /* * Skip flushing if there was an error, or if nothing was written. 
@@ -111,8 +112,6 @@ .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, - .readv = generic_file_readv, - .writev = generic_file_writev, .ioctl = ext3_ioctl, .mmap = generic_file_mmap, .open = generic_file_open, diff -urN oldtree/fs/fat/file.c newtree/fs/fat/file.c --- oldtree/fs/fat/file.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/fat/file.c 2006-09-30 04:17:01.000000000 -0400 @@ -127,8 +127,6 @@ .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = generic_file_readv, - .writev = generic_file_writev, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, diff -urN oldtree/fs/fuse/dev.c newtree/fs/fuse/dev.c --- oldtree/fs/fuse/dev.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/fuse/dev.c 2006-09-30 04:17:01.000000000 -0400 @@ -680,14 +680,15 @@ * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *off) +static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int err; struct fuse_req *req; struct fuse_in *in; struct fuse_copy_state cs; unsigned reqsize; + struct file *file = iocb->ki_filp; struct fuse_conn *fc = fuse_get_conn(file); if (!fc) return -EPERM; @@ -761,15 +762,6 @@ return err; } -static ssize_t fuse_dev_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *off) -{ - struct iovec iov; - iov.iov_len = nbytes; - iov.iov_base = buf; - return fuse_dev_readv(file, &iov, 1, off); -} - /* Look up request on processing list by unique ID */ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) { @@ -814,15 +806,15 @@ * it from the list and copy the rest of the buffer to the request. 
* The request is finished by calling request_end() */ -static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *off) +static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int err; unsigned nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(file); + struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); if (!fc) return -EPERM; @@ -898,15 +890,6 @@ return err; } -static ssize_t fuse_dev_write(struct file *file, const char __user *buf, - size_t nbytes, loff_t *off) -{ - struct iovec iov; - iov.iov_len = nbytes; - iov.iov_base = (char __user *) buf; - return fuse_dev_writev(file, &iov, 1, off); -} - static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; @@ -1041,10 +1024,10 @@ const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, .llseek = no_llseek, - .read = fuse_dev_read, - .readv = fuse_dev_readv, - .write = fuse_dev_write, - .writev = fuse_dev_writev, + .read = do_sync_read, + .aio_read = fuse_dev_read, + .write = do_sync_write, + .aio_write = fuse_dev_write, .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, diff -urN oldtree/fs/fuse/file.c newtree/fs/fuse/file.c --- oldtree/fs/fuse/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/fuse/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -753,8 +753,10 @@ static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, diff -urN oldtree/fs/gfs2/ops_file.c newtree/fs/gfs2/ops_file.c --- oldtree/fs/gfs2/ops_file.c 2006-09-29 
15:59:29.000000000 -0400 +++ newtree/fs/gfs2/ops_file.c 2006-09-30 04:17:50.000000000 -0400 @@ -612,11 +612,9 @@ const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read = generic_file_read, - .readv = generic_file_readv, + .read = do_sync_read, .aio_read = generic_file_aio_read, - .write = generic_file_write, - .writev = generic_file_writev, + .write = do_sync_write, .aio_write = generic_file_aio_write, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, diff -urN oldtree/fs/hfs/inode.c newtree/fs/hfs/inode.c --- oldtree/fs/hfs/inode.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/hfs/inode.c 2006-09-30 04:17:37.000000000 -0400 @@ -601,8 +601,10 @@ static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, .fsync = file_fsync, diff -urN oldtree/fs/hfsplus/inode.c newtree/fs/hfsplus/inode.c --- oldtree/fs/hfsplus/inode.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/hfsplus/inode.c 2006-09-30 04:17:37.000000000 -0400 @@ -282,8 +282,10 @@ static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, .fsync = file_fsync, diff -urN oldtree/fs/hostfs/hostfs_kern.c newtree/fs/hostfs/hostfs_kern.c --- oldtree/fs/hostfs/hostfs_kern.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/hostfs/hostfs_kern.c 2006-09-30 04:17:37.000000000 -0400 @@ -385,13 +385,11 @@ static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = 
do_sync_read, .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .readv = generic_file_readv, - .writev = generic_file_writev, - .write = generic_file_write, + .write = do_sync_write, .mmap = generic_file_mmap, .open = hostfs_file_open, .release = NULL, diff -urN oldtree/fs/hpfs/file.c newtree/fs/hpfs/file.c --- oldtree/fs/hpfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/hpfs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -113,7 +113,7 @@ { ssize_t retval; - retval = generic_file_write(file, buf, count, ppos); + retval = do_sync_write(file, buf, count, ppos); if (retval > 0) hpfs_i(file->f_dentry->d_inode)->i_dirty = 1; return retval; @@ -122,8 +122,10 @@ const struct file_operations hpfs_file_ops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .write = hpfs_file_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .release = hpfs_file_release, .fsync = hpfs_file_fsync, diff -urN oldtree/fs/jffs/inode-v23.c newtree/fs/jffs/inode-v23.c --- oldtree/fs/jffs/inode-v23.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/jffs/inode-v23.c 2006-09-30 04:17:37.000000000 -0400 @@ -1632,8 +1632,10 @@ { .open = generic_file_open, .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .ioctl = jffs_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs_fsync, diff -urN oldtree/fs/jffs2/file.c newtree/fs/jffs2/file.c --- oldtree/fs/jffs2/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/jffs2/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -42,8 +42,10 @@ { .llseek = generic_file_llseek, .open = generic_file_open, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = 
do_sync_write, + .aio_write = generic_file_aio_write, .ioctl = jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, diff -urN oldtree/fs/jfs/file.c newtree/fs/jfs/file.c --- oldtree/fs/jfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/jfs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -103,13 +103,11 @@ const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = generic_file_write, - .read = generic_file_read, + .write = do_sync_write, + .read = do_sync_read, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .readv = generic_file_readv, - .writev = generic_file_writev, .sendfile = generic_file_sendfile, .fsync = jfs_fsync, .release = jfs_release, diff -urN oldtree/fs/minix/file.c newtree/fs/minix/file.c --- oldtree/fs/minix/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/minix/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -17,8 +17,10 @@ const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = minix_sync_file, .sendfile = generic_file_sendfile, diff -urN oldtree/fs/nfs/direct.c newtree/fs/nfs/direct.c --- oldtree/fs/nfs/direct.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/nfs/direct.c 2006-09-30 04:16:37.000000000 -0400 @@ -707,8 +707,8 @@ /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer into which to read data - * @count: number of bytes to read + * @iov: vector of user buffers into which to read data + * @nr_segs: size of iov vector * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling @@ -725,17 +725,24 @@ * client must read the updated 
atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + if (count < 0) goto out; retval = -EFAULT; @@ -760,8 +767,8 @@ /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer from which to write data - * @count: number of bytes to write + * @iov: vector of user buffers from which to write data + * @nr_segs: size of iov vector * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -782,17 +789,24 @@ * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. 
*/ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; diff -urN oldtree/fs/nfs/file.c newtree/fs/nfs/file.c --- oldtree/fs/nfs/file.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/nfs/file.c 2006-09-30 04:16:37.000000000 -0400 @@ -41,8 +41,10 @@ static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); -static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); -static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); +static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); @@ -53,8 +55,8 @@ .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -196,15 +198,17 @@ } static ssize_t 
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +nfs_file_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, buf, count, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos); #endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", @@ -214,7 +218,7 @@ result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) - result = generic_file_aio_read(iocb, buf, count, pos); + result = generic_file_aio_read(iocb, iov, nr_segs, pos); return result; } @@ -336,24 +340,22 @@ #endif }; -/* - * Write to a file (through the page cache). - */ -static ssize_t -nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, buf, count, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos); #endif - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", + dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); + inode->i_ino, (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -372,7 +374,7 @@ goto out; nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); - result = generic_file_aio_write(iocb, buf, count, pos); + result = generic_file_aio_write(iocb, iov, nr_segs, pos); 
out: return result; diff -urN oldtree/fs/ntfs/file.c newtree/fs/ntfs/file.c --- oldtree/fs/ntfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/ntfs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -2176,20 +2176,18 @@ /** * ntfs_file_aio_write - */ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, pos, ret); @@ -2298,13 +2296,11 @@ const struct file_operations ntfs_file_ops = { .llseek = generic_file_llseek, /* Seek inside file. */ - .read = generic_file_read, /* Read from file. */ + .read = do_sync_read, /* Read from file. */ .aio_read = generic_file_aio_read, /* Async read from file. */ - .readv = generic_file_readv, /* Read from file. */ #ifdef NTFS_RW .write = ntfs_file_write, /* Write to file. */ .aio_write = ntfs_file_aio_write, /* Async write to file. */ - .writev = ntfs_file_writev, /* Write to file. */ /*.release = ,*/ /* Last file is closed. 
See fs/ext2/file.c:: ext2_release_file() for diff -urN oldtree/fs/ocfs2/file.c newtree/fs/ocfs2/file.c --- oldtree/fs/ocfs2/file.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/ocfs2/file.c 2006-09-30 04:16:37.000000000 -0400 @@ -961,25 +961,23 @@ } static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; u32 clusters; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; loff_t newsize, saved_pos; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); /* happy write of zero bytes */ - if (count == 0) + if (iocb->ki_left == 0) return 0; if (!inode) { @@ -1048,7 +1046,7 @@ } else { saved_pos = iocb->ki_pos; } - newsize = count + saved_pos; + newsize = iocb->ki_left + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, @@ -1081,7 +1079,7 @@ if (!clusters) break; - ret = ocfs2_extend_file(inode, NULL, newsize, count); + ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); @@ -1098,7 +1096,7 @@ /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb); - ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); @@ -1132,16 +1130,16 @@ } static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int ret = 0, rw_level = -1, 
have_alloc_sem = 0; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); @@ -1185,7 +1183,7 @@ } ocfs2_meta_unlock(inode, 0); - ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); + ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); diff -urN oldtree/fs/pipe.c newtree/fs/pipe.c --- oldtree/fs/pipe.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/pipe.c 2006-09-30 04:17:01.000000000 -0400 @@ -218,9 +218,10 @@ }; static ssize_t -pipe_readv(struct file *filp, const struct iovec *_iov, - unsigned long nr_segs, loff_t *ppos) +pipe_read(struct kiocb *iocb, const struct iovec *_iov, + unsigned long nr_segs, loff_t pos) { + struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; struct pipe_inode_info *pipe; int do_wakeup; @@ -330,17 +331,10 @@ } static ssize_t -pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - struct iovec iov = { .iov_base = buf, .iov_len = count }; - - return pipe_readv(filp, &iov, 1, ppos); -} - -static ssize_t -pipe_writev(struct file *filp, const struct iovec *_iov, - unsigned long nr_segs, loff_t *ppos) +pipe_write(struct kiocb *iocb, const struct iovec *_iov, + unsigned long nr_segs, loff_t ppos) { + struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; struct pipe_inode_info *pipe; ssize_t ret; @@ -510,15 +504,6 @@ } static ssize_t -pipe_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return pipe_writev(filp, &iov, 1, ppos); -} - -static ssize_t bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { 
return -EBADF; @@ -736,8 +721,8 @@ */ const struct file_operations read_fifo_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, + .read = do_sync_read, + .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, .ioctl = pipe_ioctl, @@ -749,8 +734,8 @@ const struct file_operations write_fifo_fops = { .llseek = no_llseek, .read = bad_pipe_r, - .write = pipe_write, - .writev = pipe_writev, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -760,10 +745,10 @@ const struct file_operations rdwr_fifo_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, - .write = pipe_write, - .writev = pipe_writev, + .read = do_sync_read, + .aio_read = pipe_read, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, @@ -773,8 +758,8 @@ static struct file_operations read_pipe_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, + .read = do_sync_read, + .aio_read = pipe_read, .write = bad_pipe_w, .poll = pipe_poll, .ioctl = pipe_ioctl, @@ -786,8 +771,8 @@ static struct file_operations write_pipe_fops = { .llseek = no_llseek, .read = bad_pipe_r, - .write = pipe_write, - .writev = pipe_writev, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -797,10 +782,10 @@ static struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, - .read = pipe_read, - .readv = pipe_readv, - .write = pipe_write, - .writev = pipe_writev, + .read = do_sync_read, + .aio_read = pipe_read, + .write = do_sync_write, + .aio_write = pipe_write, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, diff -urN oldtree/fs/qnx4/file.c newtree/fs/qnx4/file.c --- oldtree/fs/qnx4/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/qnx4/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -22,11 +22,13 @@ const struct file_operations 
qnx4_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, #ifdef CONFIG_QNX4FS_RW - .write = generic_file_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .fsync = qnx4_sync_file, #endif }; diff -urN oldtree/fs/ramfs/file-mmu.c newtree/fs/ramfs/file-mmu.c --- oldtree/fs/ramfs/file-mmu.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/ramfs/file-mmu.c 2006-09-30 04:17:37.000000000 -0400 @@ -33,8 +33,10 @@ }; const struct file_operations ramfs_file_operations = { - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, diff -urN oldtree/fs/ramfs/file-nommu.c newtree/fs/ramfs/file-nommu.c --- oldtree/fs/ramfs/file-nommu.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/ramfs/file-nommu.c 2006-09-30 04:17:37.000000000 -0400 @@ -36,8 +36,10 @@ const struct file_operations ramfs_file_operations = { .mmap = ramfs_nommu_mmap, .get_unmapped_area = ramfs_nommu_get_unmapped_area, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, .llseek = generic_file_llseek, diff -urN oldtree/fs/read_write.c newtree/fs/read_write.c --- oldtree/fs/read_write.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/read_write.c 2006-09-30 04:18:01.000000000 -0400 @@ -15,13 +15,15 @@ #include #include #include +#include "read_write.h" #include #include const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = 
generic_file_aio_read, .mmap = generic_file_readonly_mmap, .sendfile = generic_file_sendfile, }; @@ -227,14 +229,20 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -279,14 +287,20 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -438,78 +452,155 @@ EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ +ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_left = len; + kiocb.ki_nbytes = len; + + for (;;) { + ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; + wait_on_retry_sync_kiocb(&kiocb); + } + + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +/* Do it by hand, with file-ops */ +ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, + unsigned long nr_segs, 
loff_t *ppos, io_fn_t fn) +{ + struct iovec *vector = iov; + ssize_t ret = 0; + + while (nr_segs > 0) { + void __user *base; + size_t len; + ssize_t nr; + + base = vector->iov_base; + len = vector->iov_len; + vector++; + nr_segs--; + + nr = fn(filp, base, len, ppos); + + if (nr < 0) { + if (!ret) + ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + + return ret; +} + /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer) + { + unsigned long seg; + ssize_t ret; + struct iovec *iov = fast_pointer; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) { + ret = 0; + goto out; + } + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (nr_segs > UIO_MAXIOV) { + ret = -EINVAL; + goto out; + } + if (nr_segs > fast_segs) { + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) { + ret = -ENOMEM; + goto out; + } + } + if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { + ret = -EFAULT; + goto out; + } + + /* + * According to the Single Unix Specification we should return EINVAL + * if an element length is < 0 when cast to ssize_t or if the + * total length would overflow the ssize_t return value of the + * system call. 
+ */ + ret = 0; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; + + /* see if we're about to use an invalid len or if + * it's about to overflow ssize_t */ + if (len < 0 || (ret + len < ret)) { + ret = -EINVAL; + goto out; + } + if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { + ret = -EFAULT; + goto out; + } + + ret += len; + } +out: + *ret_pointer = iov; + return ret; +} + static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, unsigned long nr_segs, loff_t *pos) { - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); - typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *vector; + struct iovec *iov = iovstack; ssize_t ret; - int seg; io_fn_t fn; iov_fn_t fnv; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - ret = 0; - if (nr_segs == 0) - goto out; - - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (!file->f_op) + if (!file->f_op) { + ret = -EINVAL; goto out; - if (nr_segs > UIO_FASTIOV) { - ret = -ENOMEM; - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; } - ret = -EFAULT; - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) - goto out; - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t.
The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; - - if (len < 0) /* size_t not fitting an ssize_t .. */ - goto out; - if (unlikely(!access_ok(vrfy_dir(type), buf, len))) - goto Efault; - tot_len += len; - if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ - goto out; - } - if (tot_len == 0) { - ret = 0; + ret = rw_copy_check_uvector(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), iovstack, &iov); + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -520,39 +611,18 @@ fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->readv; + fnv = file->f_op->aio_read; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; + fnv = file->f_op->aio_write; } - if (fnv) { - ret = fnv(file, iov, nr_segs, pos); - goto out; - } - - /* Do it by hand, with file-ops */ - ret = 0; - vector = iov; - while (nr_segs > 0) { - void __user * base; - size_t len; - ssize_t nr; - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(file, base, len, pos); + if (fnv) + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, + pos, fnv); + else + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); - if (nr < 0) { - if (!ret) ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } out: if (iov != iovstack) kfree(iov); @@ -563,9 +633,6 @@ fsnotify_modify(file->f_dentry); } return ret; -Efault: - ret = -EFAULT; - goto out; } ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, @@ -573,7 +640,7 @@ { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) return -EINVAL; return 
do_readv_writev(READ, file, vec, vlen, pos); @@ -586,7 +653,7 @@ { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); diff -urN oldtree/fs/read_write.h newtree/fs/read_write.h --- oldtree/fs/read_write.h 1969-12-31 19:00:00.000000000 -0500 +++ newtree/fs/read_write.h 2006-09-30 04:17:01.000000000 -0400 @@ -0,0 +1,14 @@ +/* + * This file is only for sharing some helpers from read_write.c with compat.c. + * Don't use anywhere else. + */ + + +typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); +typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, + unsigned long, loff_t); + +ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); +ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, + unsigned long nr_segs, loff_t *ppos, io_fn_t fn); diff -urN oldtree/fs/reiser4/plugin/file/symlink.c newtree/fs/reiser4/plugin/file/symlink.c --- oldtree/fs/reiser4/plugin/file/symlink.c 2006-09-29 15:29:53.000000000 -0400 +++ newtree/fs/reiser4/plugin/file/symlink.c 2006-09-30 04:41:54.000000000 -0400 @@ -42,7 +42,7 @@ */ reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT); - assert("vs-838", symlink->u.generic_ip == NULL); + assert("vs-838", symlink->i_private == NULL); symlink->i_private = (void *)data->name; assert("vs-843", symlink->i_size == 0); @@ -51,15 +51,15 @@ /* insert stat data appended with data->name */ result = inode_file_plugin(symlink)->write_sd_by_inode(symlink); if (result) { - /* FIXME-VS: Make sure that symlink->u.generic_ip is not attached + /* FIXME-VS: Make sure that symlink->i_private is not attached to kmalloced data */ INODE_SET_FIELD(symlink, i_size, 0); } else { - assert("vs-849", symlink->u.generic_ip - && 
reiser4_inode_get_flag(symlink, - REISER4_GENERIC_PTR_USED)); + assert("vs-849", symlink->i_private + && reiser4_inode_get_flag(symlink, + REISER4_GENERIC_PTR_USED)); assert("vs-850", - !memcmp((char *)symlink->u.generic_ip, data->name, + !memcmp((char *)symlink->i_private, data->name, (size_t) symlink->i_size + 1)); } return result; @@ -74,8 +74,8 @@ inode_file_plugin(inode) == file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID)); assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode)); - assert("edward-801", reiser4_inode_get_flag(inode, - REISER4_GENERIC_PTR_USED)); + assert("edward-801", reiser4_inode_get_flag(inode, + REISER4_GENERIC_PTR_USED)); assert("vs-839", S_ISLNK(inode->i_mode)); kfree(inode->i_private); diff -urN oldtree/fs/reiserfs/bitmap.c newtree/fs/reiserfs/bitmap.c --- oldtree/fs/reiserfs/bitmap.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/reiserfs/bitmap.c 2006-09-30 04:16:27.000000000 -0400 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -50,16 +51,15 @@ { /* It is in the bitmap block number equal to the block * number divided by the number of bits in a block. */ - *bmap_nr = block / (s->s_blocksize << 3); + *bmap_nr = block >> (s->s_blocksize_bits + 3); /* Within that bitmap block it is located at bit offset *offset. */ *offset = block & ((s->s_blocksize << 3) - 1); - return; } #ifdef CONFIG_REISERFS_CHECK int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) { - int i, j; + int bmap, offset; if (block == 0 || block >= SB_BLOCK_COUNT(s)) { reiserfs_warning(s, @@ -68,36 +68,32 @@ return 0; } - /* it can't be one of the bitmap blocks */ - for (i = 0; i < SB_BMAP_NR(s); i++) - if (block == SB_AP_BITMAP(s)[i].bh->b_blocknr) { + get_bit_address(s, block, &bmap, &offset); + + /* Old format filesystem? Unlikely, but the bitmaps are all up front so + * we need to account for it. 
*/ + if (unlikely(test_bit(REISERFS_OLD_FORMAT, + &(REISERFS_SB(s)->s_properties)))) { + b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; + if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) { + reiserfs_warning(s, "vs: 4019: is_reusable: " + "bitmap block %lu(%u) can't be freed or reused", + block, SB_BMAP_NR(s)); + return 0; + } + } else { + if (offset == 0) { reiserfs_warning(s, "vs: 4020: is_reusable: " "bitmap block %lu(%u) can't be freed or reused", block, SB_BMAP_NR(s)); return 0; } - - get_bit_address(s, block, &i, &j); - - if (i >= SB_BMAP_NR(s)) { - reiserfs_warning(s, - "vs-4030: is_reusable: there is no so many bitmap blocks: " - "block=%lu, bitmap_nr=%d", block, i); - return 0; } - if ((bit_value == 0 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) || - (bit_value == 1 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data) == 0)) { + if (bmap >= SB_BMAP_NR(s)) { reiserfs_warning(s, - "vs-4040: is_reusable: corresponding bit of block %lu does not " - "match required value (i==%d, j==%d) test_bit==%d", - block, i, j, reiserfs_test_le_bit(j, - SB_AP_BITMAP - (s)[i].bh-> - b_data)); - + "vs-4030: is_reusable: there is no so many bitmap blocks: " + "block=%lu, bitmap_nr=%d", block, bmap); return 0; } @@ -141,6 +137,7 @@ { struct super_block *s = th->t_super; struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; + struct buffer_head *bh; int end, next; int org = *beg; @@ -159,22 +156,25 @@ bmap_n); return 0; } - if (buffer_locked(bi->bh)) { - PROC_INFO_INC(s, scan_bitmap.wait); - __wait_on_buffer(bi->bh); - } + + bh = reiserfs_read_bitmap_block(s, bmap_n); + if (bh == NULL) + return 0; while (1) { cont: - if (bi->free_count < min) + if (bi->free_count < min) { + brelse(bh); return 0; // No free blocks in this bitmap + } /* search for a first zero bit -- beggining of a window */ *beg = reiserfs_find_next_zero_le_bit - ((unsigned long *)(bi->bh->b_data), boundary, *beg); + ((unsigned long *)(bh->b_data), boundary, *beg); if 
(*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block * cannot contain a zero window of minimum size */ + brelse(bh); return 0; } @@ -183,7 +183,7 @@ /* first zero bit found; we check next bits */ for (end = *beg + 1;; end++) { if (end >= *beg + max || end >= boundary - || reiserfs_test_le_bit(end, bi->bh->b_data)) { + || reiserfs_test_le_bit(end, bh->b_data)) { next = end; break; } @@ -197,12 +197,12 @@ * (end) points to one bit after the window end */ if (end - *beg >= min) { /* it seems we have found window of proper size */ int i; - reiserfs_prepare_for_journal(s, bi->bh, 1); + reiserfs_prepare_for_journal(s, bh, 1); /* try to set all blocks used checking are they still free */ for (i = *beg; i < end; i++) { /* It seems that we should not check in journal again. */ if (reiserfs_test_and_set_le_bit - (i, bi->bh->b_data)) { + (i, bh->b_data)) { /* bit was set by another process * while we slept in prepare_for_journal() */ PROC_INFO_INC(s, scan_bitmap.stolen); @@ -214,17 +214,16 @@ /* otherwise we clear all bit were set ... */ while (--i >= *beg) reiserfs_test_and_clear_le_bit - (i, bi->bh->b_data); - reiserfs_restore_prepared_buffer(s, - bi-> - bh); + (i, bh->b_data); + reiserfs_restore_prepared_buffer(s, bh); *beg = org; /* ... and search again in current block from beginning */ goto cont; } } bi->free_count -= (end - *beg); - journal_mark_dirty(th, s, bi->bh); + journal_mark_dirty(th, s, bh); + brelse(bh); /* free block count calculation */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), @@ -266,9 +265,20 @@ */ static inline int block_group_used(struct super_block *s, u32 id) { - int bm; - bm = bmap_hash_id(s, id); - if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) { + int bm = bmap_hash_id(s, id); + struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; + + /* If we don't have cached information on this bitmap block, we're + * going to have to load it later anyway. 
Loading it here allows us + * to make a better decision. This favors long-term performance gain + * with a better on-disk layout vs. a short term gain of skipping the + * read and potentially having a bad placement. */ + if (info->first_zero_hint == 0) { + struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); + brelse(bh); + } + + if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) { return 0; } return 1; @@ -373,7 +383,7 @@ { struct super_block *s = th->t_super; struct reiserfs_super_block *rs; - struct buffer_head *sbh; + struct buffer_head *sbh, *bmbh; struct reiserfs_bitmap_info *apbi; int nr, offset; @@ -394,16 +404,21 @@ return; } - reiserfs_prepare_for_journal(s, apbi[nr].bh, 1); + bmbh = reiserfs_read_bitmap_block(s, nr); + if (!bmbh) + return; + + reiserfs_prepare_for_journal(s, bmbh, 1); /* clear bit for the given block in bit map */ - if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) { + if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { reiserfs_warning(s, "vs-4080: reiserfs_free_block: " "free_block (%s:%lu)[dev:blocknr]: bit already cleared", reiserfs_bdevname(s), block); } apbi[nr].free_count++; - journal_mark_dirty(th, s, apbi[nr].bh); + journal_mark_dirty(th, s, bmbh); + brelse(bmbh); reiserfs_prepare_for_journal(s, sbh, 1); /* update super block */ @@ -1019,7 +1034,6 @@ b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; int passno = 0; int nr_allocated = 0; - int bigalloc = 0; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1046,28 +1060,9 @@ hint->preallocate = hint->prealloc_size = 0; } /* for unformatted nodes, force large allocations */ - bigalloc = amount_needed; } do { - /* in bigalloc mode, nr_allocated should stay zero until - * the entire allocation is filled - */ - if (unlikely(bigalloc && nr_allocated)) { - reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n", - bigalloc, nr_allocated); - /* reset things to a sane value */ - bigalloc = amount_needed - nr_allocated; - } - /* - * try pass
0 and pass 1 looking for a nice big - * contiguous allocation. Then reset and look - * for anything you can find. - */ - if (passno == 2 && bigalloc) { - passno = 0; - bigalloc = 0; - } switch (passno++) { case 0: /* Search from hint->search_start to end of disk */ start = hint->search_start; @@ -1105,8 +1100,7 @@ new_blocknrs + nr_allocated, start, finish, - bigalloc ? - bigalloc : 1, + 1, amount_needed - nr_allocated, hint-> @@ -1263,3 +1257,89 @@ return space > 0 ? space : 0; } + +void reiserfs_cache_bitmap_metadata(struct super_block *sb, + struct buffer_head *bh, + struct reiserfs_bitmap_info *info) +{ + unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); + + info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3); + + while (--cur >= (unsigned long *)bh->b_data) { + int base = ((char *)cur - bh->b_data) << 3; + + /* 0 and ~0 are special, we can optimize for them */ + if (*cur == 0) { + info->first_zero_hint = base; + info->free_count += BITS_PER_LONG; + } else if (*cur != ~0L) { /* A mix, investigate */ + int b; + for (b = BITS_PER_LONG - 1; b >= 0; b--) { + if (!reiserfs_test_le_bit(b, cur)) { + info->first_zero_hint = base + b; + info->free_count++; + } + } + } + } + /* The first bit must ALWAYS be 1 */ + BUG_ON(info->first_zero_hint == 0); +} + +struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, + unsigned int bitmap) +{ + b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; + struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; + struct buffer_head *bh; + + /* Way old format filesystems had the bitmaps packed up front. + * I doubt there are any of these left, but just in case... 
*/ + if (unlikely(test_bit(REISERFS_OLD_FORMAT, + &(REISERFS_SB(sb)->s_properties)))) + block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; + else if (bitmap == 0) + block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; + + bh = sb_bread(sb, block); + if (bh == NULL) + reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%lu) " + "reading failed", __FUNCTION__, (unsigned long)block); + else { + if (buffer_locked(bh)) { + PROC_INFO_INC(sb, scan_bitmap.wait); + __wait_on_buffer(bh); + } + BUG_ON(!buffer_uptodate(bh)); + BUG_ON(atomic_read(&bh->b_count) == 0); + + if (info->first_zero_hint == 0) + reiserfs_cache_bitmap_metadata(sb, bh, info); + } + + return bh; +} + +int reiserfs_init_bitmap_cache(struct super_block *sb) +{ + struct reiserfs_bitmap_info *bitmap; + + bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb)); + if (bitmap == NULL) + return -ENOMEM; + + memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb)); + + SB_AP_BITMAP(sb) = bitmap; + + return 0; +} + +void reiserfs_free_bitmap_cache(struct super_block *sb) +{ + if (SB_AP_BITMAP(sb)) { + vfree(SB_AP_BITMAP(sb)); + SB_AP_BITMAP(sb) = NULL; + } +} diff -urN oldtree/fs/reiserfs/file.c newtree/fs/reiserfs/file.c --- oldtree/fs/reiserfs/file.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/reiserfs/file.c 2006-09-30 04:16:37.000000000 -0400 @@ -1333,7 +1333,7 @@ if (err) return err; } - result = generic_file_write(file, buf, count, ppos); + result = do_sync_write(file, buf, count, ppos); if (after_file_end) { /* Now update i_size and remove the savelink */ struct reiserfs_transaction_handle th; @@ -1565,10 +1565,11 @@ } const struct file_operations reiserfs_file_operations = { - .read = generic_file_read, + .read = do_sync_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, .mmap = generic_file_mmap, + .open = generic_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, diff -urN oldtree/fs/reiserfs/resize.c
newtree/fs/reiserfs/resize.c --- oldtree/fs/reiserfs/resize.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/reiserfs/resize.c 2006-09-30 04:16:18.000000000 -0400 @@ -22,6 +22,7 @@ int err = 0; struct reiserfs_super_block *sb; struct reiserfs_bitmap_info *bitmap; + struct reiserfs_bitmap_info *info; struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); struct buffer_head *bh; struct reiserfs_transaction_handle th; @@ -127,16 +128,20 @@ * transaction begins, and the new bitmaps don't matter if the * transaction fails. */ for (i = bmap_nr; i < bmap_nr_new; i++) { - bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); - memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); - - set_buffer_uptodate(bitmap[i].bh); - mark_buffer_dirty(bitmap[i].bh); - sync_dirty_buffer(bitmap[i].bh); + /* don't use read_bitmap_block since it will cache + * the uninitialized bitmap */ + bh = sb_bread(s, i * s->s_blocksize * 8); + memset(bh->b_data, 0, sb_blocksize(sb)); + reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); // update bitmap_info stuff bitmap[i].first_zero_hint = 1; bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; + brelse(bh); } /* free old bitmap blocks array */ SB_AP_BITMAP(s) = bitmap; @@ -150,30 +155,46 @@ if (err) return err; - /* correct last bitmap blocks in old and new disk layout */ - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1); - for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr - - 1].bh->b_data); - SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r; - if (!SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) - SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; + /* Extend old last bitmap block - new blocks have been made available */ + info = SB_AP_BITMAP(s) + bmap_nr - 1; + bh 
= reiserfs_read_bitmap_block(s, bmap_nr - 1); + if (!bh) { + int jerr = journal_end(&th, s, 10); + if (jerr) + return jerr; + return -EIO; + } - journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh); + reiserfs_prepare_for_journal(s, bh, 1); + for (i = block_r; i < s->s_blocksize * 8; i++) + reiserfs_test_and_clear_le_bit(i, bh->b_data); + info->free_count += s->s_blocksize * 8 - block_r; + if (!info->first_zero_hint) + info->first_zero_hint = block_r; + + journal_mark_dirty(&th, s, bh); + brelse(bh); + + /* Correct new last bitmap block - It may not be full */ + info = SB_AP_BITMAP(s) + bmap_nr_new - 1; + bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); + if (!bh) { + int jerr = journal_end(&th, s, 10); + if (jerr) + return jerr; + return -EIO; + } - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1); + reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr_new - - 1].bh->b_data); - journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh); + reiserfs_test_and_set_le_bit(i, bh->b_data); + journal_mark_dirty(&th, s, bh); + brelse(bh); - SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= - s->s_blocksize * 8 - block_r_new; + info->free_count -= s->s_blocksize * 8 - block_r_new; /* Extreme case where last bitmap is the only valid block in itself. 
*/ - if (!SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count) - SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; + if (!info->free_count) + info->first_zero_hint = 0; /* update super */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); free_blocks = SB_FREE_BLOCKS(s); diff -urN oldtree/fs/reiserfs/super.c newtree/fs/reiserfs/super.c --- oldtree/fs/reiserfs/super.c 2006-09-29 14:03:21.000000000 -0400 +++ newtree/fs/reiserfs/super.c 2006-09-30 04:21:55.000000000 -0400 @@ -430,21 +430,29 @@ return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); } -static void reiserfs_put_super(struct super_block *s) +static void reiserfs_kill_sb(struct super_block *s) { - int i; - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; + if (REISERFS_SB(s)) { + if (REISERFS_SB(s)->xattr_root) { + d_invalidate(REISERFS_SB(s)->xattr_root); + dput(REISERFS_SB(s)->xattr_root); + REISERFS_SB(s)->xattr_root = NULL; + } - if (REISERFS_SB(s)->xattr_root) { - d_invalidate(REISERFS_SB(s)->xattr_root); - dput(REISERFS_SB(s)->xattr_root); + if (REISERFS_SB(s)->priv_root) { + d_invalidate(REISERFS_SB(s)->priv_root); + dput(REISERFS_SB(s)->priv_root); + REISERFS_SB(s)->priv_root = NULL; + } } - if (REISERFS_SB(s)->priv_root) { - d_invalidate(REISERFS_SB(s)->priv_root); - dput(REISERFS_SB(s)->priv_root); - } + kill_block_super(s); +} + +static void reiserfs_put_super(struct super_block *s) +{ + struct reiserfs_transaction_handle th; + th.t_trans_id = 0; /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { @@ -462,10 +470,7 @@ */ journal_release(&th, s); - for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i].bh); - - vfree(SB_AP_BITMAP(s)); + reiserfs_free_bitmap_cache(s); brelse(SB_BUFFER_WITH_SB(s)); @@ -1243,118 +1248,6 @@ return 0; } -/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. - * @sb - superblock for this filesystem - * @bi - the bitmap info to be loaded. 
Requires that bi->bh is valid. - * - * This routine counts how many free bits there are, finding the first zero - * as a side effect. Could also be implemented as a loop of test_bit() calls, or - * a loop of find_first_zero_bit() calls. This implementation is similar to - * find_first_zero_bit(), but doesn't return after it finds the first bit. - * Should only be called on fs mount, but should be fairly efficient anyways. - * - * bi->first_zero_hint is considered unset if it == 0, since the bitmap itself - * will * invariably occupt block 0 represented in the bitmap. The only - * exception to this is when free_count also == 0, since there will be no - * free blocks at all. - */ - -static void load_bitmap_info_data(struct super_block *sb, - struct reiserfs_bitmap_info *bi) -{ - unsigned long *cur = (unsigned long *)bi->bh->b_data; - - while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { - - /* No need to scan if all 0's or all 1's. - * Since we're only counting 0's, we can simply ignore all 1's */ - if (*cur == 0) { - if (bi->first_zero_hint == 0) { - bi->first_zero_hint = - ((char *)cur - bi->bh->b_data) << 3; - } - bi->free_count += sizeof(unsigned long) * 8; - } else if (*cur != ~0L) { - int b; - for (b = 0; b < sizeof(unsigned long) * 8; b++) { - if (!reiserfs_test_le_bit(b, cur)) { - bi->free_count++; - if (bi->first_zero_hint == 0) - bi->first_zero_hint = - (((char *)cur - - bi->bh->b_data) << 3) + b; - } - } - } - cur++; - } - -#ifdef CONFIG_REISERFS_CHECK -// This outputs a lot of unneded info on big FSes -// reiserfs_warning ("bitmap loaded from block %d: %d free blocks", -// bi->bh->b_blocknr, bi->free_count); -#endif -} - -static int read_bitmaps(struct super_block *s) -{ - int i, bmap_nr; - - SB_AP_BITMAP(s) = - vmalloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - if (SB_AP_BITMAP(s) == 0) - return 1; - memset(SB_AP_BITMAP(s), 0, - sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); - for (i = 0, bmap_nr = - 
REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; - i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { - SB_AP_BITMAP(s)[i].bh = sb_getblk(s, bmap_nr); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) - ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); - } - for (i = 0; i < SB_BMAP_NR(s); i++) { - wait_on_buffer(SB_AP_BITMAP(s)[i].bh); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - reiserfs_warning(s, "sh-2029: reiserfs read_bitmaps: " - "bitmap block (#%lu) reading failed", - SB_AP_BITMAP(s)[i].bh->b_blocknr); - for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i].bh); - vfree(SB_AP_BITMAP(s)); - SB_AP_BITMAP(s) = NULL; - return 1; - } - load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); - } - return 0; -} - -static int read_old_bitmaps(struct super_block *s) -{ - int i; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ - - /* read true bitmap */ - SB_AP_BITMAP(s) = - vmalloc(sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); - if (SB_AP_BITMAP(s) == 0) - return 1; - - memset(SB_AP_BITMAP(s), 0, - sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); - - for (i = 0; i < sb_bmap_nr(rs); i++) { - SB_AP_BITMAP(s)[i].bh = sb_bread(s, bmp1 + i); - if (!SB_AP_BITMAP(s)[i].bh) - return 1; - load_bitmap_info_data(s, SB_AP_BITMAP(s) + i); - } - - return 0; -} - static int read_super_block(struct super_block *s, int offset) { struct buffer_head *bh; @@ -1456,7 +1349,6 @@ /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - int i; ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); wait_on_buffer(SB_BUFFER_WITH_SB(s)); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { @@ -1465,20 +1357,7 @@ return 1; } - for (i = 0; i < SB_BMAP_NR(s); i++) { - ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)); - wait_on_buffer(SB_AP_BITMAP(s)[i].bh); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { - 
reiserfs_warning(s, - "reread_meta_blocks, error reading bitmap block number %d at %llu", - i, - (unsigned long long)SB_AP_BITMAP(s)[i]. - bh->b_blocknr); - return 1; - } - } return 0; - } ///////////////////////////////////////////////////// @@ -1659,7 +1538,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) { struct inode *root_inode; - int j; struct reiserfs_transaction_handle th; int old_format = 0; unsigned long blocks; @@ -1736,7 +1614,7 @@ sbi->s_mount_state = SB_REISERFS_STATE(s); sbi->s_mount_state = REISERFS_VALID_FS; - if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { + if ((errval = reiserfs_init_bitmap_cache(s))) { SWARN(silent, s, "jmacd-8: reiserfs_fill_super: unable to read bitmap"); goto error; @@ -1818,6 +1696,8 @@ if (is_reiserfs_3_5(rs) || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) set_bit(REISERFS_3_5, &(sbi->s_properties)); + else if (old_format) + set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties)); else set_bit(REISERFS_3_6, &(sbi->s_properties)); @@ -1903,19 +1783,17 @@ if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } - if (SB_DISK_SUPER_BLOCK(s)) { - for (j = 0; j < SB_BMAP_NR(s); j++) { - if (SB_AP_BITMAP(s)) - brelse(SB_AP_BITMAP(s)[j].bh); - } - vfree(SB_AP_BITMAP(s)); - } + + reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); #ifdef CONFIG_QUOTA - for (j = 0; j < MAXQUOTAS; j++) { - kfree(sbi->s_qf_names[j]); - sbi->s_qf_names[j] = NULL; + { + int j; + for (j = 0; j < MAXQUOTAS; j++) { + kfree(sbi->s_qf_names[j]); + sbi->s_qf_names[j] = NULL; + } } #endif kfree(sbi); @@ -2287,7 +2165,7 @@ .owner = THIS_MODULE, .name = "reiserfs", .get_sb = get_super_block, - .kill_sb = kill_block_super, + .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff -urN oldtree/fs/smbfs/file.c newtree/fs/smbfs/file.c --- oldtree/fs/smbfs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/smbfs/file.c 
2006-09-30 04:17:37.000000000 -0400 @@ -214,13 +214,15 @@ } static ssize_t -smb_file_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) +smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { + struct file * file = iocb->ki_filp; struct dentry * dentry = file->f_dentry; ssize_t status; VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry), - (unsigned long) count, (unsigned long) *ppos); + (unsigned long) iocb->ki_left, (unsigned long) pos); status = smb_revalidate_inode(dentry); if (status) { @@ -233,7 +235,7 @@ (long)dentry->d_inode->i_size, dentry->d_inode->i_flags, dentry->d_inode->i_atime); - status = generic_file_read(file, buf, count, ppos); + status = generic_file_aio_read(iocb, iov, nr_segs, pos); out: return status; } @@ -317,14 +319,16 @@ * Write to a file (through the page cache). */ static ssize_t -smb_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { + struct file * file = iocb->ki_filp; struct dentry * dentry = file->f_dentry; ssize_t result; VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry), - (unsigned long) count, (unsigned long) *ppos); + (unsigned long) iocb->ki_left, (unsigned long) pos); result = smb_revalidate_inode(dentry); if (result) { @@ -337,8 +341,8 @@ if (result) goto out; - if (count > 0) { - result = generic_file_write(file, buf, count, ppos); + if (iocb->ki_left > 0) { + result = generic_file_aio_write(iocb, iov, nr_segs, pos); VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n", (long) file->f_pos, (long) dentry->d_inode->i_size, dentry->d_inode->i_mtime, dentry->d_inode->i_atime); @@ -402,8 +406,10 @@ const struct file_operations smb_file_operations = { .llseek = remote_llseek, - .read = smb_file_read, - .write = smb_file_write, + .read = do_sync_read, + .aio_read = smb_file_aio_read, + .write = do_sync_write, + 
.aio_write = smb_file_aio_write, .ioctl = smb_ioctl, .mmap = smb_file_mmap, .open = smb_file_open, diff -urN oldtree/fs/sysv/file.c newtree/fs/sysv/file.c --- oldtree/fs/sysv/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/sysv/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -21,8 +21,10 @@ */ const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .fsync = sysv_sync_file, .sendfile = generic_file_sendfile, diff -urN oldtree/fs/udf/file.c newtree/fs/udf/file.c --- oldtree/fs/udf/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/udf/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -103,19 +103,21 @@ .commit_write = udf_adinicb_commit_write, }; -static ssize_t udf_file_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t ppos) { ssize_t retval; + struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; int err, pos; + size_t count = iocb->ki_left; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) pos = inode->i_size; else - pos = *ppos; + pos = ppos; if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + pos + count)) @@ -136,7 +138,7 @@ } } - retval = generic_file_write(file, buf, count, ppos); + retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); if (retval > 0) mark_inode_dirty(inode); @@ -249,11 +251,13 @@ } const struct file_operations udf_file_operations = { - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .ioctl = udf_ioctl, .open = generic_file_open, .mmap = generic_file_mmap, - .write = udf_file_write, + .write = do_sync_write, + .aio_write = 
udf_file_aio_write, .release = udf_release_file, .fsync = udf_fsync_file, .sendfile = generic_file_sendfile, diff -urN oldtree/fs/ufs/file.c newtree/fs/ufs/file.c --- oldtree/fs/ufs/file.c 2006-09-29 13:50:42.000000000 -0400 +++ newtree/fs/ufs/file.c 2006-09-30 04:17:37.000000000 -0400 @@ -53,8 +53,10 @@ const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = generic_file_open, .fsync = ufs_sync_file, diff -urN oldtree/fs/xfs/linux-2.6/xfs_file.c newtree/fs/xfs/linux-2.6/xfs_file.c --- oldtree/fs/xfs/linux-2.6/xfs_file.c 2006-09-29 14:03:22.000000000 -0400 +++ newtree/fs/xfs/linux-2.6/xfs_file.c 2006-09-30 04:17:01.000000000 -0400 @@ -49,50 +49,49 @@ STATIC inline ssize_t __xfs_file_read( struct kiocb *iocb, - char __user *buf, + const struct iovec *iov, + unsigned long nr_segs, int ioflags, - size_t count, loff_t pos) { - struct iovec iov = {buf, count}; struct file *file = iocb->ki_filp; bhv_vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_read(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_read_invis( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_read(iocb, iov, 
nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC inline ssize_t __xfs_file_write( - struct kiocb *iocb, - const char __user *buf, - int ioflags, - size_t count, - loff_t pos) + struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, + int ioflags, + loff_t pos) { - struct iovec iov = {(void __user *)buf, count}; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; bhv_vnode_t *vp = vn_from_inode(inode); @@ -100,117 +99,28 @@ BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_write(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_write_invis( struct kiocb *iocb, - const char __user *buf, - size_t count, - loff_t pos) -{ - return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); -} - -STATIC inline ssize_t -__xfs_file_readv( - struct file *file, - const struct iovec *iov, - int ioflags, - unsigned long nr_segs, - loff_t *ppos) -{ - struct inode *inode = file->f_mapping->host; - bhv_vnode_t *vp = vn_from_inode(inode); - struct kiocb kiocb; - ssize_t rval; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - rval = bhv_vop_read(vp, &kiocb, iov, nr_segs, - &kiocb.ki_pos, ioflags, NULL); - - *ppos = kiocb.ki_pos; - return rval; -} - -STATIC ssize_t -xfs_file_readv( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_readv(file, iov, 0, nr_segs, ppos); -} - -STATIC ssize_t -xfs_file_readv_invis( - struct file *file, - const struct iovec *iov, 
- unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos); -} - -STATIC inline ssize_t -__xfs_file_writev( - struct file *file, - const struct iovec *iov, - int ioflags, - unsigned long nr_segs, - loff_t *ppos) -{ - struct inode *inode = file->f_mapping->host; - bhv_vnode_t *vp = vn_from_inode(inode); - struct kiocb kiocb; - ssize_t rval; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *ppos; - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - - rval = bhv_vop_write(vp, &kiocb, iov, nr_segs, - &kiocb.ki_pos, ioflags, NULL); - - *ppos = kiocb.ki_pos; - return rval; -} - -STATIC ssize_t -xfs_file_writev( - struct file *file, - const struct iovec *iov, + const struct iovec *iov, unsigned long nr_segs, - loff_t *ppos) -{ - return __xfs_file_writev(file, iov, 0, nr_segs, ppos); -} - -STATIC ssize_t -xfs_file_writev_invis( - struct file *file, - const struct iovec *iov, - unsigned long nr_segs, - loff_t *ppos) + loff_t pos) { - return __xfs_file_writev(file, iov, IO_INVIS, nr_segs, ppos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC ssize_t @@ -540,8 +450,6 @@ .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = xfs_file_readv, - .writev = xfs_file_writev, .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, @@ -565,8 +473,6 @@ .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, - .readv = xfs_file_readv_invis, - .writev = xfs_file_writev_invis, .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, .sendfile = xfs_file_sendfile_invis, diff -urN oldtree/fs/xfs/linux-2.6/xfs_lrw.c newtree/fs/xfs/linux-2.6/xfs_lrw.c --- oldtree/fs/xfs/linux-2.6/xfs_lrw.c 2006-09-29 14:03:22.000000000 -0400 +++ newtree/fs/xfs/linux-2.6/xfs_lrw.c 2006-09-30 04:17:37.000000000 -0400 @@ -279,7 +279,9 @@ xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, 
segs, *offset, ioflags); - ret = __generic_file_aio_read(iocb, iovp, segs, offset); + + iocb->ki_pos = *offset; + ret = generic_file_aio_read(iocb, iovp, segs, *offset); if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if (ret > 0) diff -urN oldtree/include/linux/acct.h newtree/include/linux/acct.h --- oldtree/include/linux/acct.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/include/linux/acct.h 2006-09-30 04:19:34.000000000 -0400 @@ -124,16 +124,12 @@ extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); -extern void acct_update_integrals(struct task_struct *tsk); -extern void acct_clear_integrals(struct task_struct *tsk); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) -#define acct_update_integrals(x) do { } while (0) -#define acct_clear_integrals(task) do { } while (0) #endif /* diff -urN oldtree/include/linux/aio.h newtree/include/linux/aio.h --- oldtree/include/linux/aio.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/include/linux/aio.h 2006-09-30 04:18:09.000000000 -0400 @@ -4,8 +4,10 @@ #include #include #include +#include #include +#include #define AIO_MAXSEGS 4 #define AIO_KIOGRP_NR_ATOMIC 8 @@ -110,8 +112,10 @@ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ long ki_retried; /* just for testing */ - long ki_kicked; /* just for testing */ - long ki_queued; /* just for testing */ + struct iovec ki_inline_vec; /* inline vector */ + struct iovec *ki_iovec; + unsigned long ki_nr_segs; + unsigned long ki_cur_seg; struct list_head ki_list; /* the aio core uses this * for cancellation */ diff -urN oldtree/include/linux/aio_abi.h newtree/include/linux/aio_abi.h --- oldtree/include/linux/aio_abi.h 2006-09-29 
13:50:42.000000000 -0400 +++ newtree/include/linux/aio_abi.h 2006-09-30 04:17:58.000000000 -0400 @@ -41,6 +41,8 @@ * IOCB_CMD_POLL = 5, */ IOCB_CMD_NOOP = 6, + IOCB_CMD_PREADV = 7, + IOCB_CMD_PWRITEV = 8, }; /* read() from /dev/aio returns these structures. */ diff -urN oldtree/include/linux/fs.h newtree/include/linux/fs.h --- oldtree/include/linux/fs.h 2006-09-29 15:29:53.000000000 -0400 +++ newtree/include/linux/fs.h 2006-09-30 04:18:01.000000000 -0400 @@ -1082,9 +1082,9 @@ struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); @@ -1098,8 +1098,6 @@ int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); - ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -1137,6 +1135,11 @@ struct seq_file; +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, 
+ struct iovec **ret_pointer); + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, @@ -1671,22 +1674,17 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); -extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); -extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); -extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, - unsigned long, loff_t *); + unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, loff_t *, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, 
- unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, @@ -1704,10 +1702,6 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); -ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); diff -urN oldtree/include/linux/nfs_fs.h newtree/include/linux/nfs_fs.h --- oldtree/include/linux/nfs_fs.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/linux/nfs_fs.h 2006-09-30 04:16:37.000000000 -0400 @@ -367,10 +367,12 @@ */ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, - size_t count, loff_t pos); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos); +extern ssize_t nfs_file_direct_read(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); +extern ssize_t nfs_file_direct_write(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); /* * linux/fs/nfs/dir.c diff -urN oldtree/include/linux/reiserfs_fs.h newtree/include/linux/reiserfs_fs.h --- oldtree/include/linux/reiserfs_fs.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/linux/reiserfs_fs.h 2006-09-30 04:16:14.000000000 -0400 @@ -2075,6 +2075,10 @@ */ __le32 reiserfs_choose_packing(struct inode *dir); +int reiserfs_init_bitmap_cache(struct super_block *sb); 
+void reiserfs_free_bitmap_cache(struct super_block *sb); +void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); +struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted); diff -urN oldtree/include/linux/reiserfs_fs_sb.h newtree/include/linux/reiserfs_fs_sb.h --- oldtree/include/linux/reiserfs_fs_sb.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/include/linux/reiserfs_fs_sb.h 2006-09-30 04:16:18.000000000 -0400 @@ -267,7 +267,6 @@ // FIXME: Won't work with block sizes > 8K __u16 first_zero_hint; __u16 free_count; - struct buffer_head *bh; /* the actual bitmap */ }; struct proc_dir_entry; @@ -414,6 +413,7 @@ /* Definitions of reiserfs on-disk properties: */ #define REISERFS_3_5 0 #define REISERFS_3_6 1 +#define REISERFS_OLD_FORMAT 2 enum reiserfs_mount_options { /* Mount options */ diff -urN oldtree/include/linux/sched.h newtree/include/linux/sched.h --- oldtree/include/linux/sched.h 2006-09-29 14:47:25.000000000 -0400 +++ newtree/include/linux/sched.h 2006-09-30 04:20:31.000000000 -0400 @@ -1062,10 +1062,10 @@ wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; -#if defined(CONFIG_BSD_PROCESS_ACCT) +#if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - clock_t acct_stimexpd; /* clock_t-converted stime since last update */ + cputime_t acct_stimexpd;/* stime since last update */ #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; diff -urN oldtree/include/linux/taskstats.h newtree/include/linux/taskstats.h --- oldtree/include/linux/taskstats.h 2006-09-29 13:50:42.000000000 -0400 +++ newtree/include/linux/taskstats.h 2006-09-30 
04:20:35.000000000 -0400 @@ -2,6 +2,7 @@ * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License @@ -29,16 +30,25 @@ * c) add new fields after version comment; maintain 64-bit alignment */ -#define TASKSTATS_VERSION 1 + +#define TASKSTATS_VERSION 2 +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ struct taskstats { - /* Version 1 */ + /* The version number of this struct. This field is always set to + * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>. + * Each time the struct is changed, the value should be incremented. + */ __u16 version; - __u16 padding[3]; /* Userspace should not interpret the padding - * field which can be replaced by useful - * fields if struct taskstats is extended. - */ + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in <linux/acct.h>. + * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. 
+ */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ /* Delay accounting fields start * @@ -88,6 +98,48 @@ __u64 cpu_run_virtual_total; /* Delay accounting fields end */ /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched; /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid; /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime; /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* System CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault Count */ + __u64 ac_majflt; /* Major Page Fault Count */ + /* Basic Accounting Fields end */ + + /* Extended accounting fields start */ + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + /* Accumulated virtual memory usage in duration of a task. + * Same as coremem above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS and virtual memory usage in duration of + * a task, in KBytes. + */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ + + /* The following four fields are I/O statistics of a task. 
*/ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ }; diff -urN oldtree/include/linux/tsacct_kern.h newtree/include/linux/tsacct_kern.h --- oldtree/include/linux/tsacct_kern.h 1969-12-31 19:00:00.000000000 -0500 +++ newtree/include/linux/tsacct_kern.h 2006-09-30 04:19:34.000000000 -0400 @@ -0,0 +1,34 @@ +/* + * tsacct_kern.h - kernel header for system accounting over taskstats interface + * + * Copyright (C) Jay Lan SGI + */ + +#ifndef _LINUX_TSACCT_KERN_H +#define _LINUX_TSACCT_KERN_H + +#include + +#ifdef CONFIG_TASKSTATS +extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk); +#else +static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{} +#endif /* CONFIG_TASKSTATS */ + +#ifdef CONFIG_TASK_XACCT +extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); +extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_clear_integrals(struct task_struct *tsk); +#else +static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) +{} +static inline void acct_update_integrals(struct task_struct *tsk) +{} +static inline void acct_clear_integrals(struct task_struct *tsk) +{} +#endif /* CONFIG_TASK_XACCT */ + +#endif + + diff -urN oldtree/include/net/genetlink.h newtree/include/net/genetlink.h --- oldtree/include/net/genetlink.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/net/genetlink.h 2006-09-30 04:18:15.000000000 -0400 @@ -169,4 +169,22 @@ return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN); } +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length 
of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + #endif /* __NET_GENERIC_NETLINK_H */ diff -urN oldtree/include/net/sock.h newtree/include/net/sock.h --- oldtree/include/net/sock.h 2006-09-29 14:03:22.000000000 -0400 +++ newtree/include/net/sock.h 2006-09-30 04:16:37.000000000 -0400 @@ -665,7 +665,6 @@ struct sock *sk; struct scm_cookie *scm; struct msghdr *msg, async_msg; - struct iovec async_iov; struct kiocb *kiocb; }; diff -urN oldtree/init/Kconfig newtree/init/Kconfig --- oldtree/init/Kconfig 2006-09-29 15:02:32.000000000 -0400 +++ newtree/init/Kconfig 2006-09-30 04:19:28.000000000 -0400 @@ -316,6 +316,15 @@ help This enables the legacy 16-bit UID syscall wrappers. +config TASK_XACCT + bool "Enable extended accounting over taskstats (EXPERIMENTAL)" + depends on TASKSTATS + help + Collect extended task accounting data and send the data + to userland for processing over the taskstats interface. + + Say N if unsure. 
+ config SYSCTL bool diff -urN oldtree/kernel/Makefile newtree/kernel/Makefile --- oldtree/kernel/Makefile 2006-09-29 14:21:52.000000000 -0400 +++ newtree/kernel/Makefile 2006-09-30 04:19:21.000000000 -0400 @@ -52,7 +52,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o -obj-$(CONFIG_TASKSTATS) += taskstats.o +obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff -urN oldtree/kernel/acct.c newtree/kernel/acct.c --- oldtree/kernel/acct.c 2006-09-29 14:03:22.000000000 -0400 +++ newtree/kernel/acct.c 2006-09-30 04:19:34.000000000 -0400 @@ -602,33 +602,3 @@ do_acct_process(file); fput(file); } - - -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) -{ - if (likely(tsk->mm)) { - long delta = - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; - - if (delta == 0) - return; - tsk->acct_stimexpd = tsk->stime; - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; - } -} - -/** - * acct_clear_integrals - clear the mm integral fields in task_struct - * @tsk: task_struct whose accounting fields are cleared - */ -void acct_clear_integrals(struct task_struct *tsk) -{ - tsk->acct_stimexpd = 0; - tsk->acct_rss_mem1 = 0; - tsk->acct_vm_mem1 = 0; -} diff -urN oldtree/kernel/exit.c newtree/kernel/exit.c --- oldtree/kernel/exit.c 2006-09-29 14:41:27.000000000 -0400 +++ newtree/kernel/exit.c 2006-09-30 04:19:34.000000000 -0400 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff -urN oldtree/kernel/fork.c newtree/kernel/fork.c --- oldtree/kernel/fork.c 2006-09-29 14:47:25.000000000 -0400 +++ newtree/kernel/fork.c 2006-09-30 04:19:34.000000000 -0400 @@ -42,6 +42,7 @@ #include #include #include +#include 
#include #include #include diff -urN oldtree/kernel/sched_ingosched.c newtree/kernel/sched_ingosched.c --- oldtree/kernel/sched_ingosched.c 2006-09-29 14:45:59.000000000 -0400 +++ newtree/kernel/sched_ingosched.c 2006-09-30 04:20:14.000000000 -0400 @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff -urN oldtree/kernel/sched_staircase.c newtree/kernel/sched_staircase.c --- oldtree/kernel/sched_staircase.c 2006-09-29 15:29:53.000000000 -0400 +++ newtree/kernel/sched_staircase.c 2006-09-30 04:20:21.000000000 -0400 @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff -urN oldtree/kernel/taskstats.c newtree/kernel/taskstats.c --- oldtree/kernel/taskstats.c 2006-09-29 14:03:22.000000000 -0400 +++ newtree/kernel/taskstats.c 2006-09-30 04:19:28.000000000 -0400 @@ -18,7 +18,9 @@ #include #include +#include #include +#include #include #include #include @@ -75,7 +77,7 @@ /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(size, GFP_KERNEL); + skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL); if (!skb) return -ENOMEM; @@ -198,7 +200,13 @@ */ delayacct_add_tsk(stats, tsk); + + /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; + bacct_add_tsk(stats, tsk); + + /* fill in extended acct fields */ + xacct_add_tsk(stats, tsk); /* Define err: label here if needed */ put_task_struct(tsk); diff -urN oldtree/kernel/tsacct.c newtree/kernel/tsacct.c --- oldtree/kernel/tsacct.c 1969-12-31 19:00:00.000000000 -0500 +++ newtree/kernel/tsacct.c 2006-09-30 04:20:31.000000000 -0400 @@ -0,0 +1,124 @@ +/* + * tsacct.c - System accounting over taskstats interface + * + * Copyright (C) Jay Lan, + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + + +#define USEC_PER_TICK (USEC_PER_SEC/HZ) +/* + * fill in basic accounting fields + */ +void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{ + struct timespec uptime, ts; + s64 ac_etime; + + BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); + + /* calculate task elapsed time in timespec */ + do_posix_clock_monotonic_gettime(&uptime); + ts = timespec_sub(uptime, current->group_leader->start_time); + /* rebase elapsed time to usec */ + ac_etime = timespec_to_ns(&ts); + do_div(ac_etime, NSEC_PER_USEC); + stats->ac_etime = ac_etime; + stats->ac_btime = xtime.tv_sec - ts.tv_sec; + if (thread_group_leader(tsk)) { + stats->ac_exitcode = tsk->exit_code; + if (tsk->flags & PF_FORKNOEXEC) + stats->ac_flag |= AFORK; + } + if (tsk->flags & PF_SUPERPRIV) + stats->ac_flag |= ASU; + if (tsk->flags & PF_DUMPCORE) + stats->ac_flag |= ACORE; + if (tsk->flags & PF_SIGNALED) + stats->ac_flag |= AXSIG; + stats->ac_nice = task_nice(tsk); + stats->ac_sched = tsk->policy; + stats->ac_uid = tsk->uid; + stats->ac_gid = tsk->gid; + stats->ac_pid = tsk->pid; + stats->ac_ppid = (tsk->parent) ? 
tsk->parent->pid : 0; + stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; + stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; + stats->ac_minflt = tsk->min_flt; + stats->ac_majflt = tsk->maj_flt; + + strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); +} + + +#ifdef CONFIG_TASK_XACCT + +#define KB 1024 +#define MB (1024*KB) +/* + * fill in extended accounting fields + */ +void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) +{ + /* convert pages-jiffies to Mbyte-usec */ + stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; + stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; + if (p->mm) { + /* adjust to KB unit */ + stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; + stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; + } + stats->read_char = p->rchar; + stats->write_char = p->wchar; + stats->read_syscalls = p->syscr; + stats->write_syscalls = p->syscw; +} +#undef KB +#undef MB + +/** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + if (likely(tsk->mm)) { + long delta = cputime_to_jiffies( + cputime_sub(tsk->stime, tsk->acct_stimexpd)); + + if (delta == 0) + return; + tsk->acct_stimexpd = tsk->stime; + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + } +} + +/** + * acct_clear_integrals - clear the mm integral fields in task_struct + * @tsk: task_struct whose accounting fields are cleared + */ +void acct_clear_integrals(struct task_struct *tsk) +{ + tsk->acct_stimexpd = 0; + tsk->acct_rss_mem1 = 0; + tsk->acct_vm_mem1 = 0; +} +#endif diff -urN oldtree/mm/filemap.c newtree/mm/filemap.c --- oldtree/mm/filemap.c 2006-09-29 15:59:29.000000000 -0400 +++ newtree/mm/filemap.c 2006-09-30 05:06:07.000000000 -0400 @@ -924,8 +924,8 @@ */ static int large_isize(unsigned long nr_pages) { - if (nr_pages * 6 > 
total_pages) { - unsigned long unmapped_ram = total_pages - nr_mapped(); + if (nr_pages * 6 > vm_total_pages) { + unsigned long unmapped_ram = vm_total_pages - nr_mapped(); if (nr_pages * 2 > unmapped_ram) return 1; @@ -1225,13 +1225,14 @@ * that can use the page cache directly. */ ssize_t -__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *filp = iocb->ki_filp; ssize_t retval; unsigned long seg; size_t count; + loff_t *ppos = &iocb->ki_pos; count = 0; for (seg = 0; seg < nr_segs; seg++) { @@ -1255,7 +1256,7 @@ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { - loff_t pos = *ppos, size; + loff_t size; struct address_space *mapping; struct inode *inode; @@ -1301,33 +1302,8 @@ out: return retval; } -EXPORT_SYMBOL(__generic_file_aio_read); - -ssize_t -generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) -{ - struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - - BUG_ON(iocb->ki_pos != pos); - return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); -} EXPORT_SYMBOL(generic_file_aio_read); -ssize_t -generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} -EXPORT_SYMBOL(generic_file_read); - int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { ssize_t written; @@ -2432,22 +2408,22 @@ current->backing_dev_info = NULL; return written ? 
written : err; } -EXPORT_SYMBOL(generic_file_aio_write_nolock); -ssize_t -generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) +ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - loff_t pos = *ppos; - ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); + BUG_ON(iocb->ki_pos != pos); + + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; + ssize_t err; err = sync_page_range_nolock(inode, mapping, pos, ret); if (err < 0) @@ -2455,51 +2431,21 @@ } return ret; } +EXPORT_SYMBOL(generic_file_aio_write_nolock); -static ssize_t -__generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} - -ssize_t -generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} -EXPORT_SYMBOL(generic_file_write_nolock); - -ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec 
local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, - &iocb->ki_pos); + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { @@ -2513,66 +2459,6 @@ } EXPORT_SYMBOL(generic_file_aio_write); -ssize_t generic_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; - - mutex_lock(&inode->i_mutex); - ret = __generic_file_write_nolock(file, &local_iov, 1, ppos); - mutex_unlock(&inode->i_mutex); - - if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - ssize_t err; - - err = sync_page_range(inode, mapping, *ppos - ret, ret); - if (err < 0) - ret = err; - } - return ret; -} -EXPORT_SYMBOL(generic_file_write); - -ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); - return ret; -} -EXPORT_SYMBOL(generic_file_readv); - -ssize_t generic_file_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret; - - mutex_lock(&inode->i_mutex); - ret = __generic_file_write_nolock(file, iov, nr_segs, ppos); - mutex_unlock(&inode->i_mutex); - - if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - - err = sync_page_range(inode, mapping, *ppos - ret, ret); - if (err < 0) - ret = err; - } - return ret; -} 
-EXPORT_SYMBOL(generic_file_writev); - /* * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something * went wrong during pagecache shootdown. diff -urN oldtree/net/socket.c newtree/net/socket.c --- oldtree/net/socket.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/net/socket.c 2006-09-30 04:17:01.000000000 -0400 @@ -95,10 +95,10 @@ #include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -110,10 +110,6 @@ unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); -static ssize_t sock_readv(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); -static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); @@ -136,8 +132,6 @@ .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, .fasync = sock_fasync, - .readv = sock_readv, - .writev = sock_writev, .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, }; @@ -664,7 +658,6 @@ } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { @@ -675,16 +668,13 @@ } siocb->kiocb = iocb; - siocb->async_iov.iov_base = ubuf; - 
siocb->async_iov.iov_len = size; - iocb->private = siocb; return siocb; } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -704,43 +694,27 @@ return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); } -static ssize_t sock_readv(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct kiocb iocb; - struct sock_iocb siocb; - struct msghdr msg; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - - ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, ubuf, count, &siocb); + + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -762,39 +736,22 @@ return __sock_sendmsg(iocb, sock, msg, size); } -static ssize_t sock_writev(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) -{ - struct msghdr msg; - struct kiocb iocb; - struct sock_iocb siocb; - int 
ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - - ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb); + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } /* diff -urN oldtree/sound/core/pcm_native.c newtree/sound/core/pcm_native.c --- oldtree/sound/core/pcm_native.c 2006-09-29 14:03:23.000000000 -0400 +++ newtree/sound/core/pcm_native.c 2006-09-30 04:17:01.000000000 -0400 @@ -2831,8 +2831,8 @@ return result; } -static ssize_t snd_pcm_readv(struct file *file, const struct iovec *_vector, - unsigned long count, loff_t * offset) +static ssize_t snd_pcm_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct snd_pcm_file *pcm_file; @@ -2843,22 +2843,22 @@ void __user **bufs; snd_pcm_uframes_t frames; - pcm_file = file->private_data; + pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; snd_assert(substream != NULL, return -ENXIO); runtime = substream->runtime; if (runtime->status->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; - if (count > 1024 || count != runtime->channels) + if (nr_segs > 1024 || nr_segs != runtime->channels) return -EINVAL; - if (!frame_aligned(runtime, _vector->iov_len)) + if (!frame_aligned(runtime, iov->iov_len)) return -EINVAL; - frames = 
bytes_to_samples(runtime, _vector->iov_len); - bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL); + frames = bytes_to_samples(runtime, iov->iov_len); + bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL); if (bufs == NULL) return -ENOMEM; - for (i = 0; i < count; ++i) - bufs[i] = _vector[i].iov_base; + for (i = 0; i < nr_segs; ++i) + bufs[i] = iov[i].iov_base; result = snd_pcm_lib_readv(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); @@ -2866,8 +2866,8 @@ return result; } -static ssize_t snd_pcm_writev(struct file *file, const struct iovec *_vector, - unsigned long count, loff_t * offset) +static ssize_t snd_pcm_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; @@ -2877,7 +2877,7 @@ void __user **bufs; snd_pcm_uframes_t frames; - pcm_file = file->private_data; + pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; snd_assert(substream != NULL, result = -ENXIO; goto end); runtime = substream->runtime; @@ -2885,17 +2885,17 @@ result = -EBADFD; goto end; } - if (count > 128 || count != runtime->channels || - !frame_aligned(runtime, _vector->iov_len)) { + if (nr_segs > 128 || nr_segs != runtime->channels || + !frame_aligned(runtime, iov->iov_len)) { result = -EINVAL; goto end; } - frames = bytes_to_samples(runtime, _vector->iov_len); - bufs = kmalloc(sizeof(void *) * count, GFP_KERNEL); + frames = bytes_to_samples(runtime, iov->iov_len); + bufs = kmalloc(sizeof(void *) * nr_segs, GFP_KERNEL); if (bufs == NULL) return -ENOMEM; - for (i = 0; i < count; ++i) - bufs[i] = _vector[i].iov_base; + for (i = 0; i < nr_segs; ++i) + bufs[i] = iov[i].iov_base; result = snd_pcm_lib_writev(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); @@ -3405,7 +3405,7 @@ { .owner = THIS_MODULE, .write = snd_pcm_write, - .writev = snd_pcm_writev, + .aio_write = snd_pcm_aio_write, .open = 
snd_pcm_playback_open, .release = snd_pcm_release, .poll = snd_pcm_playback_poll, @@ -3417,7 +3417,7 @@ { .owner = THIS_MODULE, .read = snd_pcm_read, - .readv = snd_pcm_readv, + .aio_read = snd_pcm_aio_read, .open = snd_pcm_capture_open, .release = snd_pcm_release, .poll = snd_pcm_capture_poll,