From d7e5a5462f68270ed66efff22b1981be57a28c19 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Wed, 2 May 2007 12:18:41 +0200 Subject: [RSLIB] Support non-canonical GF representations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the CAFÉ NAND controller, we need to support non-canonical representations of the Galois field. Allow the caller to provide its own function for generating the field, and CAFÉ can use rslib instead of its own implementation. Signed-off-by: Segher Boessenkool Signed-off-by: David Woodhouse --- include/linux/rslib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rslib.h b/include/linux/rslib.h index ace25acfdc97..746580c1939c 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -34,6 +34,7 @@ * @prim: Primitive element, index form * @iprim: prim-th root of 1, index form * @gfpoly: The primitive generator polynominal + * @gffunc: Function to generate the field, if non-canonical representation * @users: Users of this structure * @list: List entry for the rs control list */ @@ -48,6 +49,7 @@ struct rs_control { int prim; int iprim; int gfpoly; + int (*gffunc)(int); int users; struct list_head list; }; @@ -77,6 +79,8 @@ int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, /* Create or get a matching rs control structure */ struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim, int nroots); +struct rs_control *init_rs_non_canonical(int symsize, int (*func)(int), + int fcr, int prim, int nroots); /* Release a rs control structure */ void free_rs(struct rs_control *rs); -- cgit v1.2.3 From 972edcb79ec8c8512ed5b29ca6718065328d6992 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Sun, 6 May 2007 18:46:57 +0400 Subject: [MTD] [NAND] platform NAND driver: update header This patch extends nand.h in order to enable platform NAND driver. Signed-off-by: Vitaly Wool Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index cf197ad62da6..d2365c8dcacc 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -560,6 +560,7 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, * @chip_delay: R/B delay value in us * @options: Option flags, e.g. 16bit buswidth * @ecclayout: ecc layout info structure + * @part_probe_types: NULL-terminated array of probe types * @priv: hardware controller specific settings */ struct platform_nand_chip { @@ -570,6 +571,7 @@ struct platform_nand_chip { struct nand_ecclayout *ecclayout; int chip_delay; unsigned int options; + const char **part_probe_types; void *priv; }; @@ -578,6 +580,8 @@ struct platform_nand_chip { * @hwcontrol: platform specific hardware control structure * @dev_ready: platform specific function to read ready/busy pin * @select_chip: platform specific chip select function + * @cmd_ctrl: platform specific function for controlling + * ALE/CLE/nCE. 
Also used to write command and address * @priv: private data to transport driver specific settings * * All fields are optional and depend on the hardware driver requirements @@ -586,9 +590,21 @@ struct platform_nand_ctrl { void (*hwcontrol)(struct mtd_info *mtd, int cmd); int (*dev_ready)(struct mtd_info *mtd); void (*select_chip)(struct mtd_info *mtd, int chip); + void (*cmd_ctrl)(struct mtd_info *mtd, int dat, + unsigned int ctrl); void *priv; }; +/** + * struct platform_nand_data - container structure for platform-specific data + * @chip: chip level chip structure + * @ctrl: controller level device structure + */ +struct platform_nand_data { + struct platform_nand_chip chip; + struct platform_nand_ctrl ctrl; +}; + /* Some helpers to access the data structures */ static inline struct platform_nand_chip *get_platform_nandchip(struct mtd_info *mtd) -- cgit v1.2.3 From 225c7b1feef1b41170f7037a5b10a65cd8a42c54 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 May 2007 18:00:38 -0700 Subject: IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters Add an InfiniBand driver for Mellanox ConnectX adapters. Because these adapters can also be used as ethernet NICs and Fibre Channel HBAs, the driver is split into two modules: mlx4_core: Handles low-level things like device initialization and processing firmware commands. Also controls resource allocation so that the InfiniBand, ethernet and FC functions can share a device without stepping on each other. mlx4_ib: Handles InfiniBand-specific things; plugs into the InfiniBand midlayer. Signed-off-by: Roland Dreier --- include/linux/mlx4/cmd.h | 178 +++++++++++++++++++++++ include/linux/mlx4/cq.h | 123 ++++++++++++++++ include/linux/mlx4/device.h | 331 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mlx4/doorbell.h | 97 +++++++++++++ include/linux/mlx4/driver.h | 59 ++++++++ include/linux/mlx4/qp.h | 288 ++++++++++++++++++++++++++++++++++++ include/linux/mlx4/srq.h | 42 ++++++ 7 files changed, 1118 insertions(+) create mode 100644 include/linux/mlx4/cmd.h create mode 100644 include/linux/mlx4/cq.h create mode 100644 include/linux/mlx4/device.h create mode 100644 include/linux/mlx4/doorbell.h create mode 100644 include/linux/mlx4/driver.h create mode 100644 include/linux/mlx4/qp.h create mode 100644 include/linux/mlx4/srq.h (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h new file mode 100644 index 000000000000..4fb552d12f7a --- /dev/null +++ b/include/linux/mlx4/cmd.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_CMD_H +#define MLX4_CMD_H + +#include + +enum { + /* initialization and general commands */ + MLX4_CMD_SYS_EN = 0x1, + MLX4_CMD_SYS_DIS = 0x2, + MLX4_CMD_MAP_FA = 0xfff, + MLX4_CMD_UNMAP_FA = 0xffe, + MLX4_CMD_RUN_FW = 0xff6, + MLX4_CMD_MOD_STAT_CFG = 0x34, + MLX4_CMD_QUERY_DEV_CAP = 0x3, + MLX4_CMD_QUERY_FW = 0x4, + MLX4_CMD_ENABLE_LAM = 0xff8, + MLX4_CMD_DISABLE_LAM = 0xff7, + MLX4_CMD_QUERY_DDR = 0x5, + MLX4_CMD_QUERY_ADAPTER = 0x6, + MLX4_CMD_INIT_HCA = 0x7, + MLX4_CMD_CLOSE_HCA = 0x8, + MLX4_CMD_INIT_PORT = 0x9, + MLX4_CMD_CLOSE_PORT = 0xa, + MLX4_CMD_QUERY_HCA = 0xb, + MLX4_CMD_SET_PORT = 0xc, + MLX4_CMD_ACCESS_DDR = 0x2e, + MLX4_CMD_MAP_ICM = 0xffa, + MLX4_CMD_UNMAP_ICM = 0xff9, + MLX4_CMD_MAP_ICM_AUX = 0xffc, + MLX4_CMD_UNMAP_ICM_AUX = 0xffb, + MLX4_CMD_SET_ICM_SIZE = 0xffd, + + /* TPT commands */ + MLX4_CMD_SW2HW_MPT = 0xd, + MLX4_CMD_QUERY_MPT = 0xe, + MLX4_CMD_HW2SW_MPT = 0xf, + MLX4_CMD_READ_MTT = 0x10, + MLX4_CMD_WRITE_MTT = 0x11, + MLX4_CMD_SYNC_TPT = 0x2f, + + /* EQ commands */ + MLX4_CMD_MAP_EQ = 0x12, + MLX4_CMD_SW2HW_EQ = 0x13, + MLX4_CMD_HW2SW_EQ = 0x14, + MLX4_CMD_QUERY_EQ = 0x15, + + /* CQ commands */ + MLX4_CMD_SW2HW_CQ = 0x16, + MLX4_CMD_HW2SW_CQ = 0x17, + MLX4_CMD_QUERY_CQ = 0x18, + MLX4_CMD_RESIZE_CQ = 0x2c, + + /* SRQ commands */ + MLX4_CMD_SW2HW_SRQ = 0x35, + MLX4_CMD_HW2SW_SRQ = 0x36, + MLX4_CMD_QUERY_SRQ = 0x37, + MLX4_CMD_ARM_SRQ = 0x40, + + /* QP/EE commands */ + MLX4_CMD_RST2INIT_QP = 0x19, + MLX4_CMD_INIT2RTR_QP = 0x1a, + MLX4_CMD_RTR2RTS_QP = 0x1b, + MLX4_CMD_RTS2RTS_QP = 0x1c, + MLX4_CMD_SQERR2RTS_QP = 0x1d, + MLX4_CMD_2ERR_QP = 0x1e, + MLX4_CMD_RTS2SQD_QP = 0x1f, + MLX4_CMD_SQD2SQD_QP = 0x38, + MLX4_CMD_SQD2RTS_QP = 0x20, + MLX4_CMD_2RST_QP = 0x21, + MLX4_CMD_QUERY_QP = 0x22, + MLX4_CMD_INIT2INIT_QP = 0x2d, + MLX4_CMD_SUSPEND_QP = 0x32, + MLX4_CMD_UNSUSPEND_QP = 0x33, + /* special QP and management commands */ + MLX4_CMD_CONF_SPECIAL_QP = 0x23, + MLX4_CMD_MAD_IFC = 0x24, + + /* multicast commands */ + MLX4_CMD_READ_MCG = 0x25, + MLX4_CMD_WRITE_MCG = 0x26, + MLX4_CMD_MGID_HASH = 0x27, + + /* miscellaneous commands */ + MLX4_CMD_DIAG_RPRT = 0x30, + MLX4_CMD_NOP = 0x31, + + /* debug commands */ + MLX4_CMD_QUERY_DEBUG_MSG = 0x2a, + MLX4_CMD_SET_DEBUG_MSG = 0x2b, +}; + +enum { + MLX4_CMD_TIME_CLASS_A = 10000, + MLX4_CMD_TIME_CLASS_B = 10000, + MLX4_CMD_TIME_CLASS_C = 10000, +}; + +enum { + MLX4_MAILBOX_SIZE = 4096 +}; + +struct mlx4_dev; + +struct mlx4_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + +int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + int out_is_imm, u32 in_modifier, u8 op_modifier, + u16 op, unsigned long timeout); + +/* Invoke a command with no output parameter */ +static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier, + op_modifier, op, timeout); +} + +/* Invoke a command with an output mailbox */ +static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param, + 
u32 in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier, + op_modifier, op, timeout); +} + +/* + * Invoke a command with an immediate output parameter (and copy the + * output into the caller's out_param pointer after the command + * executes). + */ +static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param, + u32 in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier, + op_modifier, op, timeout); +} + +struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); +void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); + +#endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h new file mode 100644 index 000000000000..0181e0a57cbf --- /dev/null +++ b/include/linux/mlx4/cq.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef MLX4_CQ_H +#define MLX4_CQ_H + +#include + +#include +#include + +struct mlx4_cqe { + __be32 my_qpn; + __be32 immed_rss_invalid; + __be32 g_mlpath_rqpn; + u8 sl; + u8 reserved1; + __be16 rlid; + u32 reserved2; + __be32 byte_cnt; + __be16 wqe_index; + __be16 checksum; + u8 reserved3[3]; + u8 owner_sr_opcode; +}; + +struct mlx4_err_cqe { + __be32 my_qpn; + u32 reserved1[5]; + __be16 wqe_index; + u8 vendor_err_syndrome; + u8 syndrome; + u8 reserved2[3]; + u8 owner_sr_opcode; +}; + +enum { + MLX4_CQE_OWNER_MASK = 0x80, + MLX4_CQE_IS_SEND_MASK = 0x40, + MLX4_CQE_OPCODE_MASK = 0x1f +}; + +enum { + MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX4_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX4_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX4_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX4_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX4_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, + void __iomem *uar_page, + spinlock_t *doorbell_lock) +{ + __be32 doorbell[2]; + u32 sn; + u32 ci; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + + *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cpu_to_be32(sn << 28 | cmd | cq->cqn); + doorbell[1] = cpu_to_be32(ci); + + mlx4_write64(doorbell, uar_page + MLX4_CQ_DOORBELL, doorbell_lock); +} + +static inline void mlx4_cq_set_ci(struct mlx4_cq *cq) +{ + *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); +} + +enum { + MLX4_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX4_CQ_DB_REQ_NOT = 2 << 24 +}; + +#endif /* MLX4_CQ_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h new file mode 100644 index 000000000000..8c5f8fd86841 --- /dev/null +++ b/include/linux/mlx4/device.h @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_DEVICE_H +#define MLX4_DEVICE_H + +#include +#include +#include + +#include + +enum { + MLX4_FLAG_MSI_X = 1 << 0, +}; + +enum { + MLX4_MAX_PORTS = 2 +}; + +enum { + MLX4_DEV_CAP_FLAG_RC = 1 << 0, + MLX4_DEV_CAP_FLAG_UC = 1 << 1, + MLX4_DEV_CAP_FLAG_UD = 1 << 2, + MLX4_DEV_CAP_FLAG_SRQ = 1 << 6, + MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7, + MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, + MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, + MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, + MLX4_DEV_CAP_FLAG_APM = 1 << 17, + MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, + MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19, + MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20, + MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21 +}; + +enum mlx4_event { + MLX4_EVENT_TYPE_COMP = 0x00, + MLX4_EVENT_TYPE_PATH_MIG = 0x01, + MLX4_EVENT_TYPE_COMM_EST = 0x02, + MLX4_EVENT_TYPE_SQ_DRAINED = 0x03, + MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14, + MLX4_EVENT_TYPE_CQ_ERROR = 0x04, + MLX4_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MLX4_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MLX4_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MLX4_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, + MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, + MLX4_EVENT_TYPE_CMD = 0x0a +}; + +enum { + MLX4_PORT_CHANGE_SUBTYPE_DOWN = 1, + MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 +}; + +enum { + MLX4_PERM_LOCAL_READ = 1 << 10, + MLX4_PERM_LOCAL_WRITE = 1 << 11, + MLX4_PERM_REMOTE_READ = 1 << 12, + MLX4_PERM_REMOTE_WRITE = 1 << 13, + MLX4_PERM_ATOMIC = 1 << 14 +}; + +enum { + MLX4_OPCODE_NOP = 0x00, + MLX4_OPCODE_SEND_INVAL = 0x01, + MLX4_OPCODE_RDMA_WRITE = 0x08, + MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX4_OPCODE_SEND = 0x0a, + MLX4_OPCODE_SEND_IMM = 0x0b, + MLX4_OPCODE_LSO = 0x0e, + MLX4_OPCODE_RDMA_READ = 0x10, + MLX4_OPCODE_ATOMIC_CS = 0x11, + MLX4_OPCODE_ATOMIC_FA = 0x12, + MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, + MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, + MLX4_OPCODE_BIND_MW = 0x18, + MLX4_OPCODE_FMR = 0x19, + MLX4_OPCODE_LOCAL_INVAL = 0x1b, + MLX4_OPCODE_CONFIG_CMD = 0x1f, + + MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX4_RECV_OPCODE_SEND = 0x01, + MLX4_RECV_OPCODE_SEND_IMM = 0x02, + MLX4_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX4_CQE_OPCODE_ERROR = 0x1e, + MLX4_CQE_OPCODE_RESIZE = 0x16, +}; + +enum { + MLX4_STAT_RATE_OFFSET = 5 +}; + +struct mlx4_caps { + u64 fw_ver; + int num_ports; + int vl_cap; + int mtu_cap; + int gid_table_len; + int pkey_table_len; + int local_ca_ack_delay; + int num_uars; + int bf_reg_size; + int bf_regs_per_page; + int max_sq_sg; + int max_rq_sg; + int num_qps; + int max_wqes; + int max_sq_desc_sz; + int max_rq_desc_sz; + int max_qp_init_rdma; + int max_qp_dest_rdma; + int reserved_qps; + int sqp_start; + int num_srqs; + int max_srq_wqes; + int max_srq_sge; + int reserved_srqs; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_eqs; + int reserved_eqs; + int num_mpts; + int num_mtt_segs; + int fmr_reserved_mtts; + int reserved_mtts; + int reserved_mrws; + int reserved_uars; + int num_mgms; + int num_amgms; + int reserved_mcgs; + int num_qp_per_mgm; + int num_pds; + int reserved_pds; + int mtt_entry_sz; + 
u32 page_size_cap; + u32 flags; + u16 stat_rate_support; + u8 port_width_cap; +}; + +struct mlx4_buf_list { + void *buf; + dma_addr_t map; +}; + +struct mlx4_buf { + union { + struct mlx4_buf_list direct; + struct mlx4_buf_list *page_list; + } u; + int nbufs; + int npages; + int page_shift; +}; + +struct mlx4_mtt { + u32 first_seg; + int order; + int page_shift; +}; + +struct mlx4_mr { + struct mlx4_mtt mtt; + u64 iova; + u64 size; + u32 key; + u32 pd; + u32 access; + int enabled; +}; + +struct mlx4_uar { + unsigned long pfn; + int index; +}; + +struct mlx4_cq { + void (*comp) (struct mlx4_cq *); + void (*event) (struct mlx4_cq *, enum mlx4_event); + + struct mlx4_uar *uar; + + u32 cons_index; + + __be32 *set_ci_db; + __be32 *arm_db; + int arm_sn; + + int cqn; + + atomic_t refcount; + struct completion free; +}; + +struct mlx4_qp { + void (*event) (struct mlx4_qp *, enum mlx4_event); + + int qpn; + + atomic_t refcount; + struct completion free; +}; + +struct mlx4_srq { + void (*event) (struct mlx4_srq *, enum mlx4_event); + + int srqn; + int max; + int max_gs; + int wqe_shift; + + atomic_t refcount; + struct completion free; +}; + +struct mlx4_av { + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 dgid[16]; +}; + +struct mlx4_dev { + struct pci_dev *pdev; + unsigned long flags; + struct mlx4_caps caps; + struct radix_tree_root qp_table_tree; +}; + +struct mlx4_init_port_param { + int set_guid0; + int set_node_guid; + int set_si_guid; + u16 mtu; + int port_width_cap; + u16 vl_cap; + u16 max_gid; + u16 max_pkey; + u64 guid0; + u64 node_guid; + u64 si_guid; +}; + +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf); +void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); + +int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); +void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); + +int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); +void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); + +int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, + struct mlx4_mtt *mtt); +void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt); +u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); + +int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, + int npages, int page_shift, struct mlx4_mr *mr); +void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + int start_index, int npages, u64 *page_list); +int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_buf *buf); + +int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq); +void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); + +int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp); +void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); + +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, + u64 db_rec, struct mlx4_srq *srq); +void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); +int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); + +int mlx4_INIT_PORT(struct mlx4_dev *dev, struct mlx4_init_port_param *param, int port); +int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); + +int 
mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); + +#endif /* MLX4_DEVICE_H */ diff --git a/include/linux/mlx4/doorbell.h b/include/linux/mlx4/doorbell.h new file mode 100644 index 000000000000..3f2da442d7cb --- /dev/null +++ b/include/linux/mlx4/doorbell.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_DOORBELL_H +#define MLX4_DOORBELL_H + +#include +#include + +#define MLX4_SEND_DOORBELL 0x14 +#define MLX4_CQ_DOORBELL 0x20 + +#if BITS_PER_LONG == 64 +/* + * Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MLX4_DECLARE_DOORBELL_LOCK(name) +#define MLX4_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MLX4_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mlx4_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writeq((__force u64) val, dest); +} + +static inline void mlx4_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *) val, dest); +} + +#else + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. 
+ */ + +#define MLX4_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MLX4_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MLX4_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mlx4_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writel(((__force u32 *) &val)[0], dest); + __raw_writel(((__force u32 *) &val)[1], dest + 4); +} + +static inline void mlx4_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif + +#endif /* MLX4_DOORBELL_H */ diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h new file mode 100644 index 000000000000..1b835ca49df1 --- /dev/null +++ b/include/linux/mlx4/driver.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_DRIVER_H +#define MLX4_DRIVER_H + +#include + +struct mlx4_dev; + +enum mlx4_dev_event { + MLX4_DEV_EVENT_CATASTROPHIC_ERROR, + MLX4_DEV_EVENT_PORT_UP, + MLX4_DEV_EVENT_PORT_DOWN, + MLX4_DEV_EVENT_PORT_REINIT, +}; + +struct mlx4_interface { + void * (*add) (struct mlx4_dev *dev); + void (*remove)(struct mlx4_dev *dev, void *context); + void (*event) (struct mlx4_dev *dev, void *context, + enum mlx4_dev_event event, int subtype, + int port); + struct list_head list; +}; + +int mlx4_register_interface(struct mlx4_interface *intf); +void mlx4_unregister_interface(struct mlx4_interface *intf); + +#endif /* MLX4_DRIVER_H */ diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h new file mode 100644 index 000000000000..9eeb61adf6a3 --- /dev/null +++ b/include/linux/mlx4/qp.h @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_QP_H +#define MLX4_QP_H + +#include + +#include + +#define MLX4_INVALID_LKEY 0x100 + +enum mlx4_qp_optpar { + MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MLX4_QP_OPTPAR_RRE = 1 << 1, + MLX4_QP_OPTPAR_RAE = 1 << 2, + MLX4_QP_OPTPAR_RWE = 1 << 3, + MLX4_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MLX4_QP_OPTPAR_Q_KEY = 1 << 5, + MLX4_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MLX4_QP_OPTPAR_SRA_MAX = 1 << 8, + MLX4_QP_OPTPAR_RRA_MAX = 1 << 9, + MLX4_QP_OPTPAR_PM_STATE = 1 << 10, + MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, + MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16 +}; + +enum mlx4_qp_state { + MLX4_QP_STATE_RST = 0, + MLX4_QP_STATE_INIT = 1, + MLX4_QP_STATE_RTR = 2, + MLX4_QP_STATE_RTS = 3, + MLX4_QP_STATE_SQER = 4, + MLX4_QP_STATE_SQD = 5, + MLX4_QP_STATE_ERR = 6, + MLX4_QP_STATE_SQ_DRAINING = 7, + MLX4_QP_NUM_STATE +}; + +enum { + MLX4_QP_ST_RC = 0x0, + MLX4_QP_ST_UC = 0x1, + MLX4_QP_ST_RD = 0x2, + MLX4_QP_ST_UD = 0x3, + MLX4_QP_ST_MLX = 0x7 +}; + +enum { + MLX4_QP_PM_MIGRATED = 0x3, + MLX4_QP_PM_ARMED = 0x0, + MLX4_QP_PM_REARM = 0x1 +}; + +enum { + /* params1 */ + MLX4_QP_BIT_SRE = 1 << 15, + MLX4_QP_BIT_SWE = 1 << 14, + MLX4_QP_BIT_SAE = 1 << 13, + /* params2 */ + MLX4_QP_BIT_RRE = 1 << 15, + MLX4_QP_BIT_RWE = 1 << 14, + MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_RIC = 1 << 4, +}; + +struct mlx4_qp_path { + u8 fl; + u8 reserved1[2]; + u8 pkey_index; + u8 reserved2; + u8 grh_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 tclass_flowlabel; + u8 rgid[16]; + u8 sched_queue; + u8 snooper_flags; + u8 reserved3[2]; + u8 counter_index; + u8 reserved4[7]; +}; + +struct mlx4_qp_context { + __be32 flags; + __be32 pd; + u8 mtu_msgmax; + u8 rq_size_stride; + u8 sq_size_stride; + u8 rlkey; + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + struct mlx4_qp_path pri_path; + struct mlx4_qp_path alt_path; + __be32 params1; + u32 reserved1; + __be32 next_send_psn; + __be32 cqn_send; + u32 reserved2[2]; + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 srcd; + __be32 cqn_recv; + __be64 db_rec_addr; + __be32 qkey; + __be32 srqn; + __be32 msn; + __be16 rq_wqe_counter; + __be16 
sq_wqe_counter; + u32 reserved3[2]; + __be32 param3; + __be32 nummmcpeers_basemkey; + u8 log_page_size; + u8 reserved4[2]; + u8 mtt_base_addr_h; + __be32 mtt_base_addr_l; + u32 reserved5[10]; +}; + +enum { + MLX4_WQE_CTRL_FENCE = 1 << 6, + MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, + MLX4_WQE_CTRL_SOLICITED = 1 << 1, +}; + +struct mlx4_wqe_ctrl_seg { + __be32 owner_opcode; + u8 reserved2[3]; + u8 fence_size; + /* + * High 24 bits are SRC remote buffer; low 8 bits are flags: + * [7] SO (strong ordering) + * [5] TCP/UDP checksum + * [4] IP checksum + * [3:2] C (generate completion queue entry) + * [1] SE (solicited event) + */ + __be32 srcrb_flags; + /* + * imm is immediate data for send/RDMA write w/ immediate; + * also invalidation key for send with invalidate; input + * modifier for WQEs on CCQs. + */ + __be32 imm; +}; + +enum { + MLX4_WQE_MLX_VL15 = 1 << 17, + MLX4_WQE_MLX_SLR = 1 << 16 +}; + +struct mlx4_wqe_mlx_seg { + u8 owner; + u8 reserved1[2]; + u8 opcode; + u8 reserved2[3]; + u8 size; + /* + * [17] VL15 + * [16] SLR + * [15:12] static rate + * [11:8] SL + * [4] ICRC + * [3:2] C + * [0] FL (force loopback) + */ + __be32 flags; + __be16 rlid; + u16 reserved3; +}; + +struct mlx4_wqe_datagram_seg { + __be32 av[8]; + __be32 dqpn; + __be32 qkey; + __be32 reservd[2]; +}; + +struct mlx4_wqe_bind_seg { + __be32 flags1; + __be32 flags2; + __be32 new_rkey; + __be32 lkey; + __be64 addr; + __be64 length; +}; + +struct mlx4_wqe_fmr_seg { + __be32 flags; + __be32 mem_key; + __be64 buf_list; + __be64 start_addr; + __be64 reg_len; + __be32 offset; + __be32 page_size; + u32 reserved[2]; +}; + +struct mlx4_wqe_fmr_ext_seg { + u8 flags; + u8 reserved; + __be16 app_mask; + __be16 wire_app_tag; + __be16 mem_app_tag; + __be32 wire_ref_tag_base; + __be32 mem_ref_tag_base; +}; + +struct mlx4_wqe_local_inval_seg { + u8 flags; + u8 reserved1[3]; + __be32 mem_key; + u8 reserved2[3]; + u8 guest_id; + __be64 pa; +}; + +struct mlx4_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mlx4_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx4_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mlx4_wqe_inline_seg { + __be32 byte_count; +}; + +int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, + struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, + int sqd_event, struct mlx4_qp *qp); + +static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); +} + +void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); + +#endif /* MLX4_QP_H */ diff --git a/include/linux/mlx4/srq.h b/include/linux/mlx4/srq.h new file mode 100644 index 000000000000..799a0697a383 --- /dev/null +++ b/include/linux/mlx4/srq.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX4_SRQ_H +#define MLX4_SRQ_H + +struct mlx4_wqe_srq_next_seg { + u16 reserved1; + __be16 next_wqe_index; + u32 reserved2[3]; +}; + +#endif /* MLX4_SRQ_H */ -- cgit v1.2.3 From beb7dd86a101263bf63a78c7c6d4da3849b35bd6 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 9 May 2007 07:14:03 +0200 Subject: Fix misspellings collected by members of KJ list. Fix the misspellings of "propogate", "writting" and (oh, the shame :-) "kenrel" in the source tree. Signed-off-by: Robert P. J. Day Signed-off-by: Adrian Bunk --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index dab69afee2fa..6d3047d8c91c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -33,7 +33,7 @@ struct mnt_namespace; #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -#define MNT_PNODE_MASK 0x3000 /* propogation flag mask */ +#define MNT_PNODE_MASK 0x3000 /* propagation flag mask */ struct vfsmount { struct list_head mnt_hash; -- cgit v1.2.3 From 59c51591a0ac7568824f541f57de967e88adaa07 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Wed, 9 May 2007 08:57:56 +0200 Subject: Fix occurrences of "the the " Signed-off-by: Michael Opdenacker Signed-off-by: Adrian Bunk --- include/linux/ext3_fs_i.h | 2 +- include/linux/ext4_fs_i.h | 2 +- include/linux/radix-tree.h | 4 ++-- include/linux/security.h | 2 +- include/linux/usb.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 4395e5206746..7894dd0f3b77 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -54,7 +54,7 @@ struct ext3_block_alloc_info { /* * Was i_next_alloc_goal in ext3_inode_info * is the *physical* companion to i_next_alloc_block. - * it the the physical block number of the block which was most-recentl + * it the physical block number of the block which was most-recentl * allocated to this file. This give us the goal (target) for the next * allocation when we detect linearly ascending requests. 
*/ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index bb42379cb7fd..d5b177e5b395 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -52,7 +52,7 @@ struct ext4_block_alloc_info { /* * Was i_next_alloc_goal in ext4_inode_info * is the *physical* companion to i_next_alloc_block. - * it the the physical block number of the block which was most-recentl + * it the physical block number of the block which was most-recentl * allocated to this file. This give us the goal (target) for the next * allocation when we detect linearly ascending requests. */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0deb842541ac..f9e77d2ee320 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -87,10 +87,10 @@ do { \ * management of their lifetimes must be completely managed by API users. * * For API usage, in general, - * - any function _modifying_ the the tree or tags (inserting or deleting + * - any function _modifying_ the tree or tags (inserting or deleting * items, setting or clearing tags must exclude other modifications, and * exclude any functions reading the tree. - * - any function _reading_ the the tree or tags (looking up items or tags, + * - any function _reading_ the tree or tags (looking up items or tags, * gang lookups) must exclude modifications to the tree, but may occur * concurrently with other readers. * diff --git a/include/linux/security.h b/include/linux/security.h index 47e82c120f9a..9eb9e0fe0331 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -322,7 +322,7 @@ struct request_sock; * @dir contains the inode structure of parent of the new file. * @dentry contains the dentry structure of the new file. * @mode contains the mode of the new file. - * @dev contains the the device number. + * @dev contains the device number. * Return 0 if permission is granted. * @inode_rename: * Check for permission to rename a file or directory. diff --git a/include/linux/usb.h b/include/linux/usb.h index cfbd2bb8fa2c..94bd38a6d947 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -126,7 +126,7 @@ enum usb_interface_condition { * Each interface may have alternate settings. The initial configuration * of a device sets altsetting 0, but the device driver can change * that setting using usb_set_interface(). Alternate settings are often - * used to control the the use of periodic endpoints, such as by having + * used to control the use of periodic endpoints, such as by having * different endpoints use different amounts of reserved USB bandwidth. * All standards-conformant USB devices that use isochronous endpoints * will use them in non-default settings. -- cgit v1.2.3 From 5886269962f94fa9185c32db3ec936c612503235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 9 May 2007 07:51:49 +0200 Subject: fix file specification in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many files include the filename at the beginning, serveral used a wrong one. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Adrian Bunk --- include/linux/generic_acl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h index 80764f40be75..886f5faa08cb 100644 --- a/include/linux/generic_acl.h +++ b/include/linux/generic_acl.h @@ -1,5 +1,5 @@ /* - * fs/generic_acl.c + * include/linux/generic_acl.h * * (C) 2005 Andreas Gruenbacher * -- cgit v1.2.3 From 121e70b69aef898a3c02fa90d0a2108381bcf975 Mon Sep 17 00:00:00 2001 From: John Anthony Kazos Jr Date: Wed, 9 May 2007 08:30:57 +0200 Subject: include files: convert "include" subdirectory to UTF-8 Convert the "include" subdirectory to UTF-8. Signed-off-by: John Anthony Kazos Jr. Signed-off-by: Adrian Bunk --- include/linux/i2c-algo-bit.h | 2 +- include/linux/i2c-algo-pcf.h | 2 +- include/linux/irda.h | 2 +- include/linux/meye.h | 2 +- include/linux/sonypi.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h index 9ee0f800592f..111334f5b922 100644 --- a/include/linux/i2c-algo-bit.h +++ b/include/linux/i2c-algo-bit.h @@ -18,7 +18,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* ------------------------------------------------------------------------- */ -/* With some changes from Kyösti Mälkki and even +/* With some changes from Kyösti Mälkki and even Frodo Looijaard */ #ifndef _LINUX_I2C_ALGO_BIT_H diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 994eb86f882c..77afbb60fd11 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -19,7 +19,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* ------------------------------------------------------------------------- */ -/* With some changes from Kyösti Mälkki and even +/* With some changes from Kyösti Mälkki and even Frodo Looijaard */ #ifndef _LINUX_I2C_ALGO_PCF_H diff --git a/include/linux/irda.h b/include/linux/irda.h index 09d8f105a5a8..945ba3110874 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/include/linux/meye.h b/include/linux/meye.h index 11ec45e9a132..39fd9c8ddd4b 100644 --- a/include/linux/meye.h +++ b/include/linux/meye.h @@ -3,7 +3,7 @@ * * Copyright (C) 2001-2003 Stelian Pop * - * Copyright (C) 2001-2002 Alcôve + * Copyright (C) 2001-2002 Alcôve * * Copyright (C) 2000 Andrew Tridgell * diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index f56d24734950..34d4b075f7b8 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -5,7 +5,7 @@ * * Copyright (C) 2005 Narayanan R S - * Copyright (C) 2001-2002 Alcôve + * Copyright (C) 2001-2002 Alcôve * * Copyright (C) 2001 Michael Ashley * -- cgit v1.2.3 From 36200b76008d52d16b170d4f7dae9cfe00f5eb2b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 3 May 2007 15:58:49 -0400 Subject: [MTD] Remove unnecessary user space check from mtd.h. Since the header file include/linux/mtd/mtd.h is not exported to user space, remove the user space check and error. Signed-off-by: Robert P. J. 
Day Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 45d482ce8397..12a9a18f6e16 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -9,10 +9,6 @@ #ifndef __MTD_MTD_H__ #define __MTD_MTD_H__ -#ifndef __KERNEL__ -#error This is a kernel header. Perhaps include mtd-user.h instead? -#endif - #include #include #include -- cgit v1.2.3 From 42f209d3c94516affeb5e578fae62925f531a2d9 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 4 May 2007 15:49:38 -0400 Subject: [MTD] Delete allegedly obsolete "bank_size" field of mtd_info. Delete the allegedly obsolete "bank_size" member of struct mtd_info. Signed-off-by: Robert P. J. Day Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 12a9a18f6e16..fd64ccfbce02 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -133,9 +133,6 @@ struct mtd_info { int numeraseregions; struct mtd_erase_region_info *eraseregions; - /* This really shouldn't be here. It can go away in 2.5 */ - u_int32_t bank_size; - int (*erase) (struct mtd_info *mtd, struct erase_info *instr); /* This stuff for eXecute-In-Place */ -- cgit v1.2.3 From 97416ce82e20a9511ec369822098a8d20998398a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 9 May 2007 02:32:35 -0700 Subject: Declare {compat_}sys_utimensat This is needed before Powerpc can wire up the syscall. Signed-off-by: Stephen Rothwell Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 3 +++ include/linux/syscalls.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ccd863dd77fa..70a157a130bb 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -253,5 +253,8 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, + struct compat_timespec __user *t, int flags); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1912c6cbef55..3139f4412297 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -576,6 +576,8 @@ asmlinkage long sys_fstatat64(int dfd, char __user *filename, struct stat64 __user *statbuf, int flag); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); +asmlinkage long sys_utimensat(int dfd, char __user *filename, + struct timespec __user *utimes, int flags); asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t); asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, -- cgit v1.2.3 From a3d25c275d383975504dc53c25b691df59bd3c48 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2007 02:33:18 -0700 Subject: PM: Separate hibernation code from suspend code [ With Johannes Berg ] Separate the hibernation (aka suspend to disk code) from the other suspend code. 
In particular: * Remove the definitions related to hibernation from include/linux/pm.h * Introduce struct hibernation_ops and a new hibernate() function to hibernate the system, defined in include/linux/suspend.h * Separate suspend code in kernel/power/main.c from hibernation-related code in kernel/power/disk.c and kernel/power/user.c (with the help of hibernation_ops) * Switch ACPI (the only user of pm_ops.pm_disk_mode) to hibernation_ops Signed-off-by: Rafael J. Wysocki Cc: Greg KH Cc: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pm.h | 31 +------------------------------ include/linux/suspend.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 6e8fa3049e5d..87545e0f0b58 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -107,26 +107,11 @@ typedef int __bitwise suspend_state_t; #define PM_SUSPEND_ON ((__force suspend_state_t) 0) #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1) #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) -#define PM_SUSPEND_DISK ((__force suspend_state_t) 4) -#define PM_SUSPEND_MAX ((__force suspend_state_t) 5) - -typedef int __bitwise suspend_disk_method_t; - -/* invalid must be 0 so struct pm_ops initialisers can leave it out */ -#define PM_DISK_INVALID ((__force suspend_disk_method_t) 0) -#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 1) -#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 2) -#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 3) -#define PM_DISK_TEST ((__force suspend_disk_method_t) 4) -#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 5) -#define PM_DISK_MAX ((__force suspend_disk_method_t) 6) +#define PM_SUSPEND_MAX ((__force suspend_state_t) 4) /** * struct pm_ops - Callbacks for managing platform dependent suspend states. * @valid: Callback to determine whether the given state can be entered. - * If %CONFIG_SOFTWARE_SUSPEND is set then %PM_SUSPEND_DISK is - * always valid and never passed to this call. If not assigned, - * no suspend states are valid. * Valid states are advertised in /sys/power/state but can still * be rejected by prepare or enter if the conditions aren't right. * There is a %pm_valid_only_mem function available that can be assigned @@ -140,24 +125,12 @@ typedef int __bitwise suspend_disk_method_t; * * @finish: Called when the system has left the given state and all devices * are resumed. The return value is ignored. - * - * @pm_disk_mode: The generic code always allows one of the shutdown methods - * %PM_DISK_SHUTDOWN, %PM_DISK_REBOOT, %PM_DISK_TEST and - * %PM_DISK_TESTPROC. If this variable is set, the mode it is set - * to is allowed in addition to those modes and is also made default. - * When this mode is sent selected, the @prepare call will be called - * before suspending to disk (if present), the @enter call should be - * present and will be called after all state has been saved and the - * machine is ready to be powered off; the @finish callback is called - * after state has been restored. All these calls are called with - * %PM_SUSPEND_DISK as the state. 
*/ struct pm_ops { int (*valid)(suspend_state_t state); int (*prepare)(suspend_state_t state); int (*enter)(suspend_state_t state); int (*finish)(suspend_state_t state); - suspend_disk_method_t pm_disk_mode; }; /** @@ -276,8 +249,6 @@ extern void device_power_up(void); extern void device_resume(void); #ifdef CONFIG_PM -extern suspend_disk_method_t pm_disk_mode; - extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 9d2aa1a12aa0..d74da9122b60 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -32,6 +32,24 @@ static inline int pm_prepare_console(void) { return 0; } static inline void pm_restore_console(void) {} #endif +/** + * struct hibernation_ops - hibernation platform support + * + * The methods in this structure allow a platform to override the default + * mechanism of shutting down the machine during a hibernation transition. + * + * All three methods must be assigned. + * + * @prepare: prepare system for hibernation + * @enter: shut down system after state has been saved to disk + * @finish: finish/clean up after state has been reloaded + */ +struct hibernation_ops { + int (*prepare)(void); + int (*enter)(void); + void (*finish)(void); +}; + #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) /* kernel/power/snapshot.c */ extern void __init register_nosave_region(unsigned long, unsigned long); @@ -39,11 +57,17 @@ extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); extern unsigned long get_safe_page(gfp_t gfp_mask); + +extern void hibernation_set_ops(struct hibernation_ops *ops); +extern int hibernate(void); #else static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} + +static inline void hibernation_set_ops(struct hibernation_ops *ops) {} +static inline int hibernate(void) { return -ENOSYS; } #endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ void save_processor_state(void); -- cgit v1.2.3 From dd2a345f8f002845636dbf5d2d768bb5cd8a5f59 Mon Sep 17 00:00:00 2001 From: Dave Gilbert Date: Wed, 9 May 2007 02:33:24 -0700 Subject: Display all possible partitions when the root filesystem failed to mount Display all possible partitions when the root filesystem is not mounted. This helps to track spell'o's and missing drivers. Updated to work with newer kernels. 
Example output: VFS: Cannot open root device "foobar" or unknown-block(0,0) Please append a correct "root=" boot option; here are the available partitions: 0800 8388608 sda driver: sd 0801 192748 sda1 0802 8193150 sda2 0810 4194304 sdb driver: sd Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) [akpm@linux-foundation.org: cleanups, fix printk warnings] Signed-off-by: Jan Engelhardt Cc: Dave Gilbert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c65da7cabb2..f589559cf070 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -413,6 +413,7 @@ char *disk_name (struct gendisk *hd, int part, char *buf); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); +extern void printk_all_partitions(void); extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); -- cgit v1.2.3 From 2f4dfe206a2fc07099dfad77a8ea2f4b4ae2140f Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Wed, 9 May 2007 02:33:25 -0700 Subject: Remove hardcoding of hard_smp_processor_id on UP systems With the advent of kdump, the assumption that the boot CPU when booting an UP kernel is always the CPU with a particular hardware ID (often 0) (usually referred to as BSP on some architectures) is not valid anymore. The reason being that the dump capture kernel boots on the crashed CPU (the CPU that invoked crash_kexec), which may be or may not be that particular CPU. Move definition of hard_smp_processor_id for the UP case to architecture-specific code ("asm/smp.h") where it belongs, so that each architecture can provide its own implementation. Signed-off-by: Fernando Luis Vazquez Cao Cc: "Luck, Tony" Acked-by: Andi Kleen Cc: "Eric W. Biederman" Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ba23ec8211b..3f70149eabbb 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -83,7 +83,6 @@ void smp_prepare_boot_cpu(void); * These macros fold the SMP functionality into a single CPU system */ #define raw_smp_processor_id() 0 -#define hard_smp_processor_id() 0 static inline int up_smp_call_function(void) { return 0; -- cgit v1.2.3 From 8813d1c00ca923c1683da625ff85959be1db9a49 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 9 May 2007 02:33:30 -0700 Subject: mca: add integrated device bus matching The MCA bus has a few "integrated" functions, which are effectively virtual slots on the bus. The problem is that these special functions don't have dedicated pos IDs, so we have to manufacture ids for them outside the pos space ... and these ids can't be matched by the standard matching function, so add a special registration that requests a list of pos ids or a particular integrated function. 
Signed-off-by: James Bottomley Signed-off-by: Linus Torvalds --- include/linux/mca.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mca.h b/include/linux/mca.h index 5cff2923092b..37972704617f 100644 --- a/include/linux/mca.h +++ b/include/linux/mca.h @@ -94,6 +94,7 @@ struct mca_bus { struct mca_driver { const short *id_table; void *driver_data; + int integrated_id; struct device_driver driver; }; #define to_mca_driver(mdriver) container_of(mdriver, struct mca_driver, driver) @@ -125,6 +126,7 @@ extern enum MCA_AdapterStatus mca_device_status(struct mca_device *mca_dev); extern struct bus_type mca_bus_type; extern int mca_register_driver(struct mca_driver *drv); +extern int mca_register_driver_integrated(struct mca_driver *, int); extern void mca_unregister_driver(struct mca_driver *drv); /* WARNING: only called by the boot time device setup */ -- cgit v1.2.3 From 55c0d1f83e481dd6c77f52f7dcfeb043b8b740fa Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 May 2007 02:33:37 -0700 Subject: Move sig_kernel_* et al macros to linux/signal.h This patch moves the sig_kernel_* and related macros from kernel/signal.c to linux/signal.h, and cleans them up slightly. I need the sig_kernel_* macros for default signal behavior in the utrace code, and want to avoid duplication or overhead to share the knowledge. Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 125 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 14749056dd63..3fa0fab4a04b 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -243,6 +243,131 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, extern struct kmem_cache *sighand_cachep; +/* + * In POSIX a signal is sent either to a specific thread (Linux task) + * or to the process as a whole (Linux thread group). How the signal + * is sent determines whether it's to one thread or the whole group, + * which determines which signal mask(s) are involved in blocking it + * from being delivered until later. When the signal is delivered, + * either it's caught or ignored by a user handler or it has a default + * effect that applies to the whole thread group (POSIX process). + * + * The possible effects an unblocked signal set to SIG_DFL can have are: + * ignore - Nothing Happens + * terminate - kill the process, i.e. all threads in the group, + * similar to exit_group. The group leader (only) reports + * WIFSIGNALED status to its parent. + * coredump - write a core dump file describing all threads using + * the same mm and then kill all those threads + * stop - stop all the threads in the group, i.e. TASK_STOPPED state + * + * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. + * Other signals when not blocked and set to SIG_DFL behaves as follows. + * The job control signals also have other special effects. 
+ * + * +--------------------+------------------+ + * | POSIX signal | default action | + * +--------------------+------------------+ + * | SIGHUP | terminate | + * | SIGINT | terminate | + * | SIGQUIT | coredump | + * | SIGILL | coredump | + * | SIGTRAP | coredump | + * | SIGABRT/SIGIOT | coredump | + * | SIGBUS | coredump | + * | SIGFPE | coredump | + * | SIGKILL | terminate(+) | + * | SIGUSR1 | terminate | + * | SIGSEGV | coredump | + * | SIGUSR2 | terminate | + * | SIGPIPE | terminate | + * | SIGALRM | terminate | + * | SIGTERM | terminate | + * | SIGCHLD | ignore | + * | SIGCONT | ignore(*) | + * | SIGSTOP | stop(*)(+) | + * | SIGTSTP | stop(*) | + * | SIGTTIN | stop(*) | + * | SIGTTOU | stop(*) | + * | SIGURG | ignore | + * | SIGXCPU | coredump | + * | SIGXFSZ | coredump | + * | SIGVTALRM | terminate | + * | SIGPROF | terminate | + * | SIGPOLL/SIGIO | terminate | + * | SIGSYS/SIGUNUSED | coredump | + * | SIGSTKFLT | terminate | + * | SIGWINCH | ignore | + * | SIGPWR | terminate | + * | SIGRTMIN-SIGRTMAX | terminate | + * +--------------------+------------------+ + * | non-POSIX signal | default action | + * +--------------------+------------------+ + * | SIGEMT | coredump | + * +--------------------+------------------+ + * + * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". + * (*) Special job control effects: + * When SIGCONT is sent, it resumes the process (all threads in the group) + * from TASK_STOPPED state and also clears any pending/queued stop signals + * (any of those marked with "stop(*)"). This happens regardless of blocking, + * catching, or ignoring SIGCONT. When any stop signal is sent, it clears + * any pending/queued SIGCONT signals; this happens regardless of blocking, + * catching, or ignored the stop signal, though (except for SIGSTOP) the + * default action of stopping the process may happen later or never. 
+ */ + +#ifdef SIGEMT +#define SIGEMT_MASK rt_sigmask(SIGEMT) +#else +#define SIGEMT_MASK 0 +#endif + +#if SIGRTMIN > BITS_PER_LONG +#define rt_sigmask(sig) (1ULL << ((sig)-1)) +#else +#define rt_sigmask(sig) sigmask(sig) +#endif +#define siginmask(sig, mask) (rt_sigmask(sig) & (mask)) + +#define SIG_KERNEL_ONLY_MASK (\ + rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) + +#define SIG_KERNEL_STOP_MASK (\ + rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ + rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) + +#define SIG_KERNEL_COREDUMP_MASK (\ + rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ + rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ + rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ + rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ + rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ + SIGEMT_MASK ) + +#define SIG_KERNEL_IGNORE_MASK (\ + rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ + rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) + +#define sig_kernel_only(sig) \ + (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_ONLY_MASK)) +#define sig_kernel_coredump(sig) \ + (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_COREDUMP_MASK)) +#define sig_kernel_ignore(sig) \ + (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_IGNORE_MASK)) +#define sig_kernel_stop(sig) \ + (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_STOP_MASK)) + +#define sig_needs_tasklist(sig) ((sig) == SIGCONT) + +#define sig_user_defined(t, signr) \ + (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ + ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) + +#define sig_fatal(t, signr) \ + (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ + (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) + #endif /* __KERNEL__ */ #endif /* _LINUX_SIGNAL_H */ -- cgit v1.2.3 From 18d8362d517cb2bd97761294924fe6c2a6ee5e3c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 9 May 2007 02:33:39 -0700 Subject: mutex_lock_interruptible(): add __must_check It's not sane to use mutex_lock_interruptible() and to then ignore the result. Ditto down_interruptible(), but I'm lazy. Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mutex.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b81bc2adaeff..0d50ea3df689 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -121,11 +121,12 @@ static inline int fastcall mutex_is_locked(struct mutex *lock) * Also see Documentation/mutex-design.txt. */ extern void fastcall mutex_lock(struct mutex *lock); -extern int fastcall mutex_lock_interruptible(struct mutex *lock); +extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); -extern int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); +extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, + unsigned int subclass); #else # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) -- cgit v1.2.3 From b89deed32ccc96098bd6bc953c64bba6b847774f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:33:52 -0700 Subject: implement flush_work() A basic problem with flush_scheduled_work() is that it blocks behind _all_ presently-queued works, rather than just the work whcih the caller wants to flush. 
If the caller holds some lock, and if one of the queued work happens to want that lock as well then accidental deadlocks can occur. One example of this is the phy layer: it wants to flush work while holding rtnl_lock(). But if a linkwatch event happens to be queued, the phy code will deadlock because the linkwatch callback function takes rtnl_lock. So we implement a new function which will flush a *single* work - just the one which the caller wants to free up. Thus we avoid the accidental deadlocks which can arise from unrelated subsystems' callbacks taking shared locks. flush_work() non-blockingly dequeues the work_struct which we want to kill, then it waits for its handler to complete on all CPUs. Add ->current_work to the "struct cpu_workqueue_struct", it points to currently running "struct work_struct". When flush_work(work) detects ->current_work == work, it inserts a barrier at the _head_ of ->worklist (and thus right _after_ that work) and waits for completition. This means that the next work fired on that CPU will be this barrier, or another barrier queued by concurrent flush_work(), so the caller of flush_work() will be woken before any "regular" work has a chance to run. When wait_on_work() unlocks workqueue_mutex (or whatever we choose to protect against CPU hotplug), CPU may go away. But in that case take_over_work() will move a barrier we queued to another CPU, it will be fired sometime, and wait_on_work() will be woken. Actually, we are doing cleanup_workqueue_thread()->kthread_stop() before take_over_work(), so cwq->thread should complete its ->worklist (and thus the barrier), because currently we don't check kthread_should_stop() in run_workqueue(). But even if we did, everything should be ok. [akpm@osdl.org: cleanup] [akpm@osdl.org: add flush_work_keventd() wrapper] Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f16ba1e0687d..26a70992dec8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -178,6 +178,8 @@ extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delay extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); +extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); +extern void flush_work_keventd(struct work_struct *work); extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(run_scheduled_work(struct work_struct *work)); @@ -199,7 +201,7 @@ int execute_in_process_context(work_func_t fn, struct execute_work *); * Kill off a pending schedule_delayed_work(). Note that the work callback * function may still be running on return from cancel_delayed_work(), unless * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or - * cancel_work_sync() to wait on it. + * flush_work() or cancel_work_sync() to wait on it. */ static inline int cancel_delayed_work(struct delayed_work *work) { -- cgit v1.2.3 From 19a75d83ffeab004cfcfac64024ad3997bac7220 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 9 May 2007 02:33:56 -0700 Subject: kblockd: use flush_work Switch the kblockd flushing from a global flush to a more specific flush_work(). (akpm: bypassed maintainers, sorry. 
There are other patches which depend on this) Cc: "Maciej W. Rozycki" Cc: David Howells Cc: Jens Axboe Cc: Nick Piggin Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a686eabe22d6..db5b00a792f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -854,7 +854,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct work_struct *work); -void kblockd_flush(void); +void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From 7c9cb38302e78d24e37f7d8a2ea7eed4ae5f2fa7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 9 May 2007 02:34:01 -0700 Subject: relay: use plain timer instead of delayed work relay doesn't need to use schedule_delayed_work() for waking readers when a simple timer will do. Signed-off-by: Tom Zanussi Cc: Satyam Sharma Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index 759a0f97bec2..6cd8c4425fc7 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -38,7 +39,7 @@ struct rchan_buf size_t subbufs_consumed; /* count of sub-buffers consumed */ struct rchan *chan; /* associated channel */ wait_queue_head_t read_wait; /* reader wait queue */ - struct delayed_work wake_readers; /* reader wake-up work struct */ + struct timer_list timer; /* reader wake-up timer */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ struct page **page_array; /* array of current buffer pages */ -- cgit v1.2.3 From 6f7cc11aa6c7d5002e16096c7590944daece70ed Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Wed, 9 May 2007 02:34:02 -0700 Subject: Extend notifier_call_chain to count nr_calls made Since 2.6.18-something, the community has been bugged by the problem to provide a clean and a stable mechanism to postpone a cpu-hotplug event as lock_cpu_hotplug was badly broken. This is another proposal towards solving that problem. This one is along the lines of the solution provided in kernel/workqueue.c Instead of having a global mechanism like lock_cpu_hotplug, we allow the subsytems to define their own per-subsystem hot cpu mutexes. These would be taken(released) where ever we are currently calling lock_cpu_hotplug(unlock_cpu_hotplug). Also, in the per-subsystem hotcpu callback function,we take this mutex before we handle any pre-cpu-hotplug events and release it once we finish handling the post-cpu-hotplug events. A standard means for doing this has been provided in [PATCH 2/4] and demonstrated in [PATCH 3/4]. The ordering of these per-subsystem mutexes might still prove to be a problem, but hopefully lockdep should help us get out of that muddle. The patch set to be applied against linux-2.6.19-rc5 is as follows: [PATCH 1/4] : Extend notifier_call_chain with an option to specify the number of notifications to be sent and also count the number of notifications actually sent. 
[PATCH 2/4] : Define events CPU_LOCK_ACQUIRE and CPU_LOCK_RELEASE and send out notifications for these in _cpu_up and _cpu_down. This would help us standardise the acquire and release of the subsystem locks in the hotcpu callback functions of these subsystems. [PATCH 3/4] : Eliminate lock_cpu_hotplug from kernel/sched.c. [PATCH 4/4] : In workqueue_cpu_callback function, acquire(release) the workqueue_mutex while handling CPU_LOCK_ACQUIRE(CPU_LOCK_RELEASE). If the per-subsystem-locking approach survives the test of time, we can expect a slow phasing out of lock_cpu_hotplug, which has not yet been eliminated in these patches :) This patch: Provide notifier_call_chain with an option to call only a specified number of notifiers and also record the number of call to notifiers made. The need for this enhancement was identified in the post entitled "Slab - Eliminate lock_cpu_hotplug from slab" (http://lkml.org/lkml/2006/10/28/92) by Ravikiran G Thirumalai and Andrew Morton. This patch adds two additional parameters to notifier_call_chain API namely - int nr_to_calls : Number of notifier_functions to be called. The don't care value is -1. - unsigned int *nr_calls : Records the total number of notifier_funtions called by notifier_call_chain. The don't care value is NULL. [michal.k.k.piotrowski@gmail.com: build fix] Credit: Andrew Morton Signed-off-by: Gautham R Shenoy Signed-off-by: Michal Piotrowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 52 ++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 10a43ed0527e..e34221bf8946 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -112,32 +112,40 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #ifdef __KERNEL__ -extern int atomic_notifier_chain_register(struct atomic_notifier_head *, - struct notifier_block *); -extern int blocking_notifier_chain_register(struct blocking_notifier_head *, - struct notifier_block *); -extern int raw_notifier_chain_register(struct raw_notifier_head *, - struct notifier_block *); -extern int srcu_notifier_chain_register(struct srcu_notifier_head *, - struct notifier_block *); - -extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *, - struct notifier_block *); -extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *, - struct notifier_block *); -extern int raw_notifier_chain_unregister(struct raw_notifier_head *, - struct notifier_block *); -extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *, - struct notifier_block *); - -extern int atomic_notifier_call_chain(struct atomic_notifier_head *, +extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, + struct notifier_block *nb); +extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *nb); +extern int raw_notifier_chain_register(struct raw_notifier_head *nh, + struct notifier_block *nb); +extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, + struct notifier_block *nb); + +extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, + struct notifier_block *nb); +extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, + struct notifier_block *nb); +extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, + struct notifier_block *nb); +extern int 
srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, + struct notifier_block *nb); + +extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); -extern int blocking_notifier_call_chain(struct blocking_notifier_head *, +extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); +extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); -extern int raw_notifier_call_chain(struct raw_notifier_head *, +extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); +extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); -extern int srcu_notifier_call_chain(struct srcu_notifier_head *, +extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); +extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); +extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ -- cgit v1.2.3 From baaca49f415b25fdbe2a8f3c22b39929e450fbfd Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Wed, 9 May 2007 02:34:03 -0700 Subject: Define and use new events,CPU_LOCK_ACQUIRE and CPU_LOCK_RELEASE This is an attempt to provide an alternate mechanism for postponing a hotplug event instead of using a global mechanism like lock_cpu_hotplug. The proposal is to add two new events namely CPU_LOCK_ACQUIRE and CPU_LOCK_RELEASE. The notification for these two events would be sent out before and after a cpu_hotplug event respectively. During the CPU_LOCK_ACQUIRE event, a cpu-hotplug-aware subsystem is supposed to acquire any per-subsystem hotcpu mutex ( Eg. workqueue_mutex in kernel/workqueue.c ). During the CPU_LOCK_RELEASE release event the cpu-hotplug-aware subsystem is supposed to release the per-subsystem hotcpu mutex. The reasons for defining new events as opposed to reusing the existing events like CPU_UP_PREPARE/CPU_UP_FAILED/CPU_ONLINE for locking/unlocking of per-subsystem hotcpu mutexes are as follow: - CPU_LOCK_ACQUIRE: All hotcpu mutexes are taken before subsystems start handling pre-hotplug events like CPU_UP_PREPARE/CPU_DOWN_PREPARE etc, thus ensuring a clean handling of these events. - CPU_LOCK_RELEASE: The hotcpu mutexes will be released only after all subsystems have handled post-hotplug events like CPU_DOWN_FAILED, CPU_DEAD,CPU_ONLINE etc thereby ensuring that there are no subsequent clashes amongst the interdependent subsystems after a cpu hotplugs. This patch also uses __raw_notifier_call chain in _cpu_up to take care of the dependency between the two consequetive calls to raw_notifier_call_chain. 
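Sketched in code, a subsystem following this scheme handles the new events in its hotcpu callback roughly like this (the names are invented; the workqueue_mutex handling mentioned above is the in-tree model):

#include <linux/mutex.h>
#include <linux/notifier.h>

static DEFINE_MUTEX(mysubsys_hotcpu_mutex);

static int mysubsys_cpu_callback(struct notifier_block *nfb,
				 unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_LOCK_ACQUIRE:
		mutex_lock(&mysubsys_hotcpu_mutex);
		break;
	case CPU_UP_PREPARE:
	case CPU_DOWN_PREPARE:
		/* pre-hotplug handling runs with the mutex already held */
		break;
	case CPU_LOCK_RELEASE:
		mutex_unlock(&mysubsys_hotcpu_mutex);
		break;
	}
	return NOTIFY_OK;
}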
[akpm@linux-foundation.org: fix a bug] Signed-off-by: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index e34221bf8946..1903e5490c04 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -194,6 +194,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ +#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ +#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ -- cgit v1.2.3 From 7097a87afe937a5879528d52880c2d95f089e96c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:10 -0700 Subject: workqueue: kill run_scheduled_work() Because it has no callers. Actually, I think the whole idea of run_scheduled_work() was not right, not good to mix "unqueue this work and execute its ->func()" in one function. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 26a70992dec8..2a58f16e1961 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -182,7 +182,6 @@ extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); extern void flush_work_keventd(struct work_struct *work); extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(run_scheduled_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); -- cgit v1.2.3 From 1634c48f8b85dcb05101f1eb2eab9af40b5976da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:18 -0700 Subject: make cancel_rearming_delayed_work() work on any workqueue, not just keventd_wq cancel_rearming_delayed_workqueue(wq, dwork) doesn't need the first parameter. We don't hang on un-queued dwork any longer, and work->data doesn't change its type. This means we can always figure out "wq" from dwork when it is needed. Remove this parameter, and rename the function to cancel_rearming_delayed_work(). Re-create an inline "obsolete" cancel_rearming_delayed_workqueue(wq) which just calls cancel_rearming_delayed_work(). 
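For callers the conversion is mechanical; an (invented) driver that used to pass its workqueue simply drops that argument:

	/* before */
	cancel_rearming_delayed_workqueue(mydev->wq, &mydev->poll_work);

	/* after */
	cancel_rearming_delayed_work(&mydev->poll_work);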
Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2a58f16e1961..27110c04f21e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -191,9 +191,6 @@ extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); -void cancel_rearming_delayed_work(struct delayed_work *work); -void cancel_rearming_delayed_workqueue(struct workqueue_struct *, - struct delayed_work *); int execute_in_process_context(work_func_t fn, struct execute_work *); /* @@ -212,4 +209,14 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } +extern void cancel_rearming_delayed_work(struct delayed_work *work); + +/* Obsolete. use cancel_rearming_delayed_work() */ +static inline +void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *work) +{ + cancel_rearming_delayed_work(work); +} + #endif -- cgit v1.2.3 From 23b2e5991afde5af91a1a661d7f47ee56120759e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:19 -0700 Subject: workqueue: kill NOAUTOREL works We don't have any users, and it is not so trivial to use NOAUTOREL works correctly. It is better to simplify API. Delete NOAUTOREL support and rename work_release to work_clear_pending to avoid a confusion. Signed-off-by: Oleg Nesterov Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 64 +++++++---------------------------------------- 1 file changed, 9 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 27110c04f21e..e1581dce5890 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -24,15 +24,13 @@ typedef void (*work_func_t)(struct work_struct *work); struct work_struct { atomic_long_t data; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ -#define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ #define WORK_STRUCT_FLAG_MASK (3UL) #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; }; -#define WORK_DATA_INIT(autorelease) \ - ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) struct delayed_work { struct work_struct work; @@ -44,14 +42,8 @@ struct execute_work { }; #define __WORK_INITIALIZER(n, f) { \ - .data = WORK_DATA_INIT(0), \ - .entry = { &(n).entry, &(n).entry }, \ - .func = (f), \ - } - -#define __WORK_INITIALIZER_NAR(n, f) { \ - .data = WORK_DATA_INIT(1), \ - .entry = { &(n).entry, &(n).entry }, \ + .data = WORK_DATA_INIT(), \ + .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } @@ -60,23 +52,12 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } -#define __DELAYED_WORK_INITIALIZER_NAR(n, f) { \ - .work = __WORK_INITIALIZER_NAR((n).work, (f)), \ - .timer = TIMER_INITIALIZER(NULL, 0, 0), \ - } - #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) -#define DECLARE_WORK_NAR(n, f) \ - struct work_struct n = __WORK_INITIALIZER_NAR(n, f) - #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) -#define DECLARE_DELAYED_WORK_NAR(n, f) \ - struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f) - /* * 
initialize a work item's function pointer */ @@ -95,16 +76,9 @@ struct execute_work { * assignment of the work data initializer allows the compiler * to generate better code. */ -#define INIT_WORK(_work, _func) \ - do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \ - INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ - } while (0) - -#define INIT_WORK_NAR(_work, _func) \ +#define INIT_WORK(_work, _func) \ do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) @@ -115,12 +89,6 @@ struct execute_work { init_timer(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_NAR(_work, _func) \ - do { \ - INIT_WORK_NAR(&(_work)->work, (_func)); \ - init_timer(&(_work)->timer); \ - } while (0) - #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -143,24 +111,10 @@ struct execute_work { work_pending(&(w)->work) /** - * work_release - Release a work item under execution - * @work: The work item to release - * - * This is used to release a work item that has been initialised with automatic - * release mode disabled (WORK_STRUCT_NOAUTOREL is set). This gives the work - * function the opportunity to grab auxiliary data from the container of the - * work_struct before clearing the pending bit as the work_struct may be - * subject to deallocation the moment the pending bit is cleared. - * - * In such a case, this should be called in the work function after it has - * fetched any data it may require from the containter of the work_struct. - * After this function has been called, the work_struct may be scheduled for - * further execution or it may be deallocated unless other precautions are - * taken. - * - * This should also be used to release a delayed work item. + * work_clear_pending - for internal use only, mark a work item as not pending + * @work: The work item in question */ -#define work_release(work) \ +#define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) @@ -205,7 +159,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) ret = del_timer(&work->timer); if (ret) - work_release(&work->work); + work_clear_pending(&work->work); return ret; } -- cgit v1.2.3 From 28e53bddf814485699a4142bc056fd37d4e11dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:22 -0700 Subject: unify flush_work/flush_work_keventd and rename it to cancel_work_sync flush_work(wq, work) doesn't need the first parameter, we can use cwq->wq (this was possible from the very beginnig, I missed this). So we can unify flush_work_keventd and flush_work. Also, rename flush_work() to cancel_work_sync() and fix all callers. Perhaps this is not the best name, but "flush_work" is really bad. (akpm: this is why the earlier patches bypassed maintainers) Signed-off-by: Oleg Nesterov Cc: Jeff Garzik Cc: "David S. 
Miller" Cc: Jens Axboe Cc: Tejun Heo Cc: Auke Kok , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e1581dce5890..d555f31c0746 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -128,30 +128,33 @@ extern struct workqueue_struct *__create_workqueue(const char *name, extern void destroy_workqueue(struct workqueue_struct *wq); extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); -extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay)); +extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay)); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay); + struct delayed_work *work, unsigned long delay); + extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); -extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); -extern void flush_work_keventd(struct work_struct *work); +extern void flush_scheduled_work(void); extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); - -extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); +extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, + unsigned long delay)); +extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, + unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); +extern void cancel_work_sync(struct work_struct *work); + /* * Kill off a pending schedule_delayed_work(). Note that the work callback * function may still be running on return from cancel_delayed_work(), unless * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or - * flush_work() or cancel_work_sync() to wait on it. + * cancel_work_sync() to wait on it. */ static inline int cancel_delayed_work(struct delayed_work *work) { -- cgit v1.2.3 From 73c279927f89561ecb45b2dfdf9314bafcfd9f67 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 May 2007 02:34:32 -0700 Subject: kthread: don't depend on work queues Currently there is a circular reference between work queue initialization and kthread initialization. This prevents the kthread infrastructure from initializing until after work queues have been initialized. We want the properties of tasks created with kthread_create to be as close as possible to the init_task and to not be contaminated by user processes. The later we start our kthreadd that creates these tasks the harder it is to avoid contamination from user processes and the more of a mess we have to clean up because the defaults have changed on us. So this patch modifies the kthread support to not use work queues but to instead use a simple list of structures, and to have kthreadd start from init_task immediately after our kernel thread that execs /sbin/init. 
By being a true child of init_task we only have to change those process settings that we want to have different from init_task, such as our process name, the cpus that are allowed, blocking all signals and setting SIGCHLD to SIG_IGN so that all of our children are reaped automatically. By being a true child of init_task we also naturally get our ppid set to 0 and do not wind up as a child of PID == 1. Ensuring that tasks generated by kthread_create will not slow down the functioning of the wait family of functions. [akpm@linux-foundation.org: use interruptible sleeps] Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 1c65e7a9f186..00dd957e245b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -30,4 +30,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu); int kthread_stop(struct task_struct *k); int kthread_should_stop(void); +int kthreadd(void *unused); +extern struct task_struct *kthreadd_task; + #endif /* _LINUX_KTHREAD_H */ -- cgit v1.2.3 From 10ab825bdef8df510f99c703a5a2d9b13a4e31a5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:37 -0700 Subject: change kernel threads to ignore signals instead of blocking them Currently kernel threads use sigprocmask(SIG_BLOCK) to protect against signals. This doesn't prevent the signal delivery, this only blocks signal_wake_up(). Every "killall -33 kthreadd" means a "struct siginfo" leak. Change kthreadd_setup() to set all handlers to SIG_IGN instead of blocking them (make a new helper ignore_signals() for that). If the kernel thread needs some signal, it should use allow_signal() anyway, and in that case it should not use CLONE_SIGHAND. Note that we can't change daemonize() (should die!) in the same way, because it can be used along with CLONE_SIGHAND. This means that allow_signal() still should unblock the signal to work correctly with daemonize()ed threads. However, disallow_signal() doesn't block the signal any longer but ignores it. NOTE: with or without this patch the kernel threads are not protected from handle_stop_signal(), this seems harmless, but not good. Signed-off-by: Oleg Nesterov Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d95c480f58d..28000b1658f9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1317,6 +1317,7 @@ extern int in_egroup_p(gid_t); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -- cgit v1.2.3 From 8842c9655b2b7f0e8e6c50a773b649e5d8a57678 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 9 May 2007 02:34:46 -0700 Subject: remove nfs4_acl_add_ace() nfs4_acl_add_ace() can now be removed. Signed-off-by: Adrian Bunk Acked-by: Neil Brown Acked-by: J. 
Bruce Fields Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfs4_acl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h index 409b6e02f337..c9c05a78e9bb 100644 --- a/include/linux/nfs4_acl.h +++ b/include/linux/nfs4_acl.h @@ -44,7 +44,6 @@ #define NFS4_ACL_MAX 170 struct nfs4_acl *nfs4_acl_new(int); -void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); int nfs4_acl_get_whotype(char *, u32); int nfs4_acl_write_who(int who, char *p); int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, -- cgit v1.2.3 From 7ac1bea5507218da03f6005d228789da5a831c3f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 May 2007 02:34:48 -0700 Subject: knfsd: rename sk_defer_lock to sk_lock Now that sk_defer_lock protects two different things, make the name more generic. Also don't bother with disabling _bh as the lock is only ever taken from process context. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svcsock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7909687557bf..e21dd93ac4b7 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -37,7 +37,8 @@ struct svc_sock { atomic_t sk_reserved; /* space on outq that is reserved */ - spinlock_t sk_defer_lock; /* protects sk_deferred */ + spinlock_t sk_lock; /* protects sk_deferred and + * sk_info_authunix */ struct list_head sk_deferred; /* deferred requests that need to * be revisted */ struct mutex sk_mutex; /* to serialize sending data */ -- cgit v1.2.3 From cd123012d99fde4759500fee611e724e4f3016e3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 May 2007 02:34:50 -0700 Subject: RPC: add wrapper for svc_reserve to account for checksum When the kernel calls svc_reserve to downsize the expected size of an RPC reply, it fails to account for the possibility of a checksum at the end of the packet. If a client mounts a NFSv2/3 with sec=krb5i/p, and does I/O then you'll generally see messages similar to this in the server's ring buffer: RPC request reserved 164 but used 208 While I was never able to verify it, I suspect that this problem is also the root cause of some oopses I've seen under these conditions: https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=227726 This is probably also a problem for other sec= types and for NFSv4. The large reserved size for NFSv4 compound packets seems to generally paper over the problem, however. This patch adds a wrapper for svc_reserve that accounts for the possibility of a checksum. It also fixes up the appropriate callers of svc_reserve to call the wrapper. For now, it just uses a hardcoded value that I determined via testing. That value may need to be revised upward as things change, or we may want to eventually add a new auth_op that attempts to calculate this somehow. Unfortunately, there doesn't seem to be a good way to reliably determine the expected checksum length prior to actually calculating it, particularly with schemes like spkm3. Signed-off-by: Jeff Layton Acked-by: Neil Brown Cc: Trond Myklebust Acked-by: J. 
Bruce Fields Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 35fa4d5aadd0..4a7ae8ab6eb8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -396,4 +396,23 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); #define RPC_MAX_ADDRBUFLEN (63U) +/* + * When we want to reduce the size of the reserved space in the response + * buffer, we need to take into account the size of any checksum data that + * may be at the end of the packet. This is difficult to determine exactly + * for all cases without actually generating the checksum, so we just use a + * static value. + */ +static inline void +svc_reserve_auth(struct svc_rqst *rqstp, int space) +{ + int added_space = 0; + + switch(rqstp->rq_authop->flavour) { + case RPC_AUTH_GSS: + added_space = RPC_MAX_AUTH_SIZE; + } + return svc_reserve(rqstp, space + added_space); +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From b8522ead3534c6cd06752b47a3bc380956191a2a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 9 May 2007 02:34:58 -0700 Subject: aio is unlikely Stick an unlikely() around is_aio(): I assert that most IO is synchronous. Cc: Suparna Bhattacharya Cc: Ingo Molnar Cc: Benjamin LaHaise Cc: Zach Brown Cc: Ulrich Drepper Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index a30ef13c9e62..43dc2ebfaa0e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -226,7 +226,8 @@ int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, __put_ioctx(kioctx); \ } while (0) -#define in_aio() !is_sync_wait(current->io_wait) +#define in_aio() (unlikely(!is_sync_wait(current->io_wait))) + /* may be used for debugging */ #define warn_if_async() \ do { \ -- cgit v1.2.3 From f34c506b0385b43abd25c490335036ecbb173aed Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 9 May 2007 02:34:59 -0700 Subject: declare struct ktime Some smarty went and inflicted ktime_t as a typedef upon us, so we cannot forward declare it. Create a new `union ktime', map ktime_t onto that. Now we need to kill off this ktime_t thing. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 81bb9c7a4eb3..c762954bda14 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -43,7 +43,7 @@ * plain scalar nanosecond based representation can be selected by the * config switch CONFIG_KTIME_SCALAR. */ -typedef union { +union ktime { s64 tv64; #if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) struct { @@ -54,7 +54,9 @@ typedef union { # endif } tv; #endif -} ktime_t; +}; + +typedef union ktime ktime_t; /* Kill this */ #define KTIME_MAX ((s64)~((u64)1 << 63)) #if (BITS_PER_LONG == 64) -- cgit v1.2.3 From c19384b5b296905d4988c7c684ff540a0f9d65be Mon Sep 17 00:00:00 2001 From: Pierre Peiffer Date: Wed, 9 May 2007 02:35:02 -0700 Subject: Make futex_wait() use an hrtimer for timeout This patch modifies futex_wait() to use an hrtimer + schedule() in place of schedule_timeout(). 
schedule_timeout() is tick based, therefore the timeout granularity is the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution timer for timeout wakeup, we can attain a much finer timeout granularity (in the microsecond range). This parallels what is already done for futex_lock_pi(). The timeout passed to the syscall is no longer converted to jiffies and is therefore passed to do_futex() and futex_wait() as an absolute ktime_t therefore keeping nanosecond resolution. Also this removes the need to pass the nanoseconds timeout part to futex_lock_pi() in val2. In futex_wait(), if there is no timeout then a regular schedule() is performed. Otherwise, an hrtimer is fired before schedule() is called. [akpm@linux-foundation.org: fix `make headers_check'] Signed-off-by: Sebastien Dugue Signed-off-by: Pierre Peiffer Cc: Ingo Molnar Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 820125c628c1..34e54f2b8997 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -3,6 +3,8 @@ #include +union ktime; + /* Second argument to futex syscall */ @@ -94,7 +96,7 @@ struct robust_list_head { #define ROBUST_LIST_LIMIT 2048 #ifdef __KERNEL__ -long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, +long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); extern int -- cgit v1.2.3 From d0aa7a70bf03b9de9e995ab272293be1f7937822 Mon Sep 17 00:00:00 2001 From: Pierre Peiffer Date: Wed, 9 May 2007 02:35:02 -0700 Subject: futex_requeue_pi optimization This patch provides the futex_requeue_pi functionality, which allows some threads waiting on a normal futex to be requeued on the wait-queue of a PI-futex. This provides an optimization, already used for (normal) futexes, to be used with the PI-futexes. This optimization is currently used by the glibc in pthread_broadcast, when using "normal" mutexes. With futex_requeue_pi, it can be used with PRIO_INHERIT mutexes too. Signed-off-by: Pierre Peiffer Cc: Ingo Molnar Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34e54f2b8997..1bd8dfcb037b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -17,6 +17,7 @@ union ktime; #define FUTEX_LOCK_PI 6 #define FUTEX_UNLOCK_PI 7 #define FUTEX_TRYLOCK_PI 8 +#define FUTEX_CMP_REQUEUE_PI 9 /* * Support for robust futexes: the kernel cleans up held futexes at @@ -84,10 +85,15 @@ struct robust_list_head { */ #define FUTEX_OWNER_DIED 0x40000000 +/* + * Some processes have been requeued on this PI-futex + */ +#define FUTEX_WAITER_REQUEUED 0x20000000 + /* * The rest of the robust-futex field is for the TID: */ -#define FUTEX_TID_MASK 0x3fffffff +#define FUTEX_TID_MASK 0x0fffffff /* * This limit protects against a deliberately circular list. @@ -111,6 +117,7 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); * We set bit 0 to indicate if it's an inode-based key. 
*/ union futex_key { + u32 __user *uaddr; struct { unsigned long pgoff; struct inode *inode; -- cgit v1.2.3 From 34f01cc1f512fa783302982776895c73714ebbc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 May 2007 02:35:04 -0700 Subject: FUTEX: new PRIVATE futexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Analysis of current linux futex code : -------------------------------------- A central hash table futex_queues[] holds all contexts (futex_q) of waiting threads. Each futex_wait()/futex_wait() has to obtain a spinlock on a hash slot to perform lookups or insert/deletion of a futex_q. When a futex_wait() is done, calling thread has to : 1) - Obtain a read lock on mmap_sem to be able to validate the user pointer (calling find_vma()). This validation tells us if the futex uses an inode based store (mapped file), or mm based store (anonymous mem) 2) - compute a hash key 3) - Atomic increment of reference counter on an inode or a mm_struct 4) - lock part of futex_queues[] hash table 5) - perform the test on value of futex. (rollback is value != expected_value, returns EWOULDBLOCK) (various loops if test triggers mm faults) 6) queue the context into hash table, release the lock got in 4) 7) - release the read_lock on mmap_sem 8) Eventually unqueue the context (but rarely, as this part  may be done by the futex_wake()) Futexes were designed to improve scalability but current implementation has various problems : - Central hashtable : This means scalability problems if many processes/threads want to use futexes at the same time. This means NUMA unbalance because this hashtable is located on one node. - Using mmap_sem on every futex() syscall : Even if mmap_sem is a rw_semaphore, up_read()/down_read() are doing atomic ops on mmap_sem, dirtying cache line : - lot of cache line ping pongs on SMP configurations. mmap_sem is also extensively used by mm code (page faults, mmap()/munmap()) Highly threaded processes might suffer from mmap_sem contention. mmap_sem is also used by oprofile code. Enabling oprofile hurts threaded programs because of contention on the mmap_sem cache line. - Using an atomic_inc()/atomic_dec() on inode ref counter or mm ref counter: It's also a cache line ping pong on SMP. It also increases mmap_sem hold time because of cache misses. Most of these scalability problems come from the fact that futexes are in one global namespace. As we use a central hash table, we must make sure they are all using the same reference (given by the mm subsystem). We chose to force all futexes be 'shared'. This has a cost. But fact is POSIX defined PRIVATE and SHARED, allowing clear separation, and optimal performance if carefuly implemented. Time has come for linux to have better threading performance. The goal is to permit new futex commands to avoid : - Taking the mmap_sem semaphore, conflicting with other subsystems. - Modifying a ref_count on mm or an inode, still conflicting with mm or fs. This is possible because, for one process using PTHREAD_PROCESS_PRIVATE futexes, we only need to distinguish futexes by their virtual address, no matter the underlying mm storage is. If glibc wants to exploit this new infrastructure, it should use new _PRIVATE futex subcommands for PTHREAD_PROCESS_PRIVATE futexes. And be prepared to fallback on old subcommands for old kernels. Using one global variable with the FUTEX_PRIVATE_FLAG or 0 value should be OK. PTHREAD_PROCESS_SHARED futexes should still use the old subcommands. 
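In userspace terms, the fallback strategy sketched above could look roughly like this (illustrative only, not glibc source; probing the kernel for private-opcode support is left out):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* FUTEX_PRIVATE_FLAG on new kernels, cleared to 0 once an old kernel
 * rejects the private opcodes */
static int futex_private_flag = FUTEX_PRIVATE_FLAG;

static long futex_wait_private(int *uaddr, int val)
{
	return syscall(SYS_futex, uaddr, FUTEX_WAIT | futex_private_flag,
		       val, NULL, NULL, 0);
}

static long futex_wake_private(int *uaddr, int nr_wake)
{
	return syscall(SYS_futex, uaddr, FUTEX_WAKE | futex_private_flag,
		       nr_wake, NULL, NULL, 0);
}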
Compatibility with old applications is preserved, they still hit the scalability problems, but new applications can fly :) Note : the same SHARED futex (mapped on a file) can be used by old binaries *and* new binaries, because both binaries will use the old subcommands. Note : Vast majority of futexes should be using PROCESS_PRIVATE semantic, as this is the default semantic. Almost all applications should benefit of this changes (new kernel and updated libc) Some bench results on a Pentium M 1.6 GHz (SMP kernel on a UP machine) /* calling futex_wait(addr, value) with value != *addr */ 433 cycles per futex(FUTEX_WAIT) call (mixing 2 futexes) 424 cycles per futex(FUTEX_WAIT) call (using one futex) 334 cycles per futex(FUTEX_WAIT_PRIVATE) call (mixing 2 futexes) 334 cycles per futex(FUTEX_WAIT_PRIVATE) call (using one futex) For reference : 187 cycles per getppid() call 188 cycles per umask() call 181 cycles per ni_syscall() call Signed-off-by: Eric Dumazet Pierre Peiffer Cc: "Ulrich Drepper" Cc: "Nick Piggin" Cc: "Ingo Molnar" Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 1bd8dfcb037b..899fc7f20edd 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -19,6 +19,18 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_CMP_REQUEUE_PI 9 +#define FUTEX_PRIVATE_FLAG 128 +#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG + +#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) +#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -114,8 +126,18 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); * Don't rearrange members without looking at hash_futex(). * * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. - * We set bit 0 to indicate if it's an inode-based key. 
- */ + * We use the two low order bits of offset to tell what is the kind of key : + * 00 : Private process futex (PTHREAD_PROCESS_PRIVATE) + * (no reference on an inode or mm) + * 01 : Shared futex (PTHREAD_PROCESS_SHARED) + * mapped on a file (reference on the underlying inode) + * 10 : Shared futex (PTHREAD_PROCESS_SHARED) + * (but private mapping on an mm, and reference taken on it) +*/ + +#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */ +#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */ + union futex_key { u32 __user *uaddr; struct { @@ -134,7 +156,8 @@ union futex_key { int offset; } both; }; -int get_futex_key(u32 __user *uaddr, union futex_key *key); +int get_futex_key(u32 __user *uaddr, struct rw_semaphore *shared, + union futex_key *key); void get_futex_key_refs(union futex_key *key); void drop_futex_key_refs(union futex_key *key); -- cgit v1.2.3 From 01f2705daf5a36208e69d7cf95db9c330f843af6 Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:07 -0700 Subject: fs: convert core functions to zero_user_page It's very common for file systems to need to zero part or all of a page, the simplist way is just to use kmap_atomic() and memset(). There's actually a library function in include/linux/highmem.h that does exactly that, but it's confusingly named memclear_highpage_flush(), which is descriptive of *how* it does the work rather than what the *purpose* is. So this patchset renames the function to zero_user_page(), and calls it from the various places that currently open code it. This first patch introduces the new function call, and converts all the core kernel callsites, both the open-coded ones and the old memclear_highpage_flush() ones. Following this patch is a series of conversions for each file system individually, per AKPM, and finally a patch deprecating the old call. The diffstat below shows the entire patchset. [akpm@linux-foundation.org: fix a few things] Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index a515eb0afdfb..b5f2ab42d984 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -94,17 +94,27 @@ static inline void clear_highpage(struct page *page) /* * Same but also flushes aliased cache contents to RAM. 
+ * + * This must be a macro because KM_USER0 and friends aren't defined if + * !CONFIG_HIGHMEM */ -static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) +#define zero_user_page(page, offset, size, km_type) \ + do { \ + void *kaddr; \ + \ + BUG_ON((offset) + (size) > PAGE_SIZE); \ + \ + kaddr = kmap_atomic(page, km_type); \ + memset((char *)kaddr + (offset), 0, (size)); \ + flush_dcache_page(page); \ + kunmap_atomic(kaddr, (km_type)); \ + } while (0) + + +static inline void memclear_highpage_flush(struct page *page, + unsigned int offset, unsigned int size) { - void *kaddr; - - BUG_ON(offset + size > PAGE_SIZE); - - kaddr = kmap_atomic(page, KM_USER0); - memset((char *)kaddr + offset, 0, size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, size, KM_USER0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE -- cgit v1.2.3 From f37bc2712b54ec641e0c0c8634f1a4b61d9956c0 Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:09 -0700 Subject: fs: deprecate memclear_highpage_flush Now that all the in-tree users are converted over to zero_user_page(), deprecate the old memclear_highpage_flush() call. Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b5f2ab42d984..98e2cce996a4 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -110,8 +110,7 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr, (km_type)); \ } while (0) - -static inline void memclear_highpage_flush(struct page *page, +static inline void __deprecated memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) { zero_user_page(page, offset, size, KM_USER0); -- cgit v1.2.3 From 8bb7844286fb8c9fce6f65d8288aeb09d03a5e0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2007 02:35:10 -0700 Subject: Add suspend-related notifications for CPU hotplug Since nonboot CPUs are now disabled after tasks and devices have been frozen and the CPU hotplug infrastructure is used for this purpose, we need special CPU hotplug notifications that will help the CPU-hotplug-aware subsystems distinguish normal CPU hotplug events from CPU hotplug events related to a system-wide suspend or resume operation in progress. This patch introduces such notifications and causes them to be used during suspend and resume transitions. It also changes all of the CPU-hotplug-aware subsystems to take these notifications into consideration (for now they are handled in the same way as the corresponding "normal" ones). [oleg@tv-sign.ru: cleanups] Signed-off-by: Rafael J. 
Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 1903e5490c04..9431101bf876 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -197,5 +197,17 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ +/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend + * operation in progress + */ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ -- cgit v1.2.3 From d1187ed21026fd512b87851d0ca26d9ae16f9059 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 9 May 2007 02:35:12 -0700 Subject: vmstat: use our own timer events vmstat is currently using the cache reaper to periodically bring the statistics up to date. The cache reaper only exists in SLUB as a way to provide compatibility with SLAB. This patch removes the vmstat calls from the slab allocators and provides its own handling. The advantage is also that we can use a different frequency for the updates. Refreshing vm stats is a pretty fast job, so we can run this every second and stagger it by only one tick. This will lead to some overlap in large systems. For example, a system running at 250 HZ with 1024 processors will have 4 vm updates occurring at once. However, the vm stats update only accesses per node information. It is only necessary to stagger the vm statistics updates per processor in each node. Vm counter updates occurring on distant nodes will not cause cacheline contention. We could implement an alternate approach that runs the first processor on each node at the second and then each of the other processors on a node on a subsequent tick. That may be useful to keep a large amount of the second free of timer activity. Maybe the timer folks will have some feedback on this one?
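The per-cpu staggering described above lives in mm/vmstat.c, which is outside this include/linux-only diff; a minimal sketch of the idea (vmstat_work and vmstat_update() are assumed names, not taken from this log) could look like:

#include <linux/workqueue.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct delayed_work, vmstat_work);

/* Assumed worker: folds the per-cpu counter diffs into the zone counters. */
static void vmstat_update(struct work_struct *w);

/* Arm one deferred work item per CPU, offset by one tick per CPU number,
 * so the once-a-second updates do not all fire on the same jiffy. */
static void start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	INIT_DELAYED_WORK(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, HZ + cpu);
}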
[jirislaby@gmail.com: add missing break] Cc: Arjan van de Ven Signed-off-by: Christoph Lameter Signed-off-by: Jiri Slaby Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index acb1f105870c..d9325cf8a134 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -212,8 +212,6 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); void refresh_cpu_vm_stats(int); -void refresh_vm_stats(void); - #else /* CONFIG_SMP */ /* @@ -260,7 +258,6 @@ static inline void __dec_zone_page_state(struct page *page, #define mod_zone_page_state __mod_zone_page_state static inline void refresh_cpu_vm_stats(int cpu) { } -static inline void refresh_vm_stats(void) { } #endif #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From 4037d452202e34214e8a939fa5621b2b3bbb45b7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 9 May 2007 02:35:14 -0700 Subject: Move remote node draining out of slab allocators Currently the slab allocators contain callbacks into the page allocator to perform the draining of pagesets on remote nodes. This requires SLUB to have a whole subsystem in order to be compatible with SLAB. Moving node draining out of the slab allocators avoids a section of code in SLUB. Move the node draining so that it is done when the vm statistics are updated. At that point we are already touching all the cachelines with the pagesets of a processor. Add an expire counter there. If we have to update per zone or global vm statistics then assume that the pageset will require subsequent draining. The expire counter will be decremented on each vm stats update pass until it reaches zero. Then we will drain one batch from the pageset. The draining will cause vm counter updates which will then cause another expiration until the pcp is empty. So we will drain a batch every 3 seconds. Note that remote node draining is a somewhat esoteric feature that is required on large NUMA systems because otherwise significant portions of system memory can become trapped in pcp queues. The number of pcps is determined by the number of processors and nodes in a system. A system with 4 processors and 2 nodes has 8 pcps, which is okay. But a system with 1024 processors and 512 nodes has 512k pcps with a high potential for a large amount of memory being caught in them. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 +----- include/linux/mmzone.h | 3 +++ 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a36c3d96e2..0d2ef0b082a6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -176,10 +176,6 @@ extern void FASTCALL(free_cold_page(struct page *page)); #define free_page(addr) free_pages((addr),0) void page_alloc_init(void); -#ifdef CONFIG_NUMA -void drain_node_pages(int node); -#else -static inline void drain_node_pages(int node) { }; -#endif +void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); #endif /* __LINUX_GFP_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f1544e83042..d09b1345a3a1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -83,6 +83,9 @@ struct per_cpu_pages { struct per_cpu_pageset { struct per_cpu_pages pcp[2]; /* 0: hot.
1: cold */ +#ifdef CONFIG_NUMA + s8 expire; +#endif #ifdef CONFIG_SMP s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; -- cgit v1.2.3 From b52f52a093bb1e841e014c2087b5bee7162da413 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 May 2007 02:35:15 -0700 Subject: clocksource: fix resume logic We need to make sure that the clocksources are resumed when timekeeping is resumed. The current resume logic does not guarantee this. Add a resume function pointer to the clocksource struct, so clocksource drivers which need to reinitialize the clocksource can provide a resume function. Add a resume function, which calls the clocksource resume functions where available and resets the watchdog, so a stable TSC can be used across suspend/resume. Signed-off-by: Thomas Gleixner Cc: john stultz Cc: Andi Kleen Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 2665ca04cf8f..bf297b03a4e4 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -49,6 +49,7 @@ struct clocksource; * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read + * @resume: resume function for the clocksource, if necessary * @cycle_interval: Used internally by timekeeping core, please ignore. * @xtime_interval: Used internally by timekeeping core, please ignore. */ @@ -65,6 +66,7 @@ struct clocksource { u32 shift; unsigned long flags; cycle_t (*vread)(void); + void (*resume)(void); /* timekeeping specific data, ignore */ cycle_t cycle_interval; @@ -209,6 +211,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, extern int clocksource_register(struct clocksource*); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); +extern void clocksource_resume(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); -- cgit v1.2.3 From e61a1c1c4f240cec61300c8f27518c3e47570fd4 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 9 May 2007 02:35:15 -0700 Subject: Allow arch to initialize arch field of the module structure This will later allow an arch to add module specific information via linker generated tables instead of poking directly into the module object structure. Signed-off-by: Roman Zippel Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 6d3dc9c4ff96..792d483c9af7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -356,6 +356,9 @@ struct module keeping pointers to this stuff */ char *args; }; +#ifndef MODULE_ARCH_INIT +#define MODULE_ARCH_INIT {} +#endif /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail.
But presently too much code -- cgit v1.2.3 From f7e4217b007d1f73e7e3cf10ba4fea4a608c603f Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 9 May 2007 02:35:17 -0700 Subject: rename thread_info to stack This finally renames the thread_info field in task structure to stack, so that the assumptions about this field are gone and archs have more freedom about placing the thread_info structure. Nonbroken archs which have a proper thread pointer can do the access to both current thread and task structure via a single pointer. It'll allow for a few more cleanups of the fork code, from which e.g. ia64 could benefit. Signed-off-by: Roman Zippel [akpm@linux-foundation.org: build fix] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Haavard Skinnemoen Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: "Luck, Tony" Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: Greg Ungerer Cc: Ralf Baechle Cc: Ralf Baechle Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Andi Kleen Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 +- include/linux/sched.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 795102309bf1..45170b2fa253 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -95,7 +95,7 @@ extern struct group_info init_groups; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .thread_info = &init_thread_info, \ + .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 28000b1658f9..17b72d88c4cb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -817,7 +817,7 @@ struct prio_array; struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - struct thread_info *thread_info; + void *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; @@ -1513,8 +1513,8 @@ static inline void unlock_task_sighand(struct task_struct *tsk, #ifndef __HAVE_THREAD_FUNCTIONS -#define task_thread_info(task) (task)->thread_info -#define task_stack_page(task) ((void*)((task)->thread_info)) +#define task_thread_info(task) ((struct thread_info *)(task)->stack) +#define task_stack_page(task) ((task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { @@ -1524,7 +1524,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct static inline unsigned long *end_of_stack(struct task_struct *p) { - return (unsigned long *)(p->thread_info + 1); + return (unsigned long *)(task_thread_info(p) + 1); } #endif -- cgit v1.2.3 From 0d7ebbbc6eaa5539f78ab20ed6ff1725a4e332ef Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 9 May 2007 02:35:27 -0700 Subject: compiler: introduce __used and __maybe_unused __used is defined to be __attribute__((unused)) for all pre-3.3 gcc compilers to suppress warnings for unused functions because perhaps they are referenced only in inline assembly. It is defined to be __attribute__((used)) for gcc 3.3 and later so that the code is still emitted for such functions. 
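As a usage illustration (hypothetical code, not part of this patch): a static function that is referenced only from inline assembly has no C-level callers, so the compiler would normally drop it; marking it __used keeps it in the object file:

/* No C caller; only referenced by name from an asm() statement elsewhere. */
static void __used asm_only_helper(void)
{
	/* ... */
}

Existing code that still says __attribute_used__ keeps building, since that macro is now defined in terms of __used.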
__maybe_unused is defined to be __attribute__((unused)) for both function and variable use if it could possibly be unreferenced due to the evaluation of preprocessor macros. Function prototypes shall be marked with __maybe_unused if the actual definition of the function is dependant on preprocessor macros. No update to compiler-intel.h is necessary because ICC supports both __attribute__((used)) and __attribute__((unused)) as specified by the gcc manual. __attribute_used__ is deprecated and will be removed once all current code is converted to using __used. Cc: Rusty Russell Cc: Adrian Bunk Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 1 + include/linux/compiler-gcc3.h | 6 ++++-- include/linux/compiler-gcc4.h | 3 ++- include/linux/compiler.h | 21 ++++++++++++++++++--- 4 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a9f794716a81..03ec2311fb29 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -40,3 +40,4 @@ #define noinline __attribute__((noinline)) #define __attribute_pure__ __attribute__((pure)) #define __attribute_const__ __attribute__((__const__)) +#define __maybe_unused __attribute__((unused)) diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index ecd621fd27d2..a9e2863c2dbf 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -4,9 +4,11 @@ #include #if __GNUC_MINOR__ >= 3 -# define __attribute_used__ __attribute__((__used__)) +# define __used __attribute__((__used__)) +# define __attribute_used__ __used /* deprecated */ #else -# define __attribute_used__ __attribute__((__unused__)) +# define __used __attribute__((__unused__)) +# define __attribute_used__ __used /* deprecated */ #endif #if __GNUC_MINOR__ >= 4 diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index fd0cc7c4a636..a03e9398a6c2 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -12,7 +12,8 @@ # define __inline __inline __attribute__((always_inline)) #endif -#define __attribute_used__ __attribute__((__used__)) +#define __used __attribute__((__used__)) +#define __attribute_used__ __used /* deprecated */ #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) #define __always_inline inline __attribute__((always_inline)) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3b6949b41745..498c35920762 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -108,15 +108,30 @@ extern void __chk_io_ptr(const void __iomem *); * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. * - * As of gcc 3.3, static functions that are not marked with attribute((used)) - * may be elided from the assembly file. As of gcc 3.3, static data not so + * As of gcc 3.4, static functions that are not marked with attribute((used)) + * may be elided from the assembly file. As of gcc 3.4, static data not so * marked will not be elided, but this may change in a future gcc version. * + * NOTE: Because distributions shipped with a backported unit-at-a-time + * compiler in gcc 3.3, we must define __used to be __attribute__((used)) + * for gcc >=3.3 instead of 3.4. 
+ * * In prior versions of gcc, such functions and data would be emitted, but * would be warned about except with attribute((unused)). + * + * Mark functions that are referenced only in inline assembly as __used so + * the code is emitted even though it appears to be unreferenced. */ #ifndef __attribute_used__ -# define __attribute_used__ /* unimplemented */ +# define __attribute_used__ /* deprecated */ +#endif + +#ifndef __used +# define __used /* unimplemented */ +#endif + +#ifndef __maybe_unused +# define __maybe_unused /* unimplemented */ #endif /* -- cgit v1.2.3 From 5a87ede94595f58934000e26e8b13398e63868b5 Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Wed, 9 May 2007 02:35:32 -0700 Subject: svgalib: move fb_get_caps to svgalib Move fb_get_caps() method to svgalib.c as svga_get_caps() so it can be used by s3fb, arkfb and vt8623fb. Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/svga.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/svga.h b/include/linux/svga.h index e1cc552e04fe..13ad0b82ac28 100644 --- a/include/linux/svga.h +++ b/include/linux/svga.h @@ -113,6 +113,8 @@ void svga_tilefill(struct fb_info *info, struct fb_tilerect *rect); void svga_tileblit(struct fb_info *info, struct fb_tileblit *blit); void svga_tilecursor(struct fb_info *info, struct fb_tilecursor *cursor); int svga_get_tilemax(struct fb_info *info); +void svga_get_caps(struct fb_info *info, struct fb_blit_caps *caps, + struct fb_var_screeninfo *var); int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u16 *r, int node); int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screeninfo *var, int node); -- cgit v1.2.3 From 880169dd2edc4297b7811a0542be9766ca6945bc Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 9 May 2007 02:35:33 -0700 Subject: fbdev: add support for AVR32 Provide framebuffer page protection flags and definitions of fb_readl/fb_writel for AVR32. Signed-off-by: Haavard Skinnemoen Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index dff7a728948c..c654d0e9ce33 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -868,7 +868,7 @@ struct fb_info { #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io -#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) +#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3 From 5b479c91da90eef605f851508744bfe8269591a0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 May 2007 02:35:39 -0700 Subject: md: improve partition detection in md array md currently uses ->media_changed to make sure rescan_partitions is call on md array after they are assembled. However that doesn't happen until the array is opened, which is later than some people would like. So use blkdev_ioctl to do the rescan immediately that the array has been assembled. This means we can remove all the ->change infrastructure as it was only used to trigger a partition rescan. 
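The md.c side of the change is not visible in this include/linux-only diff, but it boils down to something like the sketch below (a hedged reconstruction; the 2.6.21-era blkdev_ioctl() taking an inode is assumed). The revert at the end of this log explains why this path turned out to oops:

/* Sketch: rescan partitions as soon as the array has been assembled,
 * instead of waiting for the first open of the md device. */
struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
	blkdev_ioctl(bdev->bd_inode, NULL, BLKRRPART, 0);
	bdput(bdev);
}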
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index de72c49747c8..a121f36f4437 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -201,7 +201,6 @@ struct mddev_s struct mutex reconfig_mutex; atomic_t active; - int changed; /* true if we might need to reread partition info */ int degraded; /* whether md should consider * adding a spare */ -- cgit v1.2.3 From 18137207236285989dfc0ee7f929b954199228f3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:07 +0200 Subject: ide: fix UDMA/MWDMA/SWDMA masks (v3) * use 0x00 instead of 0x80 to disable ->{ultra,mwdma,swdma}_mask * add udma_mask field to ide_pci_device_t and use it to initialize ->ultra_mask in aec62xx, cmd64x, pdc202xx_{new,old} and piix drivers * fix UDMA masks to match with chipset specific *_ratemask() (alim15x3, hpt366, serverworks and siimage drivers need UDMA mask filtering method - done in the next patch) v2: * piix: fix cable detection for 82801AA_1 and 82372FB_1 [ Noticed by Sergei Shtylyov . ] * cmd64x: use hwif->cds->udma_mask [ Suggested by Sergei Shtylyov . ] * aec62xx: fix newly introduced bug - check DMA status not command register [ Noticed by Sergei Shtylyov . ] v3: * piix: use hwif->cds->udma_mask [ Suggested by Sergei Shtylyov . ] Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 418dfb5adadd..c9375c863584 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1257,6 +1257,7 @@ typedef struct ide_pci_device_s { unsigned int extra; struct ide_pci_device_s *next; u8 flags; + u8 udma_mask; } ide_pci_device_t; extern int ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *); -- cgit v1.2.3 From 2d5eaa6dd744a641e75503232a01f52d0768884c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:08 +0200 Subject: ide: rework the code for selecting the best DMA transfer mode (v3) Depends on the "ide: fix UDMA/MWDMA/SWDMA masks" patch. * add ide_hwif_t.udma_filter hook for filtering UDMA mask (use it in alim15x3, hpt366, siimage and serverworks drivers) * add ide_max_dma_mode() for finding best DMA mode for the device (loosely based on some older libata-core.c code) * convert ide_dma_speed() users to use ide_max_dma_mode() * make ide_rate_filter() take "ide_drive_t *drive" as an argument instead of "u8 mode" and teach it to how to use UDMA mask to do filtering * use ide_rate_filter() in hpt366 driver * remove no longer needed ide_dma_speed() and *_ratemask() * unexport eighty_ninty_three() v2: * rename ->filter_udma_mask to ->udma_filter [ Suggested by Sergei Shtylyov . 
] v3: * updated for scc_pata driver (fixes XFER_UDMA_6 filtering for user-space originated transfer mode change requests when 100MHz clock is used) Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c9375c863584..23ab4dc05009 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -717,11 +717,8 @@ typedef struct hwif_s { int (*quirkproc)(ide_drive_t *); /* driver soft-power interface */ int (*busproc)(ide_drive_t *, int); -// /* host rate limiter */ -// u8 (*ratemask)(ide_drive_t *); -// /* device rate limiter */ -// u8 (*ratefilter)(ide_drive_t *, u8); #endif + u8 (*udma_filter)(ide_drive_t *); void (*ata_input_data)(ide_drive_t *, void *, u32); void (*ata_output_data)(ide_drive_t *, void *, u32); @@ -1279,6 +1276,7 @@ int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); int __ide_dma_bad_drive(ide_drive_t *); int __ide_dma_good_drive(ide_drive_t *); int ide_use_dma(ide_drive_t *); +u8 ide_max_dma_mode(ide_drive_t *); void ide_dma_off(ide_drive_t *); void ide_dma_verbose(ide_drive_t *); int ide_set_dma(ide_drive_t *); @@ -1305,6 +1303,7 @@ extern int __ide_dma_timeout(ide_drive_t *); #else static inline int ide_use_dma(ide_drive_t *drive) { return 0; } +static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; } static inline void ide_dma_off(ide_drive_t *drive) { ; } static inline void ide_dma_verbose(ide_drive_t *drive) { ; } static inline int ide_set_dma(ide_drive_t *drive) { return 1; } @@ -1349,8 +1348,7 @@ static inline void ide_set_hwifdata (ide_hwif_t * hwif, void *data) } /* ide-lib.c */ -extern u8 ide_dma_speed(ide_drive_t *drive, u8 mode); -extern u8 ide_rate_filter(u8 mode, u8 speed); +u8 ide_rate_filter(ide_drive_t *, u8); extern int ide_dma_enable(ide_drive_t *drive); extern char *ide_xfer_verbose(u8 xfer_rate); extern void ide_toggle_bounce(ide_drive_t *drive, int on); -- cgit v1.2.3 From 29e744d088e3555f4efbdf390f01088dd66993b6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:09 +0200 Subject: ide: add ide_tune_dma() helper After reworking the code responsible for selecting the best DMA transfer mode it is now possible to add generic ide_tune_dma() helper. Convert some IDE PCI host drivers to use it (the ones left need more work). 
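In a converted host driver the DMA setup path then reduces to roughly this (hypothetical driver function; it assumes ide_tune_dma() returns non-zero once a DMA mode has been found and programmed):

/* Sketch: let the generic helper pick and set the best DMA mode. */
static int foo_config_drive_dma(ide_drive_t *drive)
{
	if (ide_tune_dma(drive))
		return 0;	/* DMA mode set, caller may enable DMA */

	return -1;		/* no usable DMA mode, fall back to PIO */
}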
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 23ab4dc05009..d03fa2d5d75a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1277,6 +1277,7 @@ int __ide_dma_bad_drive(ide_drive_t *); int __ide_dma_good_drive(ide_drive_t *); int ide_use_dma(ide_drive_t *); u8 ide_max_dma_mode(ide_drive_t *); +int ide_tune_dma(ide_drive_t *); void ide_dma_off(ide_drive_t *); void ide_dma_verbose(ide_drive_t *); int ide_set_dma(ide_drive_t *); @@ -1304,6 +1305,7 @@ extern int __ide_dma_timeout(ide_drive_t *); #else static inline int ide_use_dma(ide_drive_t *drive) { return 0; } static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; } +static inline int ide_tune_dma(ide_drive_t *drive) { return 0; } static inline void ide_dma_off(ide_drive_t *drive) { ; } static inline void ide_dma_verbose(ide_drive_t *drive) { ; } static inline int ide_set_dma(ide_drive_t *drive) { return 1; } -- cgit v1.2.3 From ecfd80e4a514123070b4cfb674b817ba75055df2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:09 +0200 Subject: ide: make /proc/ide/ optional All important information/features should be already available through sysfs and ioctl interfaces. Add CONFIG_IDE_PROC_FS (CONFIG_SCSI_PROC_FS rip-off) config option, disabling it makes IDE driver ~5 kB smaller (on x86-32). While at it add CONFIG_PROC_FS=n versions of proc_ide_{create,destroy}() and remove no longer needed #ifdefs. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d03fa2d5d75a..697c39dd66a1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -912,15 +912,15 @@ typedef struct { write_proc_t *write_proc; } ide_proc_entry_t; -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_IDE_PROC_FS extern struct proc_dir_entry *proc_ide_root; -extern void proc_ide_create(void); -extern void proc_ide_destroy(void); -extern void create_proc_ide_interfaces(void); +void proc_ide_create(void); +void proc_ide_destroy(void); +void create_proc_ide_interfaces(void); void destroy_proc_ide_interface(ide_hwif_t *); -extern void ide_add_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *, void *); -extern void ide_remove_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *); +void ide_add_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *, void *); +void ide_remove_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *); read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -944,6 +944,8 @@ void ide_pci_create_host_proc(const char *, get_info_t *); return len; \ } #else +static inline void proc_ide_create(void) { ; } +static inline void proc_ide_destroy(void) { ; } static inline void create_proc_ide_interfaces(void) { ; } static inline void destroy_proc_ide_interface(ide_hwif_t *hwif) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; -- cgit v1.2.3 From 1497943ee692aa7519fa972d0e3a339649bf3a96 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:10 +0200 Subject: ide: split off ioctl handling from IDE settings (v2) * do write permission and min/max checks in ide_procset_t functions * ide-disk.c: drive->id is always available so cleanup "multcount" setting accordingly * ide-disk.c: "address" setting was incorrectly defined as 
type TYPE_INTA, fix it by using type TYPE_BYTE and updating ide_drive_t->adressing field, the bug didn't trigger because this IDE setting uses custom ->set function * ide.c: add set_ksettings() for handling HDIO_SET_KEEPSETTINGS ioctl * ide.c: add set_unmaskirq() for handling HDIO_SET_UNMASKINTR ioctl * handle ioctls directly in generic_ide_ioclt() and idedisk_ioctl() instead of using IDE settings to deal with them * remove no longer needed ide_find_setting_by_ioctl() and {read,write}_ioctl fields from ide_settings_t, also remove now unused TYPE_INTA handling v2: * add missing EXPORT_SYMBOL_GPL(ide_setting_sem) needed now for ide-disk Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 697c39dd66a1..591a0b55e31c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -601,16 +601,11 @@ typedef struct ide_drive_s { unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ unsigned vdma : 1; /* 1=doing PIO over DMA 0=doing normal DMA */ - unsigned addressing; /* : 3; - * 0=28-bit - * 1=48-bit - * 2=48-bit doing 28-bit - * 3=64-bit - */ unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ unsigned post_reset : 1; + u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ @@ -870,9 +865,8 @@ typedef struct hwgroup_s { */ #define TYPE_INT 0 -#define TYPE_INTA 1 -#define TYPE_BYTE 2 -#define TYPE_SHORT 3 +#define TYPE_BYTE 1 +#define TYPE_SHORT 2 #define SETTING_READ (1 << 0) #define SETTING_WRITE (1 << 1) @@ -882,8 +876,6 @@ typedef int (ide_procset_t)(ide_drive_t *, int); typedef struct ide_settings_s { char *name; int rw; - int read_ioctl; - int write_ioctl; int data_type; int min; int max; @@ -896,7 +888,7 @@ typedef struct ide_settings_s { } ide_settings_t; extern struct semaphore ide_setting_sem; -extern int ide_add_setting(ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set); +int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set); extern ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name); extern int ide_read_setting(ide_drive_t *t, ide_settings_t *setting); extern int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val); -- cgit v1.2.3 From 7662d046df09e80680b77b68de896beab45e675e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:10 +0200 Subject: ide: move IDE settings handling to ide-proc.c * move __ide_add_setting() ide_add_setting() __ide_remove_setting() auto_remove_settings() ide_find_setting_by_name() ide_read_setting() ide_write_setting() set_xfer_rate() ide_add_generic_settings() ide_register_subdriver() ide_unregister_subdriver() from ide.c to ide-proc.c * set_{io_32bit,pio_mode,using_dma}() cannot be marked static now, fix it * rename ide_[un]register_subdriver() to ide_proc_[un]register_driver(), update device drivers to use new names * add CONFIG_IDE_PROC_FS=n versions of ide_proc_[un]register_driver() and 
ide_add_generic_settings() * make ide_find_setting_by_name(), ide_{read,write}_setting() and ide_{add,remove}_proc_entries() static * cover IDE settings code in device drivers with CONFIG_IDE_PROC_FS #ifdef, also while at it cover with CONFIG_IDE_PROC_FS #ifdef ide_driver_t.proc * remove bogus comment from ide.h * cover with CONFIG_IDE_PROC_FS #ifdef .proc and .settings in ide_drive_t Besides saner code this patch results in the IDE core smaller by ~2 kB (on x86-32) and IDE disk driver by ~1 kB (ditto) when CONFIG_IDE_PROC_FS=n. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 591a0b55e31c..477b8c6be727 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -559,9 +559,10 @@ typedef struct ide_drive_s { struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ struct hd_driveid *id; /* drive model identification info */ +#ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_settings_s *settings;/* /proc/ide/ drive settings */ - +#endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ unsigned long sleep; /* sleep until this time */ @@ -858,8 +859,15 @@ typedef struct hwgroup_s { unsigned char cmd_buf[4]; } ide_hwgroup_t; -/* structure attached to the request for IDE_TASK_CMDS */ +typedef struct ide_driver_s ide_driver_t; + +extern struct semaphore ide_setting_sem; +int set_io_32bit(ide_drive_t *, int); +int set_pio_mode(ide_drive_t *, int); +int set_using_dma(ide_drive_t *, int); + +#ifdef CONFIG_IDE_PROC_FS /* * configurable drive settings */ @@ -887,12 +895,7 @@ typedef struct ide_settings_s { struct ide_settings_s *next; } ide_settings_t; -extern struct semaphore ide_setting_sem; int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set); -extern ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name); -extern int ide_read_setting(ide_drive_t *t, ide_settings_t *setting); -extern int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val); -extern void ide_add_generic_settings(ide_drive_t *drive); /* * /proc/ide interface @@ -904,15 +907,17 @@ typedef struct { write_proc_t *write_proc; } ide_proc_entry_t; -#ifdef CONFIG_IDE_PROC_FS extern struct proc_dir_entry *proc_ide_root; void proc_ide_create(void); void proc_ide_destroy(void); void create_proc_ide_interfaces(void); void destroy_proc_ide_interface(ide_hwif_t *); -void ide_add_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *, void *); -void ide_remove_proc_entries(struct proc_dir_entry *, ide_proc_entry_t *); +void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); +void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); + +void ide_add_generic_settings(ide_drive_t *); + read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -940,6 +945,9 @@ static inline void proc_ide_create(void) { ; } static inline void proc_ide_destroy(void) { ; } static inline void create_proc_ide_interfaces(void) { ; } static inline void destroy_proc_ide_interface(ide_hwif_t *hwif) { ; } +static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_add_generic_settings(ide_drive_t *drive) { ; } 
#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -982,7 +990,7 @@ enum { * The gendriver.owner field should be set to the module owner of this driver. * The gendriver.name field should be set to the name of this driver */ -typedef struct ide_driver_s { +struct ide_driver_s { const char *version; u8 media; unsigned supports_dsc_overlap : 1; @@ -990,12 +998,14 @@ typedef struct ide_driver_s { int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); ide_startstop_t (*abort)(ide_drive_t *, struct request *rq); - ide_proc_entry_t *proc; struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); void (*shutdown)(ide_drive_t *); -} ide_driver_t; +#ifdef CONFIG_IDE_PROC_FS + ide_proc_entry_t *proc; +#endif +}; #define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver) @@ -1205,9 +1215,6 @@ extern void default_hwif_iops(ide_hwif_t *); extern void default_hwif_mmiops(ide_hwif_t *); extern void default_hwif_transport(ide_hwif_t *); -void ide_register_subdriver(ide_drive_t *, ide_driver_t *); -void ide_unregister_subdriver(ide_drive_t *, ide_driver_t *); - #define ON_BOARD 1 #define NEVER_BOARD 0 -- cgit v1.2.3 From 7f8f48af0861c38c28d4abd550102643e0ea9e6a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:10 +0200 Subject: ide: cable detection fixes (take 2) Tejun's recent eighty_ninty_three() fix has inspired me to do more thorough review of the cable detection code... * print user-friendly warning about limiting the maximum transfer speed to UDMA33 (and the reason behind it) when 80-wire cable is not detected, also while at it cleanup eighty_ninty_three() a bit * use eighty_ninty_three() in ide_ata66_check(), this actually fixes 3 bugs: - bit 14 (word 93 validity check) == 1 && bit 13 (80-wire cable test) == 1 were used as 80-wire cable present test for CONFIG_IDEDMA_IVB=n case (please see FIXME comment in eighty_ninty_three() for more details) - CONFIG_IDEDMA_IVB=y/n cases were interchanged - check for SATA devices was missing * remove private cable warnings from pdc_202xx{old,new} drivers now that core code provides this functionality (plus, in pdc202xx_new case the test could give false warnings for ATAPI devices because pdc202xx_new driver doesn't even support ATAPI DMA) Cc: Tejun Heo Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 477b8c6be727..ca924b295c2e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -605,6 +605,7 @@ typedef struct ide_drive_s { unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ unsigned post_reset : 1; + unsigned udma33_warned : 1; u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ u8 quirk_list; /* considered quirky, set for a specific host */ -- cgit v1.2.3 From 869c56ee9de1b72cd3f8ab9cdfbd3601e55c61f2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:10 +0200 Subject: ide: add "initializing" argument to ide_register_hw() Add "initializing" argument to ide_register_hw() and use it instead of ide.c wide variable of the same name. Update all users of ide_register_hw() accordingly. 
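For a caller the update is mechanical; a registration outside the boot-time probe path would now read roughly like this (hypothetical call site):

hw_regs_t hw;
ide_hwif_t *hwif;

/* ...fill in hw as before... */

/* The new second argument is the "initializing" flag; 0 here because this
 * registration does not come from the initial IDE probe. */
ide_register_hw(&hw, 0, &hwif);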
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ca924b295c2e..bdb97655ef61 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -223,8 +223,9 @@ typedef struct hw_regs_s { /* * Register new hardware with ide */ -int ide_register_hw(hw_regs_t *hw, struct hwif_s **hwifp); -int ide_register_hw_with_fixup(hw_regs_t *, struct hwif_s **, void (*)(struct hwif_s *)); +int ide_register_hw(hw_regs_t *, int, struct hwif_s **); +int ide_register_hw_with_fixup(hw_regs_t *, int, struct hwif_s **, + void (*)(struct hwif_s *)); /* * Set up hw_regs_t structure before calling ide_register_hw (optional) -- cgit v1.2.3 From 5cbf79cdb37be2aa2a1b4fa94144526b14557060 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:11 +0200 Subject: ide: add ide_proc_register_port() * create_proc_ide_interfaces() tries to add /proc entries for every probed and initialized IDE port, replace it by ide_proc_register_port() which does it only for the given port (also rename destroy_proc_ide_interface() to ide_proc_unregister_port() for consistency) * convert {create,destroy}_proc_ide_interface[s]() users to use new functions * pmac driver depended on proc_ide_create() to add /proc port entries, fix it * au1xxx-ide, swarm and cs5520 drivers depended indirectly on ide-generic driver (CONFIG_IDE_GENERIC=y) to add port /proc entries, fix them * there is now no need to add /proc entries for IDE ports in proc_ide_create() so don't do it * proc_ide_create() needs now to be called before drivers are probed - fix it, while at it make proc_ide_create() create /proc "ide" directory Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index bdb97655ef61..52d482a16dd9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -909,12 +909,10 @@ typedef struct { write_proc_t *write_proc; } ide_proc_entry_t; -extern struct proc_dir_entry *proc_ide_root; - void proc_ide_create(void); void proc_ide_destroy(void); -void create_proc_ide_interfaces(void); -void destroy_proc_ide_interface(ide_hwif_t *); +void ide_proc_register_port(ide_hwif_t *); +void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); @@ -945,8 +943,8 @@ void ide_pci_create_host_proc(const char *, get_info_t *); #else static inline void proc_ide_create(void) { ; } static inline void proc_ide_destroy(void) { ; } -static inline void create_proc_ide_interfaces(void) { ; } -static inline void destroy_proc_ide_interface(ide_hwif_t *hwif) { ; } +static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; } +static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } static inline void ide_add_generic_settings(ide_drive_t *drive) { ; } -- cgit v1.2.3 From 6d208b39c45edee5def6c201fcd51561c5a39828 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 10 May 2007 00:01:11 +0200 Subject: ide: legacy PCI bus order probing fixes IDE PCI host drivers should register themselves with IDE core only when IDE driver 
is built-in, otherwise (IDE driver is modular and thus IDE PCI host drivers are also modular) the code has no effect and just complicates the probing. Fix it by adding new config option CONFIG_IDEPCI_PCIBUS (defined only when needed and invisible to the user) and covering by #ifdef/#endif the code in question. It turned out that "ide=reverse" was silently accepted but did nothing in case when IDE driver was modular, this is fixed now. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 52d482a16dd9..df4e6a510310 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1205,9 +1205,14 @@ void ide_init_disk(struct gendisk *, ide_drive_t *); extern int ideprobe_init(void); +#ifdef CONFIG_IDEPCI_PCIBUS_ORDER extern void ide_scan_pcibus(int scan_direction) __init; extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *owner, const char *mod_name); #define ide_pci_register_driver(d) __ide_pci_register_driver(d, THIS_MODULE, KBUILD_MODNAME) +#else +#define ide_pci_register_driver(d) pci_register_driver(d) +#endif + void ide_pci_setup_ports(struct pci_dev *, struct ide_pci_device_s *, int, ata_index_t *); extern void ide_setup_pci_noise (struct pci_dev *dev, struct ide_pci_device_s *d); -- cgit v1.2.3 From 44ce6294d07555c3d313757105fd44b78208407f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 May 2007 18:51:36 -0700 Subject: Revert "md: improve partition detection in md array" This reverts commit 5b479c91da90eef605f851508744bfe8269591a0. Quoth Neil Brown: "It causes an oops when auto-detecting raid arrays, and it doesn't seem easy to fix. The array may not be 'open' when do_md_run is called, so bdev->bd_disk might be NULL, so bd_set_size can oops. This whole approach of opening an md device before it has been assembled just seems to get more and more painful. I think I'm going to have to come up with something clever to provide both backward comparability with usage expectation, and sane integration into the rest of the kernel." Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index a121f36f4437..de72c49747c8 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -201,6 +201,7 @@ struct mddev_s struct mutex reconfig_mutex; atomic_t active; + int changed; /* true if we might need to reread partition info */ int degraded; /* whether md should consider * adding a spare */ -- cgit v1.2.3